{"id":165965,"date":"2024-11-05T14:20:19","date_gmt":"2024-11-05T06:20:19","guid":{"rendered":"https:\/\/server.hk\/cnblog\/165965\/"},"modified":"2024-11-05T14:20:20","modified_gmt":"2024-11-05T06:20:20","slug":"%e5%bf%ab%e9%80%9f%e9%ab%98%e6%95%88%e7%9a%84%e6%96%b9%e6%b3%95%ef%bc%9a%e5%a6%82%e4%bd%95%e5%a4%a7%e8%a6%8f%e6%a8%a1%e5%b0%8e%e5%85%a5%e6%95%b8%e6%93%9a%e5%ba%ab-%e5%a6%82%e4%bd%95%e5%b0%8e%e5%85%a5","status":"publish","type":"post","link":"https:\/\/server.hk\/cnblog\/165965\/","title":{"rendered":"\u5feb\u901f\u9ad8\u6548\u7684\u65b9\u6cd5\uff1a\u5982\u4f55\u5927\u898f\u6a21\u5c0e\u5165\u6578\u64da\u5eab (\u5982\u4f55\u5c0e\u5165\u5927\u91cf\u6578\u64da\u5eab)"},"content":{"rendered":"<h1 id=\"%e5%bf%ab%e9%80%9f%e9%ab%98%e6%95%88%e7%9a%84%e6%96%b9%e6%b3%95%ef%bc%9a%e5%a6%82%e4%bd%95%e5%a4%a7%e8%a6%8f%e6%a8%a1%e5%b0%8e%e5%85%a5%e6%95%b8%e6%93%9a%e5%ba%ab-QxaLUFzExW\">\u5feb\u901f\u9ad8\u6548\u7684\u65b9\u6cd5\uff1a\u5982\u4f55\u5927\u898f\u6a21\u5c0e\u5165\u6578\u64da\u5eab<\/h1>\n<p>\u5728\u7576\u4eca\u6578\u64da\u9a45\u52d5\u7684\u4e16\u754c\u4e2d\uff0c\u4f01\u696d\u548c\u958b\u767c\u8005\u7d93\u5e38\u9700\u8981\u8655\u7406\u5927\u91cf\u6578\u64da\u3002\u7121\u8ad6\u662f\u5f9e\u820a\u7cfb\u7d71\u9077\u79fb\u6578\u64da\uff0c\u9084\u662f\u5f9e\u5916\u90e8\u4f86\u6e90\u5c0e\u5165\u6578\u64da\uff0c\u5feb\u901f\u9ad8\u6548\u5730\u5c0e\u5165\u6578\u64da\u5eab\u90fd\u662f\u4e00\u9805\u91cd\u8981\u7684\u6280\u80fd\u3002\u672c\u6587\u5c07\u63a2\u8a0e\u4e00\u4e9b\u6709\u6548\u7684\u65b9\u6cd5\u548c\u6700\u4f73\u5be6\u8e10\uff0c\u4ee5\u5e6b\u52a9\u60a8\u5728\u5927\u898f\u6a21\u5c0e\u5165\u6578\u64da\u5eab\u6642\u63d0\u9ad8\u6548\u7387\u3002<\/p>\n<h2 id=\"1-%e7%a2%ba%e5%ae%9a%e6%95%b8%e6%93%9a%e4%be%86%e6%ba%90-QxaLUFzExW\">1. 
\u78ba\u5b9a\u6578\u64da\u4f86\u6e90<\/h2>\n<p>\u5728\u958b\u59cb\u5c0e\u5165\u6578\u64da\u4e4b\u524d\uff0c\u9996\u5148\u9700\u8981\u78ba\u5b9a\u6578\u64da\u7684\u4f86\u6e90\u3002\u6578\u64da\u53ef\u4ee5\u4f86\u81ea\u591a\u7a2e\u4f86\u6e90\uff0c\u4f8b\u5982\uff1a<\/p>\n<ul>\n<li>CSV\u6216Excel\u6587\u4ef6<\/li>\n<li>\u5176\u4ed6\u6578\u64da\u5eab\uff08\u5982MySQL\u3001PostgreSQL\u7b49\uff09<\/li>\n<li>API\u63a5\u53e3<\/li>\n<li>\u7db2\u7d61\u722c\u87f2\u6293\u53d6\u7684\u6578\u64da<\/li>\n<\/ul>\n<p>\u4e86\u89e3\u6578\u64da\u4f86\u6e90\u5f8c\uff0c\u60a8\u53ef\u4ee5\u9078\u64c7\u6700\u5408\u9069\u7684\u5c0e\u5165\u65b9\u6cd5\u3002<\/p>\n<h2 id=\"2-%e6%95%b8%e6%93%9a%e6%b8%85%e7%90%86%e8%88%87%e8%bd%89%e6%8f%9b-QxaLUFzExW\">2. \u6578\u64da\u6e05\u7406\u8207\u8f49\u63db<\/h2>\n<p>\u5728\u5c0e\u5165\u6578\u64da\u4e4b\u524d\uff0c\u9032\u884c\u6578\u64da\u6e05\u7406\u548c\u8f49\u63db\u662f\u81f3\u95dc\u91cd\u8981\u7684\u3002\u9019\u4e00\u904e\u7a0b\u5305\u62ec\uff1a<\/p>\n<ul>\n<li>\u53bb\u9664\u91cd\u8907\u6578\u64da<\/li>\n<li>\u586b\u88dc\u7f3a\u5931\u503c<\/li>\n<li>\u683c\u5f0f\u8f49\u63db\uff08\u5982\u65e5\u671f\u683c\u5f0f\u3001\u6578\u5b57\u683c\u5f0f\u7b49\uff09<\/li>\n<\/ul>\n<p>\u4f7f\u7528\u5de5\u5177\u5982Python\u7684Pandas\u5eab\uff0c\u53ef\u4ee5\u8f15\u9b06\u5730\u9032\u884c\u6578\u64da\u6e05\u7406\u548c\u8f49\u63db\u3002\u4f8b\u5982\uff1a<\/p>\n<pre><code>import pandas as pd\n\n# \u8b80\u53d6CSV\u6587\u4ef6\ndata = pd.read_csv('data.csv')\n\n# \u53bb\u9664\u91cd\u8907\u884c\ndata = data.drop_duplicates()\n\n# \u586b\u88dc\u7f3a\u5931\u503c\ndata.fillna(0, inplace=True)\n\n# \u5c07\u65e5\u671f\u683c\u5f0f\u8f49\u63db\u70ba\u6a19\u6e96\u683c\u5f0f\ndata['date'] = pd.to_datetime(data['date'])\n<\/code><\/pre>\n<h2 id=\"3-%e4%bd%bf%e7%94%a8%e6%89%b9%e9%87%8f%e5%b0%8e%e5%85%a5%e5%b7%a5%e5%85%b7-QxaLUFzExW\">3. 
\u4f7f\u7528\u6279\u91cf\u5c0e\u5165\u5de5\u5177<\/h2>\n<p>\u8a31\u591a\u6578\u64da\u5eab\u7ba1\u7406\u7cfb\u7d71\uff08DBMS\uff09\u63d0\u4f9b\u4e86\u6279\u91cf\u5c0e\u5165\u5de5\u5177\uff0c\u53ef\u4ee5\u986f\u8457\u63d0\u9ad8\u5c0e\u5165\u901f\u5ea6\u3002\u4f8b\u5982\uff1a<\/p>\n<ul>\n<li><strong>MySQL\uff1a<\/strong>\u4f7f\u7528LOAD DATA INFILE\u547d\u4ee4\u53ef\u4ee5\u5feb\u901f\u5c0e\u5165CSV\u6587\u4ef6\u3002<\/li>\n<li><strong>PostgreSQL\uff1a<\/strong>\u4f7f\u7528COPY\u547d\u4ee4\u53ef\u4ee5\u5f9e\u6587\u4ef6\u6216\u6a19\u6e96\u8f38\u5165\u5c0e\u5165\u6578\u64da\u3002<\/li>\n<\/ul>\n<p>\u4ee5\u4e0b\u662fMySQL\u7684\u793a\u4f8b\uff1a<\/p>\n<pre><code>LOAD DATA INFILE '\/path\/to\/data.csv'\nINTO TABLE your_table\nFIELDS TERMINATED BY ','\nLINES TERMINATED BY '\\n'\nIGNORE 1 ROWS;\n<\/code><\/pre>\n<h2 id=\"4-%e5%88%86%e6%89%b9%e5%b0%8e%e5%85%a5-QxaLUFzExW\">4. \u5206\u6279\u5c0e\u5165<\/h2>\n<p>\u5c0d\u65bc\u7279\u5225\u5927\u7684\u6578\u64da\u96c6\uff0c\u5206\u6279\u5c0e\u5165\u662f\u4e00\u7a2e\u6709\u6548\u7684\u65b9\u6cd5\u3002\u5c07\u6578\u64da\u5206\u6210\u5c0f\u6279\u6b21\u9032\u884c\u5c0e\u5165\uff0c\u53ef\u4ee5\u6e1b\u5c11\u7cfb\u7d71\u8ca0\u64d4\u4e26\u63d0\u9ad8\u6210\u529f\u7387\u3002\u9019\u53ef\u4ee5\u901a\u904e\u7de8\u5beb\u8173\u672c\u4f86\u5be6\u73fe\uff0c\u4f8b\u5982\u4f7f\u7528Python\u7684SQLAlchemy\u5eab\uff1a<\/p>\n<pre><code>from sqlalchemy import create_engine\nimport pandas as pd\n\n# \u5275\u5efa\u6578\u64da\u5eab\u9023\u63a5\nengine = create_engine('mysql+pymysql:\/\/user:password@host\/db')\n\n# \u8b80\u53d6\u6578\u64da\ndata = pd.read_csv('data.csv', chunksize=1000)\n\n# \u5206\u6279\u5c0e\u5165\nfor chunk in data:\n    chunk.to_sql('your_table', con=engine, if_exists='append', index=False)\n<\/code><\/pre>\n<h2 id=\"5-%e7%9b%a3%e6%8e%a7%e8%88%87%e9%8c%af%e8%aa%a4%e8%99%95%e7%90%86-QxaLUFzExW\">5. 
\u76e3\u63a7\u8207\u932f\u8aa4\u8655\u7406<\/h2>\n<p>\u5728\u5c0e\u5165\u904e\u7a0b\u4e2d\uff0c\u76e3\u63a7\u9032\u5ea6\u548c\u8655\u7406\u932f\u8aa4\u662f\u975e\u5e38\u91cd\u8981\u7684\u3002\u53ef\u4ee5\u4f7f\u7528\u65e5\u8a8c\u8a18\u9304\u5c0e\u5165\u904e\u7a0b\u4e2d\u7684\u6bcf\u4e00\u6b65\uff0c\u4e26\u5728\u51fa\u73fe\u932f\u8aa4\u6642\u9032\u884c\u76f8\u61c9\u7684\u8655\u7406\u3002\u9019\u6a23\u53ef\u4ee5\u78ba\u4fdd\u6578\u64da\u7684\u5b8c\u6574\u6027\u548c\u6e96\u78ba\u6027\u3002<\/p>\n<h2 id=\"%e7%b8%bd%e7%b5%90-QxaLUFzExW\">\u7e3d\u7d50<\/h2>\n<p>\u5927\u898f\u6a21\u5c0e\u5165\u6578\u64da\u5eab\u662f\u4e00\u9805\u6311\u6230\uff0c\u4f46\u901a\u904e\u78ba\u5b9a\u6578\u64da\u4f86\u6e90\u3001\u9032\u884c\u6578\u64da\u6e05\u7406\u3001\u4f7f\u7528\u6279\u91cf\u5c0e\u5165\u5de5\u5177\u3001\u5206\u6279\u5c0e\u5165\u4ee5\u53ca\u76e3\u63a7\u9032\u5ea6\uff0c\u60a8\u53ef\u4ee5\u6709\u6548\u5730\u5b8c\u6210\u9019\u4e00\u4efb\u52d9\u3002\u7121\u8ad6\u60a8\u662f\u4f7f\u7528 <a href=\"https:\/\/server.hk\">\u9999\u6e2fVPS<\/a> \u9084\u662f\u5176\u4ed6\u670d\u52d9\uff0c\u9078\u64c7\u5408\u9069\u7684\u5de5\u5177\u548c\u65b9\u6cd5\u5c07\u6709\u52a9\u65bc\u63d0\u9ad8\u6578\u64da\u5c0e\u5165\u7684\u6548\u7387\u548c\u6e96\u78ba\u6027\u3002\u4e86\u89e3\u66f4\u591a\u95dc\u65bc <a href=\"https:\/\/server.hk\">\u9999\u6e2f\u4f3a\u670d\u5668<\/a> 
\u7684\u4fe1\u606f\uff0c\u5e6b\u52a9\u60a8\u5728\u6578\u64da\u7ba1\u7406\u4e0a\u53d6\u5f97\u6210\u529f\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u63a2\u7d22\u5feb\u901f\u9ad8\u6548\u7684\u65b9\u6cd5\uff0c\u5be6\u73fe\u5927\u898f\u6a21\u6578\u64da\u5eab\u5c0e\u5165\uff0c\u63d0\u5347\u6578\u64da\u8655\u7406\u6548\u7387\uff0c\u52a9\u529b\u696d\u52d9\u589e\u9577\u8207\u6c7a\u7b56\u652f\u6301\u3002<\/p>\n","protected":false},"author":0,"featured_media":0,"comment_status":"closed","ping_status":"","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[101],"tags":[],"class_list":["post-165965","post","type-post","status-publish","format-standard","hentry","category-database"],"_links":{"self":[{"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/posts\/165965","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/types\/post"}],"replies":[{"embeddable":true,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/comments?post=165965"}],"version-history":[{"count":1,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/posts\/165965\/revisions"}],"predecessor-version":[{"id":165966,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/posts\/165965\/revisions\/165966"}],"wp:attachment":[{"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/media?parent=165965"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/categories?post=165965"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/tags?post=165965"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}