{"id":195685,"date":"2024-11-13T17:03:20","date_gmt":"2024-11-13T09:03:20","guid":{"rendered":"https:\/\/server.hk\/cnblog\/195685\/"},"modified":"2024-11-13T17:03:21","modified_gmt":"2024-11-13T09:03:21","slug":"%e5%86%8d%e8%a6%8b%ef%bc%81%e4%b8%8d%e5%86%8d%e4%bd%bf%e7%94%a8-pandas-%e4%b8%ad%e7%9a%84-merge-%e6%96%b9%e6%b3%95","status":"publish","type":"post","link":"https:\/\/server.hk\/cnblog\/195685\/","title":{"rendered":"\u518d\u898b\uff01\u4e0d\u518d\u4f7f\u7528 Pandas \u4e2d\u7684 Merge \u65b9\u6cd5"},"content":{"rendered":"<h1 id=\"%e5%86%8d%e8%a6%8b%ef%bc%81%e4%b8%8d%e5%86%8d%e4%bd%bf%e7%94%a8-pandas-%e4%b8%ad%e7%9a%84-merge-%e6%96%b9%e6%b3%95-vLNUZwGDQQ\">\u518d\u898b\uff01\u4e0d\u518d\u4f7f\u7528 Pandas \u4e2d\u7684 Merge \u65b9\u6cd5<\/h1>\n<p>\u5728\u6578\u64da\u79d1\u5b78\u548c\u6578\u64da\u5206\u6790\u7684\u9818\u57df\u4e2d\uff0cPython \u7684 Pandas \u5eab\u7121\u7591\u662f\u6700\u53d7\u6b61\u8fce\u7684\u5de5\u5177\u4e4b\u4e00\u3002\u5b83\u63d0\u4f9b\u4e86\u591a\u7a2e\u6578\u64da\u64cd\u4f5c\u65b9\u6cd5\uff0c\u5176\u4e2d\u6700\u5e38\u7528\u7684\u5c31\u662f\u5408\u4f75\uff08merge\uff09\u6578\u64da\u6846\uff08DataFrame\uff09\u3002\u7136\u800c\uff0c\u96a8\u8457\u6280\u8853\u7684\u9032\u6b65\u548c\u9700\u6c42\u7684\u8b8a\u5316\uff0c\u8a31\u591a\u6578\u64da\u79d1\u5b78\u5bb6\u548c\u5206\u6790\u5e2b\u958b\u59cb\u5c0b\u627e\u66f4\u9ad8\u6548\u7684\u66ff\u4ee3\u65b9\u6848\u3002\u672c\u6587\u5c07\u63a2\u8a0e\u70ba\u4ec0\u9ebc\u5728\u67d0\u4e9b\u60c5\u6cc1\u4e0b\uff0c\u4f7f\u7528 Pandas \u7684 merge \u65b9\u6cd5\u53ef\u80fd\u4e0d\u518d\u662f\u6700\u4f73\u9078\u64c7\uff0c\u4ee5\u53ca\u53ef\u4ee5\u8003\u616e\u7684\u66ff\u4ee3\u65b9\u6848\u3002<\/p>\n<h2 id=\"%e7%82%ba%e4%bb%80%e9%ba%bc%e4%b8%8d%e5%86%8d%e4%bd%bf%e7%94%a8-merge-%e6%96%b9%e6%b3%95-vLNUZwGDQQ\">\u70ba\u4ec0\u9ebc\u4e0d\u518d\u4f7f\u7528 Merge \u65b9\u6cd5<\/h2>\n<p>\u9996\u5148\uff0cPandas \u7684 merge 
\u65b9\u6cd5\u5728\u8655\u7406\u5927\u578b\u6578\u64da\u96c6\u6642\u53ef\u80fd\u6703\u51fa\u73fe\u6027\u80fd\u74f6\u9838\u3002\u7576\u6578\u64da\u96c6\u7684\u5927\u5c0f\u589e\u52a0\u6642\uff0cmerge \u64cd\u4f5c\u7684\u8a08\u7b97\u6642\u9593\u548c\u5167\u5b58\u6d88\u8017\u4e5f\u6703\u96a8\u4e4b\u589e\u52a0\u3002\u9019\u5c0d\u65bc\u9700\u8981\u5feb\u901f\u97ff\u61c9\u7684\u61c9\u7528\u7a0b\u5e8f\u4f86\u8aaa\uff0c\u53ef\u80fd\u6703\u9020\u6210\u4e0d\u5fc5\u8981\u7684\u5ef6\u9072\u3002<\/p>\n<p>\u5176\u6b21\uff0cmerge \u65b9\u6cd5\u7684\u8a9e\u6cd5\u76f8\u5c0d\u8907\u96dc\uff0c\u5c0d\u65bc\u521d\u5b78\u8005\u4f86\u8aaa\uff0c\u7406\u89e3\u548c\u4f7f\u7528\u53ef\u80fd\u6703\u6709\u4e00\u5b9a\u7684\u96e3\u5ea6\u3002\u7279\u5225\u662f\u5728\u9700\u8981\u9032\u884c\u591a\u91cd\u5408\u4f75\u6216\u4f7f\u7528\u4e0d\u540c\u7684\u5408\u4f75\u689d\u4ef6\u6642\uff0c\u4ee3\u78bc\u7684\u53ef\u8b80\u6027\u548c\u7dad\u8b77\u6027\u6703\u53d7\u5230\u5f71\u97ff\u3002<\/p>\n<h2 id=\"%e6%9b%bf%e4%bb%a3%e6%96%b9%e6%a1%88-vLNUZwGDQQ\">\u66ff\u4ee3\u65b9\u6848<\/h2>\n<p>\u96a8\u8457\u6578\u64da\u8655\u7406\u9700\u6c42\u7684\u8b8a\u5316\uff0c\u8a31\u591a\u958b\u767c\u8005\u958b\u59cb\u63a2\u7d22\u5176\u4ed6\u6578\u64da\u5408\u4f75\u7684\u65b9\u6cd5\u3002\u4ee5\u4e0b\u662f\u5e7e\u500b\u503c\u5f97\u8003\u616e\u7684\u66ff\u4ee3\u65b9\u6848\uff1a<\/p>\n<h3 id=\"1-%e4%bd%bf%e7%94%a8-dask-vLNUZwGDQQ\">1. 
\u4f7f\u7528 Dask<\/h3>\n<p>Dask \u662f\u4e00\u500b\u9748\u6d3b\u7684\u4e26\u884c\u8a08\u7b97\u5eab\uff0c\u65e8\u5728\u8655\u7406\u5927\u898f\u6a21\u6578\u64da\u96c6\u3002\u5b83\u7684 API \u8207 Pandas \u985e\u4f3c\uff0c\u4f46\u80fd\u5920\u5728\u591a\u6838\u8655\u7406\u5668\u4e0a\u904b\u884c\uff0c\u5f9e\u800c\u63d0\u9ad8\u6027\u80fd\u3002\u4f7f\u7528 Dask \u9032\u884c\u6578\u64da\u5408\u4f75\u7684\u57fa\u672c\u8a9e\u6cd5\u5982\u4e0b\uff1a<\/p>\n<pre><code>import dask.dataframe as dd\n\n# \u8b80\u53d6\u6578\u64da\ndf1 = dd.read_csv('data1.csv')\ndf2 = dd.read_csv('data2.csv')\n\n# \u5408\u4f75\u6578\u64da\nmerged_df = dd.merge(df1, df2, on='key_column', how='inner')\n<\/code><\/pre>\n<p>\u9019\u6a23\u7684\u65b9\u5f0f\u4e0d\u50c5\u80fd\u5920\u8655\u7406\u66f4\u5927\u7684\u6578\u64da\u96c6\uff0c\u9084\u80fd\u63d0\u9ad8\u8a08\u7b97\u6548\u7387\u3002<\/p>\n<h3 id=\"2-%e4%bd%bf%e7%94%a8-polars-vLNUZwGDQQ\">2. \u4f7f\u7528 Polars<\/h3>\n<p>Polars \u662f\u4e00\u500b\u9ad8\u6027\u80fd\u7684 DataFrame \u5eab\uff0c\u5c08\u70ba\u901f\u5ea6\u548c\u5167\u5b58\u6548\u7387\u800c\u8a2d\u8a08\u3002\u5b83\u7684\u5408\u4f75\u64cd\u4f5c\u6bd4 Pandas \u66f4\u5feb\uff0c\u7279\u5225\u662f\u5728\u8655\u7406\u5927\u578b\u6578\u64da\u96c6\u6642\u3002\u4ee5\u4e0b\u662f\u4f7f\u7528 Polars \u9032\u884c\u5408\u4f75\u7684\u793a\u4f8b\uff1a<\/p>\n<pre><code>import polars as pl\n\n# \u8b80\u53d6\u6578\u64da\ndf1 = pl.read_csv('data1.csv')\ndf2 = pl.read_csv('data2.csv')\n\n# \u5408\u4f75\u6578\u64da\nmerged_df = df1.join(df2, on='key_column', how='inner')\n<\/code><\/pre>\n<p>Polars \u7684 API \u8a2d\u8a08\u7c21\u6f54\uff0c\u6613\u65bc\u4f7f\u7528\uff0c\u4e26\u4e14\u5728\u6027\u80fd\u4e0a\u8868\u73fe\u512a\u7570\u3002<\/p>\n<h3 id=\"3-%e4%bd%bf%e7%94%a8-sqlalchemy-vLNUZwGDQQ\">3. 
\u4f7f\u7528 SQLAlchemy<\/h3>\n<p>\u5982\u679c\u6578\u64da\u5b58\u5132\u5728\u6578\u64da\u5eab\u4e2d\uff0c\u4f7f\u7528 SQLAlchemy \u9032\u884c\u6578\u64da\u5408\u4f75\u4e5f\u662f\u4e00\u500b\u4e0d\u932f\u7684\u9078\u64c7\u3002\u9019\u6a23\u53ef\u4ee5\u76f4\u63a5\u5728\u6578\u64da\u5eab\u4e2d\u9032\u884c\u64cd\u4f5c\uff0c\u6e1b\u5c11\u6578\u64da\u50b3\u8f38\u7684\u958b\u92b7\u3002\u4ee5\u4e0b\u662f\u4f7f\u7528 SQLAlchemy \u7684\u793a\u4f8b\uff1a<\/p>\n<pre><code>from sqlalchemy import create_engine\nimport pandas as pd\n\n# \u5275\u5efa\u6578\u64da\u5eab\u9023\u63a5\nengine = create_engine('sqlite:\/\/\/mydatabase.db')\n\n# \u5728\u6578\u64da\u5eab\u4e2d\u5408\u4f75\u6578\u64da\nquery = 'SELECT * FROM table1 INNER JOIN table2 USING (key_column)'\nmerged_df = pd.read_sql(query, engine)\n<\/code><\/pre>\n<p>\u9019\u7a2e\u65b9\u6cd5\u80fd\u5920\u5145\u5206\u5229\u7528\u6578\u64da\u5eab\u7684\u6027\u80fd\uff0c\u7279\u5225\u662f\u5728\u8655\u7406\u5927\u91cf\u6578\u64da\u6642\u3002<\/p>\n<h2 id=\"%e7%b5%90%e8%ab%96-vLNUZwGDQQ\">\u7d50\u8ad6<\/h2>\n<p>\u96d6\u7136 Pandas \u7684 merge \u65b9\u6cd5\u5728\u8a31\u591a\u60c5\u6cc1\u4e0b\u4ecd\u7136\u6709\u6548\uff0c\u4f46\u96a8\u8457\u6578\u64da\u96c6\u7684\u589e\u5927\u548c\u9700\u6c42\u7684\u8b8a\u5316\uff0c\u5c0b\u627e\u66f4\u9ad8\u6548\u7684\u66ff\u4ee3\u65b9\u6848\u8b8a\u5f97\u8d8a\u4f86\u8d8a\u91cd\u8981\u3002Dask\u3001Polars \u548c SQLAlchemy \u7b49\u5de5\u5177\u63d0\u4f9b\u4e86\u66f4\u597d\u7684\u6027\u80fd\u548c\u53ef\u64f4\u5c55\u6027\uff0c\u503c\u5f97\u6578\u64da\u79d1\u5b78\u5bb6\u548c\u5206\u6790\u5e2b\u8003\u616e\u3002<\/p>\n<p>\u5982\u679c\u60a8\u6b63\u5728\u5c0b\u627e\u9ad8\u6548\u7684\u6578\u64da\u8655\u7406\u89e3\u6c7a\u65b9\u6848\uff0c\u7121\u8ad6\u662f\u4f7f\u7528 <a href=\"https:\/\/server.hk\">VPS<\/a> 
\u9084\u662f\u5176\u4ed6\u670d\u52d9\uff0c\u4e86\u89e3\u9019\u4e9b\u5de5\u5177\u5c07\u6709\u52a9\u65bc\u63d0\u5347\u60a8\u7684\u5de5\u4f5c\u6548\u7387\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u63a2\u7d22\u5982\u4f55\u5728\u4e0d\u4f7f\u7528 Pandas \u7684 Merge \u65b9\u6cd5\u7684\u60c5\u6cc1\u4e0b\uff0c\u9032\u884c\u6578\u64da\u5408\u4f75\uff0c\u63d0\u5347\u6578\u64da\u8655\u7406\u6548\u7387\u8207\u9748\u6d3b\u6027\u3002<\/p>\n","protected":false},"author":0,"featured_media":0,"comment_status":"closed","ping_status":"","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[101],"tags":[],"class_list":["post-195685","post","type-post","status-publish","format-standard","hentry","category-database"],"_links":{"self":[{"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/posts\/195685","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/types\/post"}],"replies":[{"embeddable":true,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/comments?post=195685"}],"version-history":[{"count":1,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/posts\/195685\/revisions"}],"predecessor-version":[{"id":195686,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/posts\/195685\/revisions\/195686"}],"wp:attachment":[{"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/media?parent=195685"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/categories?post=195685"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/tags?post=195685"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}