{"id":157818,"date":"2024-11-02T23:42:21","date_gmt":"2024-11-02T15:42:21","guid":{"rendered":"https:\/\/server.hk\/cnblog\/157818\/"},"modified":"2024-11-02T23:42:22","modified_gmt":"2024-11-02T15:42:22","slug":"%e9%ab%98%e6%95%88%e4%be%bf%e6%8d%b7%e7%9a%84%e6%95%b8%e6%93%9a%e6%8e%a1%e9%9b%86%ef%bc%9a%e6%95%b8%e6%93%9a%e7%88%ac%e8%9f%b2%e8%88%87%e6%95%b8%e6%93%9a%e5%ba%ab%e9%80%a3%e6%8e%a5%ef%bc%88%e6%95%b8","status":"publish","type":"post","link":"https:\/\/server.hk\/cnblog\/157818\/","title":{"rendered":"\u9ad8\u6548\u4fbf\u6377\u7684\u6578\u64da\u63a1\u96c6\uff1a\u6578\u64da\u722c\u87f2\u8207\u6578\u64da\u5eab\u9023\u63a5\uff08\u6578\u64da\u722c\u53d6\u548c\u6578\u64da\u5eab\u9023\u63a5\uff09"},"content":{"rendered":"<h1 id=\"%e9%ab%98%e6%95%88%e4%be%bf%e6%8d%b7%e7%9a%84%e6%95%b8%e6%93%9a%e6%8e%a1%e9%9b%86%ef%bc%9a%e6%95%b8%e6%93%9a%e7%88%ac%e8%9f%b2%e8%88%87%e6%95%b8%e6%93%9a%e5%ba%ab%e9%80%a3%e6%8e%a5-graNKYvAQY\">\u9ad8\u6548\u4fbf\u6377\u7684\u6578\u64da\u63a1\u96c6\uff1a\u6578\u64da\u722c\u87f2\u8207\u6578\u64da\u5eab\u9023\u63a5<\/h1>\n<p>\u5728\u7576\u4eca\u6578\u64da\u9a45\u52d5\u7684\u6642\u4ee3\uff0c\u6578\u64da\u7684\u6536\u96c6\u548c\u5206\u6790\u5c0d\u65bc\u4f01\u696d\u548c\u7814\u7a76\u6a5f\u69cb\u4f86\u8aaa\u81f3\u95dc\u91cd\u8981\u3002\u6578\u64da\u722c\u87f2\uff08Web Scraping\uff09\u548c\u6578\u64da\u5eab\u9023\u63a5\u662f\u5169\u7a2e\u5e38\u898b\u7684\u6578\u64da\u63a1\u96c6\u6280\u8853\uff0c\u80fd\u5920\u5e6b\u52a9\u7528\u6236\u9ad8\u6548\u5730\u7372\u53d6\u6240\u9700\u7684\u6578\u64da\u3002\u672c\u6587\u5c07\u6df1\u5165\u63a2\u8a0e\u9019\u5169\u7a2e\u6280\u8853\u7684\u57fa\u672c\u6982\u5ff5\u3001\u5be6\u73fe\u65b9\u6cd5\u53ca\u5176\u61c9\u7528\u5834\u666f\u3002<\/p>\n<h2 
id=\"%e4%bb%80%e9%ba%bc%e6%98%af%e6%95%b8%e6%93%9a%e7%88%ac%e8%9f%b2%ef%bc%9f-graNKYvAQY\">\u4ec0\u9ebc\u662f\u6578\u64da\u722c\u87f2\uff1f<\/h2>\n<p>\u6578\u64da\u722c\u87f2\u662f\u4e00\u7a2e\u81ea\u52d5\u5316\u7684\u6578\u64da\u6536\u96c6\u6280\u8853\uff0c\u901a\u904e\u6a21\u64ec\u4eba\u985e\u7528\u6236\u5728\u7db2\u9801\u4e0a\u7684\u64cd\u4f5c\uff0c\u5f9e\u4e92\u806f\u7db2\u4e0a\u63d0\u53d6\u4fe1\u606f\u3002\u9019\u4e9b\u4fe1\u606f\u53ef\u4ee5\u662f\u6587\u672c\u3001\u5716\u7247\u3001\u8996\u983b\u7b49\u591a\u7a2e\u683c\u5f0f\u3002\u6578\u64da\u722c\u87f2\u901a\u5e38\u4f7f\u7528\u7de8\u7a0b\u8a9e\u8a00\uff08\u5982Python\u3001Java\u7b49\uff09\u4f86\u7de8\u5beb\uff0c\u4e26\u5229\u7528\u5404\u7a2e\u5eab\uff08\u5982Beautiful Soup\u3001Scrapy\u7b49\uff09\u4f86\u89e3\u6790HTML\u6216XML\u6587\u6a94\u3002<\/p>\n<h3 id=\"%e6%95%b8%e6%93%9a%e7%88%ac%e8%9f%b2%e7%9a%84%e5%9f%ba%e6%9c%ac%e6%b5%81%e7%a8%8b-graNKYvAQY\">\u6578\u64da\u722c\u87f2\u7684\u57fa\u672c\u6d41\u7a0b<\/h3>\n<ul>\n<li><strong>\u767c\u9001\u8acb\u6c42\uff1a<\/strong>\u722c\u87f2\u9996\u5148\u5411\u76ee\u6a19\u7db2\u7ad9\u767c\u9001HTTP\u8acb\u6c42\uff0c\u7372\u53d6\u7db2\u9801\u7684HTML\u5167\u5bb9\u3002<\/li>\n<li><strong>\u89e3\u6790\u6578\u64da\uff1a<\/strong>\u4f7f\u7528\u89e3\u6790\u5eab\u5c0d\u7372\u53d6\u7684HTML\u5167\u5bb9\u9032\u884c\u89e3\u6790\uff0c\u63d0\u53d6\u6240\u9700\u7684\u6578\u64da\u3002<\/li>\n<li><strong>\u6578\u64da\u5b58\u5132\uff1a<\/strong>\u5c07\u63d0\u53d6\u7684\u6578\u64da\u5b58\u5132\u5230\u672c\u5730\u6587\u4ef6\u6216\u6578\u64da\u5eab\u4e2d\uff0c\u4ee5\u4fbf\u5f8c\u7e8c\u5206\u6790\u3002<\/li>\n<\/ul>\n<h3 id=\"%e6%95%b8%e6%93%9a%e7%88%ac%e8%9f%b2%e7%9a%84%e7%a4%ba%e4%be%8b%e4%bb%a3%e7%a2%bc-graNKYvAQY\">\u6578\u64da\u722c\u87f2\u7684\u793a\u4f8b\u4ee3\u78bc<\/h3>\n<pre><code>import requests\nfrom bs4 import BeautifulSoup\n\n# \u767c\u9001\u8acb\u6c42\nurl = 'https:\/\/example.com'\nresponse = requests.get(url)\n\n# \u89e3\u6790\u6578\u64da\nsoup = 
BeautifulSoup(response.text, 'html.parser')\ndata = soup.find_all('h2')  # \u63d0\u53d6\u6240\u6709&lt;h2&gt;\u6a19\u7c64\u7684\u5167\u5bb9\n\n# \u5b58\u5132\u6578\u64da\nfor item in data:\n    print(item.text)\n<\/code><\/pre>\n<h2 id=\"%e4%bb%80%e9%ba%bc%e6%98%af%e6%95%b8%e6%93%9a%e5%ba%ab%e9%80%a3%e6%8e%a5%ef%bc%9f-graNKYvAQY\">\u4ec0\u9ebc\u662f\u6578\u64da\u5eab\u9023\u63a5\uff1f<\/h2>\n<p>\u6578\u64da\u5eab\u9023\u63a5\u662f\u6307\u901a\u904e\u7279\u5b9a\u7684\u5354\u8b70\u548c\u63a5\u53e3\uff0c\u5c07\u61c9\u7528\u7a0b\u5e8f\u8207\u6578\u64da\u5eab\u9032\u884c\u9023\u63a5\uff0c\u4ee5\u4fbf\u9032\u884c\u6578\u64da\u7684\u8b80\u53d6\u548c\u5beb\u5165\u64cd\u4f5c\u3002\u5e38\u898b\u7684\u6578\u64da\u5eab\u5305\u62ecMySQL\u3001PostgreSQL\u3001MongoDB\u7b49\u3002\u6578\u64da\u5eab\u9023\u63a5\u901a\u5e38\u4f7f\u7528SQL\u8a9e\u8a00\u9032\u884c\u67e5\u8a62\u548c\u64cd\u4f5c\u3002<\/p>\n<h3 id=\"%e6%95%b8%e6%93%9a%e5%ba%ab%e9%80%a3%e6%8e%a5%e7%9a%84%e5%9f%ba%e6%9c%ac%e6%b5%81%e7%a8%8b-graNKYvAQY\">\u6578\u64da\u5eab\u9023\u63a5\u7684\u57fa\u672c\u6d41\u7a0b<\/h3>\n<ul>\n<li><strong>\u5efa\u7acb\u9023\u63a5\uff1a<\/strong>\u4f7f\u7528\u6578\u64da\u5eab\u9a45\u52d5\u7a0b\u5e8f\u5efa\u7acb\u8207\u6578\u64da\u5eab\u7684\u9023\u63a5\u3002<\/li>\n<li><strong>\u57f7\u884c\u67e5\u8a62\uff1a<\/strong>\u901a\u904eSQL\u8a9e\u53e5\u57f7\u884c\u6578\u64da\u67e5\u8a62\u6216\u66f4\u65b0\u64cd\u4f5c\u3002<\/li>\n<li><strong>\u8655\u7406\u7d50\u679c\uff1a<\/strong>\u8655\u7406\u67e5\u8a62\u7d50\u679c\uff0c\u4e26\u5c07\u5176\u8fd4\u56de\u7d66\u61c9\u7528\u7a0b\u5e8f\u3002<\/li>\n<\/ul>\n<h3 id=\"%e6%95%b8%e6%93%9a%e5%ba%ab%e9%80%a3%e6%8e%a5%e7%9a%84%e7%a4%ba%e4%be%8b%e4%bb%a3%e7%a2%bc-graNKYvAQY\">\u6578\u64da\u5eab\u9023\u63a5\u7684\u793a\u4f8b\u4ee3\u78bc<\/h3>\n<pre><code>import mysql.connector\n\n# \u5efa\u7acb\u9023\u63a5\nconn = mysql.connector.connect(\n    host='localhost',\n    user='yourusername',\n    password='yourpassword',\n    database='yourdatabase'\n)\n\n# 
\u57f7\u884c\u67e5\u8a62\ncursor = conn.cursor()\ncursor.execute(\"SELECT * FROM yourtable\")\n\n# \u8655\u7406\u7d50\u679c\nfor row in cursor.fetchall():\n    print(row)\n\n# \u95dc\u9589\u9023\u63a5\ncursor.close()\nconn.close()\n<\/code><\/pre>\n<h2 id=\"%e6%95%b8%e6%93%9a%e7%88%ac%e8%9f%b2%e8%88%87%e6%95%b8%e6%93%9a%e5%ba%ab%e9%80%a3%e6%8e%a5%e7%9a%84%e6%87%89%e7%94%a8%e5%a0%b4%e6%99%af-graNKYvAQY\">\u6578\u64da\u722c\u87f2\u8207\u6578\u64da\u5eab\u9023\u63a5\u7684\u61c9\u7528\u5834\u666f<\/h2>\n<p>\u6578\u64da\u722c\u87f2\u548c\u6578\u64da\u5eab\u9023\u63a5\u53ef\u4ee5\u7d50\u5408\u4f7f\u7528\uff0c\u4ee5\u5be6\u73fe\u66f4\u9ad8\u6548\u7684\u6578\u64da\u63a1\u96c6\u548c\u7ba1\u7406\u3002\u4f8b\u5982\uff0c\u4f01\u696d\u53ef\u4ee5\u4f7f\u7528\u6578\u64da\u722c\u87f2\u5f9e\u7af6\u722d\u5c0d\u624b\u7684\u7db2\u7ad9\u4e0a\u63d0\u53d6\u7522\u54c1\u4fe1\u606f\uff0c\u7136\u5f8c\u5c07\u9019\u4e9b\u6578\u64da\u5b58\u5132\u5230\u81ea\u5df1\u7684\u6578\u64da\u5eab\u4e2d\u9032\u884c\u5206\u6790\u3002\u9019\u6a23\u4e0d\u50c5\u80fd\u5920\u7372\u53d6\u6700\u65b0\u7684\u5e02\u5834\u52d5\u614b\uff0c\u9084\u80fd\u5e6b\u52a9\u4f01\u696d\u5236\u5b9a\u66f4\u6709\u6548\u7684\u71df\u92b7\u7b56\u7565\u3002<\/p>\n<h2 id=\"%e7%b8%bd%e7%b5%90-graNKYvAQY\">\u7e3d\u7d50<\/h2>\n<p>\u6578\u64da\u722c\u87f2\u548c\u6578\u64da\u5eab\u9023\u63a5\u662f\u73fe\u4ee3\u6578\u64da\u63a1\u96c6\u7684\u91cd\u8981\u5de5\u5177\uff0c\u80fd\u5920\u5e6b\u52a9\u7528\u6236\u9ad8\u6548\u5730\u7372\u53d6\u548c\u7ba1\u7406\u6578\u64da\u3002\u96a8\u8457\u6280\u8853\u7684\u767c\u5c55\uff0c\u9019\u4e9b\u5de5\u5177\u7684\u61c9\u7528\u7bc4\u570d\u5c07\u6703\u66f4\u52a0\u5ee3\u6cdb\u3002\u5982\u679c\u60a8\u9700\u8981\u7a69\u5b9a\u7684\u6578\u64da\u5b58\u5132\u548c\u8655\u7406\u74b0\u5883\uff0c\u8003\u616e\u4f7f\u7528<a href=\"https:\/\/server.hk\">\u9999\u6e2fVPS<\/a>\u6216<a 
href=\"https:\/\/server.hk\">\u96f2\u4f3a\u670d\u5668<\/a>\u4f86\u652f\u6301\u60a8\u7684\u6578\u64da\u63a1\u96c6\u9700\u6c42\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u9ad8\u6548\u4fbf\u6377\u7684\u6578\u64da\u63a1\u96c6\uff0c\u63a2\u7d22\u6578\u64da\u722c\u87f2\u6280\u8853\u8207\u6578\u64da\u5eab\u9023\u63a5\uff0c\u5be6\u73fe\u5feb\u901f\u6578\u64da\u722c\u53d6\u8207\u7ba1\u7406\uff0c\u63d0\u5347\u696d\u52d9\u6c7a\u7b56\u6548\u7387\u3002<\/p>\n","protected":false},"author":0,"featured_media":0,"comment_status":"closed","ping_status":"","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[101],"tags":[],"class_list":["post-157818","post","type-post","status-publish","format-standard","hentry","category-database"],"_links":{"self":[{"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/posts\/157818","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/types\/post"}],"replies":[{"embeddable":true,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/comments?post=157818"}],"version-history":[{"count":1,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/posts\/157818\/revisions"}],"predecessor-version":[{"id":157819,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/posts\/157818\/revisions\/157819"}],"wp:attachment":[{"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/media?parent=157818"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/categories?post=157818"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/tags?post=157818"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}