{"id":205258,"date":"2025-05-29T17:08:15","date_gmt":"2025-05-29T09:08:15","guid":{"rendered":"https:\/\/server.hk\/cnblog\/205258\/"},"modified":"2025-05-29T17:08:15","modified_gmt":"2025-05-29T09:08:15","slug":"%e4%b8%ba%e4%bb%80%e4%b9%88%e7%94%a8-python-%e8%8e%b7%e5%8f%96%e7%9a%84%e7%bd%91%e5%9d%80%e5%b8%a6%e6%9c%89%e6%8b%ac%e5%8f%b7%e5%92%8c%e5%8d%95%e5%bc%95%e5%8f%b7%ef%bc%9f","status":"publish","type":"post","link":"https:\/\/server.hk\/cnblog\/205258\/","title":{"rendered":"\u4e3a\u4ec0\u4e48\u7528 Python \u83b7\u53d6\u7684\u7f51\u5740\u5e26\u6709\u62ec\u53f7\u548c\u5355\u5f15\u53f7\uff1f"},"content":{"rendered":"<p><b><\/b>     <\/p>\n<h1>\u4e3a\u4ec0\u4e48\u7528 Python \u83b7\u53d6\u7684\u7f51\u5740\u5e26\u6709\u62ec\u53f7\u548c\u5355\u5f15\u53f7\uff1f<\/h1>\n<p>\u5b66\u4e60\u8981\u52aa\u529b\uff0c\u4f46\u662f\u4e0d\u8981\u6025\uff01\u4eca\u5929\u7684\u8fd9\u7bc7\u6587\u7ae0<span style=\"color: #FF6600;, Helvetica, Arial, sans-serif;font-size: 14px;background-color: #FFFFFF\">\u300a\u4e3a\u4ec0\u4e48\u7528 Python \u83b7\u53d6\u7684\u7f51\u5740\u5e26\u6709\u62ec\u53f7\u548c\u5355\u5f15\u53f7\uff1f\u300b<\/span>\u5c06\u4f1a\u4ecb\u7ecd\u5230<span style=\"color: #FF6600;, Helvetica, Arial, sans-serif;font-size: 14px;background-color: #FFFFFF\"><\/span>\u7b49\u7b49\u77e5\u8bc6\u70b9\uff0c\u5982\u679c\u4f60\u60f3\u6df1\u5165\u5b66\u4e60<span style=\"color: #FF6600;, Helvetica, Arial, sans-serif;font-size: 14px;background-color: #FFFFFF\">\u6587\u7ae0<\/span>\uff0c\u53ef\u4ee5\u5173\u6ce8\u6211\uff01\u6211\u4f1a\u6301\u7eed\u66f4\u65b0\u76f8\u5173\u6587\u7ae0\u7684\uff0c\u5e0c\u671b\u5bf9\u5927\u5bb6\u90fd\u80fd\u6709\u6240\u5e2e\u52a9\uff01<\/p>\n<p><img decoding=\"async\" src=\"https:\/\/www.17golang.com\/uploads\/20241127\/17326949786746d3c2c5032.jpg\" class=\"aligncenter\"><\/p>\n<p><strong>\u6211\u7528 python 
\u83b7\u53d6\u7684\u7f51\u5740\u4e3a\u4f55\u5e26\u6709\u62ec\u53f7\u548c\u5355\u5f15\u53f7\uff1f<\/strong><\/p>\n<p>\u5728\u7f16\u5199\u722c\u866b\u7a0b\u5e8f\u65f6\uff0c\u4f60\u53ef\u80fd\u4f1a\u9047\u5230\u83b7\u53d6\u7684\u7f51\u5740\u4e2d\u5305\u542b\u62ec\u53f7\u548c\u5355\u5f15\u53f7\u7684\u60c5\u51b5\uff0c\u8fd9\u53ef\u80fd\u4f1a\u7ed9\u540e\u7eed\u5904\u7406\u5e26\u6765\u4e0d\u4fbf\u3002\u4e0b\u9762\u6211\u4eec\u5c06\u63a2\u8ba8\u5bfc\u81f4\u8fd9\u79cd\u60c5\u51b5\u7684\u539f\u56e0\u5e76\u63d0\u4f9b\u89e3\u51b3\u65b9\u6848\uff1a<\/p>\n<p><strong>\u539f\u56e0\uff1a<\/strong><\/p>\n<p>\u4f60\u4f7f\u7528\u7684\u89e3\u6790\u5de5\u5177\uff08\u5982 beautifulsoup\uff09\u5728\u89e3\u6790 html \u65f6\uff0c\u4f1a\u5c06\u539f\u59cb\u7684 html \u5b57\u7b26\u4e32\u8f6c\u6362\u4e3a python \u5bf9\u8c61\u3002html \u4e2d\u7684\u67d0\u4e9b\u7279\u6b8a\u5b57\u7b26\uff0c\u5982 &lt;\u3001&gt;\u3001&amp; \u548c &#39;, \u5728 python \u4e2d\u5177\u6709\u7279\u6b8a\u7684\u542b\u4e49\uff0c\u56e0\u6b64\u5728\u89e3\u6790\u65f6\u9700\u8981\u8fdb\u884c\u8f6c\u4e49\u3002<\/p>\n<p>\u9ed8\u8ba4\u60c5\u51b5\u4e0b\uff0cbeautifulsoup \u4f7f\u7528 html5 \u7684\u89e3\u6790\u5668\uff0c\u5b83\u4f1a\u5c06\u8fd9\u4e9b\u7279\u6b8a\u5b57\u7b26\u8f6c\u4e49\u6210\u5bf9\u5e94\u7684 html \u5b9e\u4f53\uff0c\u5982 &amp;lt; \u8868\u793a &lt;\u3001&amp;gt; \u8868\u793a &gt;\u3001&amp;amp; \u8868\u793a &amp; \u548c &amp;#39; \u8868\u793a &#39;. 
\u8fd9\u4e9b\u8f6c\u4e49\u5bf9\u4e8e\u907f\u514d\u5728 python \u4e2d\u4ea7\u751f\u8bed\u6cd5\u9519\u8bef\u5f88\u91cd\u8981\u3002<\/p>\n<p><strong>\u89e3\u51b3\u65b9\u6848\uff1a<\/strong><\/p>\n<p>\u8981\u83b7\u53d6\u4e0d\u5e26\u62ec\u53f7\u548c\u5355\u5f15\u53f7\u7684\u7f51\u5740\uff0c\u4f60\u53ef\u4ee5\u6267\u884c\u4ee5\u4e0b\u64cd\u4f5c\uff1a<\/p>\n<ul>\n<li><strong>\u4f7f\u7528\u6b63\u5219\u8868\u8fbe\u5f0f\u53bb\u9664 html \u5b9e\u4f53\uff1a<\/strong>\u4f7f\u7528\u6b63\u5219\u8868\u8fbe\u5f0f re.sub(&#39;&amp;.*?;&#39;, &#39;&#39;, link) \u5c06 html \u5b9e\u4f53\u66ff\u6362\u4e3a\u7a7a\u5b57\u7b26\u4e32\u3002<\/li>\n<li><strong>\u4f7f\u7528 lxml \u89e3\u6790\u5668\uff1a<\/strong>lxml \u662f\u53e6\u4e00\u4e2a\u6d41\u884c\u7684 html \u89e3\u6790\u5e93\uff0c\u5176\u9ed8\u8ba4\u89e3\u6790\u5668\u4e0d\u4f1a\u5c06\u7279\u6b8a\u5b57\u7b26\u8f6c\u4e49\u6210 html \u5b9e\u4f53\u3002\u53ea\u9700\u5c06 BeautifulSoup(html, &quot;html5lib&quot;) \u66ff\u6362\u4e3a etree.HTML(html) \u5373\u53ef\u3002<\/li>\n<li><strong>\u624b\u52a8\u89e3\u7801\u8f6c\u4e49\u5b57\u7b26\uff1a<\/strong>\u4f60\u53ef\u4ee5\u4f7f\u7528 html.unescape() \u51fd\u6570\u624b\u52a8\u89e3\u7801\u8f6c\u4e49\u5b57\u7b26\uff0c\u5373\u5c06 &#39;&amp;lt;a href=&quot;url&quot;&amp;gt;&#39; \u89e3\u7801\u4e3a &#39;&lt;a href=&quot;url&quot;&gt;&#39;\u3002<\/li>\n<\/ul>\n<p>\u4fee\u6539\u540e\u7684\u4ee3\u7801\u793a\u4f8b\u5982\u4e0b\uff1a<\/p>\n<pre>import requests\nfrom lxml import etree\n\nurl = 'http:\/\/www.prnasia.com\/m\/mediafeed\/rss?id=2303&amp;t=240'\nheaders = {\n    'User-Agent': 'Mozilla\/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit\/537.36 (KHTML, like Gecko) Chrome\/87.0.4280.66 Safari\/537.36'\n}\nres = requests.get(url, headers=headers)\nres_dome = etree.HTML(res.text)\nfor link in res_dome.xpath('\/\/h3\/a\/@href'):\n    print(link)  # 
\u65e0\u62ec\u53f7\u548c\u5355\u5f15\u53f7\u7684\u7f51\u5740<\/pre>\n<p>\u7ec8\u4e8e\u4ecb\u7ecd\u5b8c\u5566\uff01\u5c0f\u4f19\u4f34\u4eec\uff0c\u8fd9\u7bc7\u5173\u4e8e\u300a\u4e3a\u4ec0\u4e48\u7528 Python \u83b7\u53d6\u7684\u7f51\u5740\u5e26\u6709\u62ec\u53f7\u548c\u5355\u5f15\u53f7\uff1f\u300b\u7684\u4ecb\u7ecd\u5e94\u8be5\u8ba9\u4f60\u6536\u83b7\u591a\u591a\u4e86\u5427\uff01\u6b22\u8fce\u5927\u5bb6\u6536\u85cf\u6216\u5206\u4eab\u7ed9\u66f4\u591a\u9700\u8981\u5b66\u4e60\u7684\u670b\u53cb\u5427~\u516c\u4f17\u53f7\u4e5f\u4f1a\u53d1\u5e03\u6587\u7ae0\u76f8\u5173\u77e5\u8bc6\uff0c\u5feb\u6765\u5173\u6ce8\u5427\uff01<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u4e3a\u4ec0\u4e48\u7528 Python \u83b7\u53d6\u7684\u7f51&#46;&#46;&#46;<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[4925],"tags":[],"class_list":["post-205258","post","type-post","status-publish","format-standard","hentry","category-4925"],"_links":{"self":[{"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/posts\/205258","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/comments?post=205258"}],"version-history":[{"count":0,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/posts\/205258\/revisions"}],"wp:attachment":[{"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/media?parent=205258"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/categories?post=205258"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/tags?post=205258"}],"curies
":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}