{"id":204536,"date":"2025-05-29T14:17:24","date_gmt":"2025-05-29T06:17:24","guid":{"rendered":"https:\/\/server.hk\/cnblog\/204536\/"},"modified":"2025-05-29T14:17:24","modified_gmt":"2025-05-29T06:17:24","slug":"%e5%a6%82%e4%bd%95%e5%9c%a8-scrapy-%e4%b8%ad%e4%bd%bf%e7%94%a8-meta-%e5%ad%97%e5%85%b8%e4%bc%a0%e9%80%92%e5%8f%82%e6%95%b0%e5%90%88%e5%b9%b6%e5%88%97%e8%a1%a8%e9%a1%b5%e5%92%8c%e8%af%a6%e6%83%85","status":"publish","type":"post","link":"https:\/\/server.hk\/cnblog\/204536\/","title":{"rendered":"\u5982\u4f55\u5728 Scrapy \u4e2d\u4f7f\u7528 Meta \u5b57\u5178\u4f20\u9012\u53c2\u6570\u5408\u5e76\u5217\u8868\u9875\u548c\u8be6\u60c5\u9875\u4fe1\u606f\uff1f"},"content":{"rendered":"<p><b><\/b>     <\/p>\n<h1>\u5982\u4f55\u5728 Scrapy \u4e2d\u4f7f\u7528 Meta \u5b57\u5178\u4f20\u9012\u53c2\u6570\u5408\u5e76\u5217\u8868\u9875\u548c\u8be6\u60c5\u9875\u4fe1\u606f\uff1f<\/h1>\n<p>\u4ece\u73b0\u5728\u5f00\u59cb\uff0c\u52aa\u529b\u5b66\u4e60\u5427\uff01\u672c\u6587\u4e3b\u8981\u8bb2\u89e3\u4e86<span style=\"color: #FF6600;font-family: Helvetica, Arial, sans-serif;font-size: 14px;background-color: #FFFFFF\">\u5728 Scrapy \u4e2d\u4f7f\u7528 Meta \u5b57\u5178\u4f20\u9012\u53c2\u6570<\/span>\u7b49\u7b49\u76f8\u5173\u77e5\u8bc6\u70b9\uff0c\u6211\u4f1a\u6301\u7eed\u66f4\u65b0\u76f8\u5173\u7684\u7cfb\u5217\u6587\u7ae0\uff0c\u6b22\u8fce\u5927\u5bb6\u5173\u6ce8\u5e76\u79ef\u6781\u7559\u8a00\u5efa\u8bae\u3002\u4e0b\u9762\u5c31\u5148\u4e00\u8d77\u6765\u770b\u4e00\u4e0b\u672c\u7bc7\u6b63\u6587\u5185\u5bb9\u5427\uff0c\u5e0c\u671b\u80fd\u5e2e\u5230\u4f60\uff01<\/p>\n<p><img decoding=\"async\" src=\"https:\/\/www.17golang.com\/uploads\/20241110\/173120710367301fbf0eaba.jpg\" class=\"aligncenter\"><\/p>\n<p><strong>\u4f7f\u7528 scrapy meta \u4f20\u9012\u53c2\u6570<\/strong><\/p>\n<p>\u5728 scrapy \u4e2d\uff0citemparser \u53ef\u4ee5\u4f7f\u7528 meta 
\u5b57\u5178\u6765\u4f20\u9012\u53c2\u6570\uff0c\u8fd9\u5141\u8bb8\u6211\u4eec\u5c06\u5217\u8868\u9875\u6293\u53d6\u7684\u4fe1\u606f\u4e0e\u8be6\u60c5\u9875\u6293\u53d6\u7684\u4fe1\u606f\u5408\u5e76\u5230\u540c\u4e00\u4e2a item \u4e2d\u3002<\/p>\n<p><strong>\u5177\u4f53\u6b65\u9aa4\uff1a<\/strong><\/p>\n<ol>\n<li><strong>\u5728\u5217\u8868\u9875 itemparser \u4e2d\u6293\u53d6\u6807\u9898\u3001\u65f6\u95f4\u548c url\uff1a<\/strong><\/li>\n<\/ol>\n<pre>def parse(self, response):\n    # \u83b7\u53d6\u5217\u8868\u9875\u7684\u6807\u9898\u3001\u65f6\u95f4\u3001url\n    title = response.css(\"h1::text\").get()\n    time = response.css(\".time::text\").get()\n    url = response.css(\"a::attr(href)\").get()\n\n    # \u5c06\u5217\u8868\u9875\u4fe1\u606f\u653e\u5165 meta \u5b57\u5178\u4e2d\n    meta = {\n        \"title\": title,\n        \"time\": time,\n        \"url\": url\n    }\n\n    # \u901a\u8fc7 request \u56de\u8c03\u7ed9\u8be6\u60c5\u9875 itemparser\n    yield scrapy.Request(url, callback=self.parse_item, meta=meta)<\/pre>\n<ol start=\"2\">\n<li><strong>\u5728\u8be6\u60c5\u9875 itemparser \u4e2d\u6293\u53d6\u5185\u5bb9\u5e76\u5408\u5e76\u5217\u8868\u9875\u4fe1\u606f\uff1a<\/strong><\/li>\n<\/ol>\n<pre>def parse_item(self, response):\n    # \u83b7\u53d6\u8be6\u60c5\u9875\u7684\u5185\u5bb9\n    content = response.css(\".content::text\").get()\n\n    # \u4ece meta \u5b57\u5178\u4e2d\u83b7\u53d6\u5217\u8868\u9875\u4fe1\u606f\n    meta = response.meta\n    title = meta[\"title\"]\n    time = meta[\"time\"]\n    url = meta[\"url\"]\n\n    # \u521b\u5efa Item \u5e76\u8d4b\u503c\n    item = Item()\n    item[\"title\"] = title\n    item[\"time\"] = time\n    item[\"url\"] = url\n    item[\"content\"] = content\n\n    yield item<\/pre>\n<p>\u901a\u8fc7\u8fd9\u79cd\u65b9\u5f0f\uff0c\u6211\u4eec\u53ef\u4ee5\u5728\u8be6\u60c5\u9875 itemparser 
\u4e2d\u8bbf\u95ee\u5217\u8868\u9875\u6293\u53d6\u7684\u4fe1\u606f\uff0c\u5e76\u5c06\u5b83\u4eec\u4e0e\u8be6\u60c5\u9875\u6293\u53d6\u7684\u5185\u5bb9\u5408\u5e76\u5230\u540c\u4e00\u4e2a item \u4e2d\u3002<\/p>\n<p>\u4ee5\u4e0a\u5c31\u662f\u300a\u5982\u4f55\u5728 Scrapy \u4e2d\u4f7f\u7528 Meta \u5b57\u5178\u4f20\u9012\u53c2\u6570\u5408\u5e76\u5217\u8868\u9875\u548c\u8be6\u60c5\u9875\u4fe1\u606f\uff1f\u300b\u7684\u8be6\u7ec6\u5185\u5bb9\uff0c\u66f4\u591a\u5173\u4e8e Scrapy \u7684\u8d44\u6599\u8bf7\u5173\u6ce8\u516c\u4f17\u53f7\uff01<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u5982\u4f55\u5728 Scrapy \u4e2d\u4f7f\u7528 M&#46;&#46;&#46;<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[4925],"tags":[],"class_list":["post-204536","post","type-post","status-publish","format-standard","hentry","category-4925"],"_links":{"self":[{"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/posts\/204536","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/comments?post=204536"}],"version-history":[{"count":0,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/posts\/204536\/revisions"}],"wp:attachment":[{"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/media?parent=204536"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/categories?post=204536"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/tags?post=204536"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}