{"id":2239,"date":"2025-05-20T10:29:12","date_gmt":"2025-05-20T01:29:12","guid":{"rendered":"https:\/\/www.yilus5.com\/blog\/?p=2239"},"modified":"2025-05-22T21:12:17","modified_gmt":"2025-05-22T12:12:17","slug":"%e5%a6%82%e4%bd%95%e5%9c%a8%e8%8b%b1%e5%9b%bd%e4%b8%ba-scrapy-%e8%ae%be%e7%bd%ae%e8%bd%ae%e6%8d%a2-isp-%e4%bb%a3%e7%90%86","status":"publish","type":"post","link":"https:\/\/www.yilus5.com\/blog\/2239.html","title":{"rendered":"\u5982\u4f55\u5728\u82f1\u56fd\u4e3a Scrapy \u8bbe\u7f6e\u8f6e\u6362 ISP \u4ee3\u7406"},"content":{"rendered":"\n<p class=\"wp-block-paragraph\">\u8bdd\u8bf4\uff0c\u54b1\u4eec\u5728\u82f1\u56fd\u8fd9\u65ee\u65ef\uff0c\u8981\u662f\u60f3\u7528 Scrapy \u8fd9\u628a\u722c\u866b\u5229\u5668\u53bb\u7f51\u4e0a\u6e9c\u8fbe\u6e9c\u8fbe\uff0c\u6293\u70b9\u6570\u636e\u5565\u7684\uff0c\u6709\u65f6\u5019\u4f1a\u9047\u5230\u70b9\u5c0f\u9ebb\u70e6\u3002\u4f60\u60f3\u554a\uff0c\u4e00\u4e2a IP \u5730\u5740\u8981\u662f\u77ed\u65f6\u95f4\u5185\u8bbf\u95ee\u592a\u591a\u7f51\u7ad9\uff0c\u6216\u8005\u9891\u7387\u592a\u9ad8\uff0c\u5f88\u5bb9\u6613\u5c31\u88ab\u4eba\u5bb6\u7f51\u7ad9\u7ed9\u76ef\u4e0a\uff0c\u8f7b\u5219\u7ed9\u4f60\u9650\u4e2a\u901f\uff0c\u91cd\u5219\u76f4\u63a5\u628a\u4f60 IP \u7ed9\u5c01\u4e86\uff0c\u8ba9\u4f60\u8fde\u95e8\u90fd\u8fdb\u4e0d\u53bb\u3002\u8fd9\u53ef\u548b\u529e\u5462\uff1f<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u522b\u7740\u6025\uff0c\u529e\u6cd5\u603b\u662f\u6709\u7684\uff01\u5c31\u50cf\u54b1\u4eec\u5e73\u65f6\u51fa\u95e8\u5f97\u591a\u5907\u51e0\u5957\u8863\u670d\u4e00\u6837\uff0c\u8ba9 Scrapy \u4e5f\u5b66\u4f1a\u201c\u6362\u88c5\u201d\uff0c\u7528\u4e0d\u540c\u7684 IP \u5730\u5740\u53bb\u8bbf\u95ee\uff0c\u5c31\u80fd\u5927\u5927\u964d\u4f4e\u88ab\u5c01\u7684\u98ce\u9669\u3002\u8fd9\u79cd\u201c\u6362\u88c5\u201d\u7684\u6280\u5de7\uff0c\u54b1\u4eec\u5c31\u53eb\u505a\u4f7f\u7528\u8f6e\u6362 ISP \u4ee3\u7406\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u90a3\u4e48\u95ee\u9898\u6765\u4e86\uff0c\u8fd9\u8f6e\u6362 ISP 
\u4ee3\u7406\u5230\u5e95\u662f\u4e2a\u5565\u73a9\u610f\u513f\uff1f\u7b80\u5355\u6765\u8bf4\uff0c\u5c31\u662f\u8ba9\u4f60\u7684 Scrapy \u722c\u866b\u5728\u8bbf\u95ee\u7f51\u7ad9\u7684\u65f6\u5019\uff0c\u4e0d\u518d\u603b\u662f\u7528\u540c\u4e00\u4e2a IP \u5730\u5740\uff0c\u800c\u662f\u50cf\u201c\u4e03\u5341\u4e8c\u53d8\u201d\u4e00\u6837\uff0c\u9694\u4e00\u6bb5\u65f6\u95f4\u6216\u8005\u8bbf\u95ee\u4e00\u5b9a\u6b21\u6570\u540e\uff0c\u5c31\u81ea\u52a8\u5207\u6362\u5230\u53e6\u4e00\u4e2a\u53ef\u7528\u7684 IP \u5730\u5740\u3002\u8fd9\u6837\u4e00\u6765\uff0c\u76ee\u6807\u7f51\u7ad9\u5c31\u5f88\u96be\u8ffd\u8e2a\u5230\u4f60\u771f\u6b63\u7684\u8eab\u4efd\uff0c\u81ea\u7136\u4e5f\u5c31\u6ca1\u90a3\u4e48\u5bb9\u6613\u628a\u4f60\u7ed9\u201c\u62c9\u9ed1\u201d\u5566\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u5728\u82f1\u56fd\u8fd9\u7247\u571f\u5730\u4e0a\uff0c\u60f3\u8981\u987a\u7545\u5730\u8fdb\u884c\u7f51\u7edc\u6570\u636e\u91c7\u96c6\uff0c\u9009\u62e9\u4e00\u4e2a\u9760\u8c31\u7684 ISP \u4ee3\u7406\u670d\u52a1\u5546\u5c31\u663e\u5f97\u5c24\u4e3a\u91cd\u8981\u3002\u8bf4\u5230\u8fd9\u91cc\uff0c\u5c31\u4e0d\u5f97\u4e0d\u63d0\u4e00\u4e0b<strong>\u5168\u7403\u9876\u7ea7\u7684 IP \u4ee3\u7406\u5e73\u53f0\u2014\u2014<a href=\"https:\/\/www.yilus5.com\/\">\u6613\u8def\u4ee3\u7406<\/a><\/strong>\u3002\u4ed6\u4eec\u5bb6\u53ef\u4e0d\u662f\u76d6\u7684\uff0c\u542c\u8bf4<strong>\u91c7\u7528\u7684\u662f\u81ea\u8425\u7eaf\u51c0\u673a\u623f\u548c\u9ad8\u8d28\u91cf\u4f4f\u5b85\u7ebf\u8def<\/strong>\uff0c\u8fd9\u542c\u8d77\u6765\u5c31\u8ba9\u4eba\u653e\u5fc3\u4e0d\u5c11\u3002\u4f60\u60f3\u554a\uff0c\u81ea\u8425\u7684\u673a\u623f\uff0c\u90a3\u8d28\u91cf\u80af\u5b9a\u66f4\u6709\u4fdd\u969c\uff1b\u9ad8\u8d28\u91cf\u7684\u4f4f\u5b85\u7ebf\u8def\uff0c\u5c31\u66f4\u50cf\u662f\u54b1\u4eec\u666e\u901a\u5bb6\u5ead\u7528\u7684\u7f51\u7edc 
IP\uff0c\u7528\u8d77\u6765\u66f4\u81ea\u7136\uff0c\u66f4\u4e0d\u5bb9\u6613\u88ab\u76ee\u6807\u7f51\u7ad9\u8bc6\u522b\u51fa\u6765\u662f\u201c\u673a\u5668\u4eba\u201d\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u66f4\u7ed9\u529b\u7684\u662f\uff0c\u6613\u8def\u4ee3\u7406\u8fd8<strong>\u7a33\u5b9a\u4f9b\u5e94\u6b27\u7f8e\u3001\u4e9a\u6d32\u7b49\u5730\u7684 IP \u8d44\u6e90<\/strong>\uff0c\u8fd9\u5bf9\u4e8e\u9700\u8981\u5728\u82f1\u56fd\u8bbf\u95ee\u5168\u7403\u5404\u5730\u7f51\u7ad9\u7684\u670b\u53cb\u6765\u8bf4\uff0c\u7b80\u76f4\u662f\u96ea\u4e2d\u9001\u70ad\u3002\u800c\u4e14\uff0c\u4ed6\u4eec\u5bb6\u7684 IP <strong>\u9002\u914d\u5404\u79cd\u4e3b\u6d41\u5e94\u7528\u573a\u666f<\/strong>\uff0c\u65e0\u8bba\u662f\u7535\u5546\u6570\u636e\u6293\u53d6\u3001\u793e\u4ea4\u5a92\u4f53\u5206\u6790\uff0c\u8fd8\u662f\u5e02\u573a\u8c03\u67e5\u7b49\u7b49\uff0c\u90fd\u80fd\u8f7b\u677e\u5e94\u5bf9\u3002\u6709\u4e86\u6613\u8def\u4ee3\u7406\u7684\u52a9\u529b\uff0c\u54b1\u4eec\u5728\u82f1\u56fd\u7528 Scrapy \u641e\u722c\u866b\uff0c\u5c31\u80fd\u66f4\u52a0\u5b89\u5fc3\uff0c<strong>\u8f7b\u677e\u5e94\u5bf9\u5404\u79cd\u7f51\u7edc\u6311\u6218<\/strong>\u5566\uff01<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u597d\u4e86\uff0c\u5938\u4e86\u8fd9\u4e48\u591a\uff0c\u54b1\u4eec\u8fd8\u662f\u5f97\u56de\u5230\u6b63\u9898\uff0c\u804a\u804a\u5982\u4f55\u5728\u82f1\u56fd\u4e3a Scrapy \u8bbe\u7f6e\u8f6e\u6362 ISP \u4ee3\u7406\u3002\u5176\u5b9e\uff0c\u5b9e\u73b0\u8fd9\u4e2a\u529f\u80fd\u5e76\u4e0d\u590d\u6742\uff0c\u4e3b\u8981\u6d89\u53ca\u5230\u4ee5\u4e0b\u51e0\u4e2a\u6b65\u9aa4\uff1a<\/p>\n\n\n\n<p class=\"wp-block-paragraph\"><strong>\u7b2c\u4e00\u6b65\uff1a\u9009\u62e9\u5e76\u83b7\u53d6\u4ee3\u7406 IP \u5217\u8868<\/strong><\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u8fd9\u53ef\u662f\u6700\u5173\u952e\u7684\u4e00\u6b65\uff01\u4f60\u5f97\u5148\u627e\u5230\u9760\u8c31\u7684\u4ee3\u7406 IP 
\u6765\u6e90\u3002\u5982\u679c\u4f60\u9009\u62e9\u4e86\u50cf\u6613\u8def\u4ee3\u7406\u8fd9\u6837\u7684\u5e73\u53f0\uff0c\u4ed6\u4eec\u901a\u5e38\u4f1a\u63d0\u4f9b API \u63a5\u53e3\u6216\u8005 IP \u5217\u8868\u4f9b\u4f60\u83b7\u53d6\u53ef\u7528\u7684\u4ee3\u7406 IP\u3002\u4f60\u9700\u8981\u6839\u636e\u81ea\u5df1\u7684\u9700\u6c42\uff0c\u6bd4\u5982\u5730\u7406\u4f4d\u7f6e\uff08\u82f1\u56fd\u672c\u5730 IP \u8fd8\u662f\u5176\u4ed6\u5730\u533a\u7684 IP\uff09\u3001IP \u7c7b\u578b\uff08\u673a\u623f IP \u8fd8\u662f\u4f4f\u5b85 IP\uff09\u7b49\u8fdb\u884c\u9009\u62e9\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u4e00\u822c\u6765\u8bf4\uff0c\u4f4f\u5b85 IP \u7684\u9690\u533f\u6027\u66f4\u597d\uff0c\u66f4\u4e0d\u5bb9\u6613\u88ab\u8bc6\u522b\u4e3a\u722c\u866b\uff0c\u4f46\u4ef7\u683c\u53ef\u80fd\u4f1a\u76f8\u5bf9\u9ad8\u4e00\u4e9b\u3002\u673a\u623f IP \u7684\u901f\u5ea6\u53ef\u80fd\u66f4\u5feb\uff0c\u4f46\u98ce\u9669\u4e5f\u76f8\u5bf9\u9ad8\u4e00\u4e9b\u3002\u5982\u679c\u4f60\u8ffd\u6c42\u7a33\u5b9a\u6027\u548c\u66f4\u4f4e\u7684\u88ab\u5c01\u98ce\u9669\uff0c\u6613\u8def\u4ee3\u7406\u63d0\u4f9b\u7684<strong>\u9ad8\u8d28\u91cf\u4f4f\u5b85\u7ebf\u8def<\/strong>\u65e0\u7591\u662f\u4e2a\u4e0d\u9519\u7684\u9009\u62e9\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u62ff\u5230 IP \u5217\u8868\u540e\uff0c\u4f60\u9700\u8981\u628a\u5b83\u6574\u7406\u6210 Scrapy \u53ef\u4ee5\u4f7f\u7528\u7684\u683c\u5f0f\uff0c\u901a\u5e38\u662f\u4e00\u4e2a\u5305\u542b IP \u5730\u5740\u548c\u7aef\u53e3\u53f7\u7684\u5217\u8868\uff0c\u4f8b\u5982\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>http:&#47;&#47;ip1:port1\nhttp:\/\/ip2:port2\nhttps:\/\/ip3:port3\nhttps:\/\/ip4:port4\n...\n<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u6ce8\u610f\uff0c\u6709\u4e9b\u4ee3\u7406\u53ef\u80fd\u9700\u8981\u7528\u6237\u540d\u548c\u5bc6\u7801\u8fdb\u884c\u8ba4\u8bc1\uff0c\u8fd9\u79cd\u60c5\u51b5\u4e0b\uff0c\u4f60\u7684 IP 
\u5217\u8868\u683c\u5f0f\u53ef\u80fd\u4f1a\u662f\u8fd9\u6837\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>http:&#47;&#47;username1:password@ip1:port1\nhttp:\/\/username2:password@ip2:port2\nhttps:\/\/username3:password@ip3:port3\nhttps:\/\/username4:password@ip4:port4\n...\n<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\"><strong>\u7b2c\u4e8c\u6b65\uff1a\u5728 Scrapy \u9879\u76ee\u4e2d\u914d\u7f6e\u4ee3\u7406\u4e2d\u95f4\u4ef6<\/strong><\/p>\n\n\n<div class=\"wp-block-image\">\n<figure class=\"aligncenter\"><img loading=\"lazy\" decoding=\"async\" src=\"https:\/\/www.yilus5.com\/blog\/wp-content\/uploads\/image-2025-04-25T092830.278.jpg\" alt=\"\u6613\u8defS5\u4ee3\u7406\"\/><\/figure>\n<\/div>\n\n\n<p class=\"wp-block-paragraph\">Scrapy \u7684\u4e2d\u95f4\u4ef6\uff08Middleware\uff09\u673a\u5236\u975e\u5e38\u5f3a\u5927\uff0c\u6211\u4eec\u5c31\u662f\u901a\u8fc7\u81ea\u5b9a\u4e49\u6216\u4f7f\u7528\u73b0\u6709\u7684\u4e0b\u8f7d\u4e2d\u95f4\u4ef6\u6765\u5b9e\u73b0\u4ee3\u7406\u7684\u8f6e\u6362\u3002<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>\u6253\u5f00\u4f60\u7684 Scrapy \u9879\u76ee\uff0c\u627e\u5230 <code>settings.py<\/code> \u6587\u4ef6\u3002<\/strong><\/li>\n\n\n\n<li><strong>\u53d6\u6d88\u6ce8\u91ca <code>DOWNLOADER_MIDDLEWARES<\/code> \u8bbe\u7f6e\uff0c\u5e76\u6dfb\u52a0\u4f60\u7684\u81ea\u5b9a\u4e49\u4ee3\u7406\u4e2d\u95f4\u4ef6\u3002<\/strong> \u5982\u679c\u4f60\u8fd8\u6ca1\u6709\u521b\u5efa\u81ea\u5b9a\u4e49\u4e2d\u95f4\u4ef6\uff0c\u9700\u8981\u5148\u521b\u5efa\u4e00\u4e2a\u3002\u5728\u4f60\u7684 Scrapy \u9879\u76ee\u7684 spider \u540c\u7ea7\u76ee\u5f55\u4e0b\uff08\u6216\u8005\u4f60\u81ea\u5b9a\u4e49\u7684 middleware \u76ee\u5f55\u4e0b\uff09\u521b\u5efa\u4e00\u4e2a Python \u6587\u4ef6\uff0c\u6bd4\u5982 <code>proxy_middleware.py<\/code>\u3002 Python<code># settings.py DOWNLOADER_MIDDLEWARES = { 'your_project_name.proxy_middleware.RotatingProxyMiddleware': 750, 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None, # 
\u5982\u679c\u4f60\u4f7f\u7528\u4e86\u81ea\u5b9a\u4e49 User-Agent \u4e2d\u95f4\u4ef6\uff0c\u9700\u8981\u7981\u7528\u9ed8\u8ba4\u7684 }<\/code> \u8fd9\u91cc\u7684 <code>'your_project_name'<\/code> \u9700\u8981\u66ff\u6362\u6210\u4f60\u7684\u5b9e\u9645\u9879\u76ee\u540d\u79f0\u3002<code>RotatingProxyMiddleware<\/code> \u662f\u6211\u4eec\u81ea\u5b9a\u4e49\u7684\u4ee3\u7406\u4e2d\u95f4\u4ef6\u7684\u7c7b\u540d\uff0c<code>750<\/code> \u662f\u4e2d\u95f4\u4ef6\u7684\u4f18\u5148\u7ea7\uff0c\u6570\u5b57\u8d8a\u5c0f\u4f18\u5148\u7ea7\u8d8a\u9ad8\u3002<\/li>\n\n\n\n<li><strong>\u5728 <code>proxy_middleware.py<\/code> \u6587\u4ef6\u4e2d\u7f16\u5199\u4f60\u7684\u4ee3\u7406\u4e2d\u95f4\u4ef6\u4ee3\u7801\u3002<\/strong> \u4e00\u4e2a\u7b80\u5355\u7684\u8f6e\u6362\u4ee3\u7406\u4e2d\u95f4\u4ef6\u53ef\u80fd\u770b\u8d77\u6765\u50cf\u8fd9\u6837\uff1a Python<code>import random import base64 class RotatingProxyMiddleware: def __init__(self, proxy_list): self.proxy_list = proxy_list self.current_proxy = None @classmethod def from_crawler(cls, crawler): proxy_list = crawler.settings.get('PROXY_LIST', []) return cls(proxy_list) def process_request(self, request, spider): if self.proxy_list: self.current_proxy = random.choice(self.proxy_list) if self.current_proxy.startswith('http:\/\/') or self.current_proxy.startswith('https:\/\/'): request.meta['proxy'] = self.current_proxy else: # \u5904\u7406\u9700\u8981\u8ba4\u8bc1\u7684\u4ee3\u7406\uff0c\u683c\u5f0f\u4e3a username:password@ip:port proxy_user_pass = self.current_proxy.split('@')[0] real_proxy = self.current_proxy.split('@')[1] encoded_user_pass = base64.b64encode(proxy_user_pass.encode()).decode() request.meta['proxy'] = real_proxy request.headers['Proxy-Authorization'] = 'Basic ' + encoded_user_pass spider.logger.debug(f\"Using proxy: {self.current_proxy}\") def process_response(self, request, response, spider): # 
\u53ef\u9009\uff1a\u5728\u8fd9\u91cc\u5904\u7406\u88ab\u4ee3\u7406\u670d\u52a1\u5668\u62d2\u7edd\u7684\u60c5\u51b5\uff0c\u4f8b\u5982\u66f4\u6362\u4ee3\u7406 if response.status in [403, 407, 503]: spider.logger.warning(f\"Proxy {request.meta.get('proxy', None)} returned status {response.status}, retrying with a new proxy.\") return self._retry(request, spider) or response return response def process_exception(self, request, exception, spider): spider.logger.error(f\"Request using proxy {request.meta.get('proxy', None)} encountered an exception: {exception}, retrying with a new proxy.\") return self._retry(request, spider) def _retry(self, request, spider): retryreq = request.copy() try: del retryreq.meta['proxy'] except KeyError: pass retryreq.dont_filter = True return retryreq<\/code> \u8fd9\u6bb5\u4ee3\u7801\u7684\u6838\u5fc3\u903b\u8f91\u662f\uff1a\n<ul class=\"wp-block-list\">\n<li>\u5728 <code>__init__<\/code> \u65b9\u6cd5\u4e2d\u63a5\u6536\u4ee3\u7406 IP \u5217\u8868\u3002<\/li>\n\n\n\n<li><code>from_crawler<\/code> \u65b9\u6cd5\u7528\u4e8e\u4ece Scrapy \u7684 settings \u4e2d\u83b7\u53d6 <code>PROXY_LIST<\/code>\u3002<\/li>\n\n\n\n<li><code>process_request<\/code> \u65b9\u6cd5\u5728\u6bcf\u4e2a\u8bf7\u6c42\u53d1\u9001\u524d\u88ab\u8c03\u7528\uff0c\u5b83\u4f1a\u4ece <code>self.proxy_list<\/code> \u4e2d\u968f\u673a\u9009\u62e9\u4e00\u4e2a\u4ee3\u7406 IP\uff0c\u5e76\u5c06\u5176\u8bbe\u7f6e\u5230 <code>request.meta['proxy']<\/code> \u4e2d\u3002\u5982\u679c\u4ee3\u7406\u9700\u8981\u8ba4\u8bc1\uff0c\u8fd8\u4f1a\u8bbe\u7f6e <code>Proxy-Authorization<\/code> \u8bf7\u6c42\u5934\u3002<\/li>\n\n\n\n<li><code>process_response<\/code> \u548c <code>process_exception<\/code> \u65b9\u6cd5\u7528\u4e8e\u5904\u7406\u8bf7\u6c42\u5931\u8d25\u7684\u60c5\u51b5\uff0c\u53ef\u4ee5\u6839\u636e\u5b9e\u9645\u9700\u6c42\u5b9e\u73b0\u91cd\u8bd5\u903b\u8f91\uff0c\u4f8b\u5982\u66f4\u6362\u65b0\u7684\u4ee3\u7406 IP \u8fdb\u884c\u91cd\u8bd5\u3002<\/li>\n<\/ul>\n<\/li>\n\n\n\n<li><strong>\u5728 
<code>settings.py<\/code> \u6587\u4ef6\u4e2d\u5b9a\u4e49 <code>PROXY_LIST<\/code>\u3002<\/strong> \u5c06\u4f60\u4e4b\u524d\u83b7\u53d6\u5230\u7684\u4ee3\u7406 IP \u5217\u8868\u8d4b\u503c\u7ed9 <code>PROXY_LIST<\/code>\uff1a Python<code># settings.py PROXY_LIST = [ 'http:\/\/ip1:port1', 'http:\/\/user:pass@ip2:port2', 'https:\/\/ip3:port3', # ... \u4f60\u7684\u4ee3\u7406 IP \u5217\u8868 ]<\/code> \u5982\u679c\u4f60\u4f7f\u7528\u7684\u662f\u6613\u8def\u4ee3\u7406\u7684 API \u63a5\u53e3\uff0c\u4f60\u53ef\u80fd\u9700\u8981\u7f16\u5199\u989d\u5916\u7684\u4ee3\u7801\u6765\u5b9a\u671f\u4ece API \u83b7\u53d6\u6700\u65b0\u7684\u53ef\u7528 IP \u5217\u8868\uff0c\u5e76\u66f4\u65b0 <code>PROXY_LIST<\/code>\u3002<\/li>\n<\/ol>\n\n\n\n<p class=\"wp-block-paragraph\"><strong>\u7b2c\u4e09\u6b65\uff1a\u8003\u8651\u66f4\u667a\u80fd\u7684\u4ee3\u7406\u8f6e\u6362\u7b56\u7565<\/strong><\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u4e0a\u9762\u53ea\u662f\u4e00\u4e2a\u6700\u7b80\u5355\u7684\u968f\u673a\u8f6e\u6362\u4ee3\u7406\u7684\u5b9e\u73b0\u3002\u5728\u5b9e\u9645\u5e94\u7528\u4e2d\uff0c\u4f60\u53ef\u80fd\u9700\u8981\u66f4\u667a\u80fd\u7684\u7b56\u7565\uff0c\u4f8b\u5982\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u57fa\u4e8e\u54cd\u5e94\u72b6\u6001\u7801\u8fdb\u884c\u8f6e\u6362\uff1a<\/strong> \u5f53\u67d0\u4e2a\u4ee3\u7406\u8fd4\u56de\u7279\u5b9a\u7684\u9519\u8bef\u7801\uff08\u5982 403, 407, 503\uff09\u65f6\uff0c\u7acb\u5373\u5207\u6362\u5230\u65b0\u7684\u4ee3\u7406\u3002<\/li>\n\n\n\n<li><strong>\u9650\u5236\u6bcf\u4e2a\u4ee3\u7406\u7684\u4f7f\u7528\u6b21\u6570\uff1a<\/strong> 
\u4e3a\u4e86\u907f\u514d\u5355\u4e2a\u4ee3\u7406\u88ab\u8fc7\u5ea6\u4f7f\u7528\uff0c\u53ef\u4ee5\u8bb0\u5f55\u6bcf\u4e2a\u4ee3\u7406\u7684\u4f7f\u7528\u6b21\u6570\uff0c\u5f53\u8fbe\u5230\u4e00\u5b9a\u9608\u503c\u540e\u5c31\u5c06\u5176\u4ece\u53ef\u7528\u5217\u8868\u4e2d\u79fb\u9664\u6216\u964d\u4f4e\u5176\u4f7f\u7528\u9891\u7387\u3002<\/li>\n\n\n\n<li><strong>\u6839\u636e\u76ee\u6807\u7f51\u7ad9\u8fdb\u884c\u4ee3\u7406\u9009\u62e9\uff1a<\/strong> \u67d0\u4e9b\u7f51\u7ad9\u53ef\u80fd\u5bf9\u7279\u5b9a\u5730\u533a\u7684 IP \u66f4\u654f\u611f\uff0c\u4f60\u53ef\u4ee5\u6839\u636e\u8981\u8bbf\u95ee\u7684\u57df\u540d\u9009\u62e9\u5408\u9002\u7684\u4ee3\u7406 IP\u3002\u6bd4\u5982\uff0c\u5982\u679c\u4f60\u5728\u82f1\u56fd\u7528 Scrapy \u722c\u53d6\u82f1\u56fd\u672c\u5730\u7684\u7f51\u7ad9\uff0c\u4f7f\u7528\u6613\u8def\u4ee3\u7406\u63d0\u4f9b\u7684<strong>\u81ea\u8425\u7eaf\u51c0\u673a\u623f<\/strong>\u7684\u82f1\u56fd IP \u53ef\u80fd\u4f1a\u66f4\u7a33\u5b9a\u9ad8\u6548\u3002<\/li>\n\n\n\n<li><strong>\u96c6\u6210\u7b2c\u4e09\u65b9\u4ee3\u7406\u7ba1\u7406\u670d\u52a1\uff1a<\/strong> \u4e00\u4e9b\u9ad8\u7ea7\u7684\u4ee3\u7406\u670d\u52a1\u5546\uff08\u6bd4\u5982\u6613\u8def\u4ee3\u7406\u53ef\u80fd\u4f1a\u63d0\u4f9b\u7684\u76f8\u5173\u670d\u52a1\uff09\u4f1a\u63d0\u4f9b\u66f4\u5b8c\u5584\u7684\u4ee3\u7406\u7ba1\u7406 API\uff0c\u4f60\u53ef\u4ee5\u76f4\u63a5\u96c6\u6210\u8fd9\u4e9b API \u5230\u4f60\u7684 Scrapy \u9879\u76ee\u4e2d\uff0c\u5b9e\u73b0\u66f4\u667a\u80fd\u7684\u4ee3\u7406\u8f6e\u6362\u548c\u7ba1\u7406\u3002<\/li>\n<\/ul>\n\n\n\n<p class=\"wp-block-paragraph\"><strong>\u7b2c\u56db\u6b65\uff1a\u6d4b\u8bd5\u4f60\u7684\u4ee3\u7406\u8bbe\u7f6e<\/strong><\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u914d\u7f6e\u5b8c\u6210\u540e\uff0c\u4e00\u5b9a\u8981\u8fdb\u884c\u5145\u5206\u7684\u6d4b\u8bd5\uff0c\u786e\u4fdd\u4f60\u7684\u4ee3\u7406\u8f6e\u6362\u673a\u5236\u80fd\u591f\u6b63\u5e38\u5de5\u4f5c\u3002\u4f60\u53ef\u4ee5\u7f16\u5199\u4e00\u4e2a\u7b80\u5355\u7684 Scrapy 
spider\uff0c\u8bbf\u95ee\u4e00\u4e2a\u4f1a\u663e\u793a\u4f60 IP \u5730\u5740\u7684\u7f51\u7ad9\uff08\u4f8b\u5982 <code>httpbin.org\/ip<\/code>\uff09\uff0c\u7136\u540e\u89c2\u5bdf\u4f60\u7684\u722c\u866b\u5728\u8fd0\u884c\u8fc7\u7a0b\u4e2d\u662f\u5426\u4f1a\u5207\u6362 IP \u5730\u5740\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u4f60\u53ef\u4ee5\u5728\u4f60\u7684 spider \u7684 <code>parse<\/code> \u65b9\u6cd5\u4e2d\u6253\u5370\u51fa\u54cd\u5e94\u7684 IP \u5730\u5740\uff1a<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">Python<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import scrapy\n\nclass IPTestSpider(scrapy.Spider):\n    name = 'iptest'\n    start_urls = &#91;'http:\/\/httpbin.org\/ip']\n\n    def parse(self, response):\n        print(f\"Current IP: {response.json()&#91;'origin']}\")\n<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u8fd0\u884c\u8fd9\u4e2a spider \u591a\u6b21\uff0c\u4f60\u5e94\u8be5\u4f1a\u770b\u5230\u4e0d\u540c\u7684 IP \u5730\u5740\u8f93\u51fa\uff0c\u8fd9\u8868\u660e\u4f60\u7684\u4ee3\u7406\u8f6e\u6362\u5df2\u7ecf\u751f\u6548\u4e86\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\"><strong>\u4e00\u4e9b\u989d\u5916\u7684\u5efa\u8bae\u548c\u6ce8\u610f\u4e8b\u9879\uff1a<\/strong><\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>User-Agent \u7684\u8f6e\u6362\u4e5f\u5f88\u91cd\u8981\uff1a<\/strong> \u9664\u4e86\u8f6e\u6362 IP \u5730\u5740\uff0c\u6a21\u62df\u4e0d\u540c\u7684\u6d4f\u89c8\u5668 User-Agent \u4e5f\u662f\u4e00\u4e2a\u6709\u6548\u7684\u5e94\u5bf9\u53cd\u722c\u7684\u7b56\u7565\u3002\u4f60\u53ef\u4ee5\u4f7f\u7528 Scrapy \u81ea\u5e26\u7684 <code>UserAgentMiddleware<\/code> \u6216\u8005\u81ea\u5b9a\u4e49\u4e00\u4e2a User-Agent \u4e2d\u95f4\u4ef6\uff0c\u914d\u5408 <code>fake-useragent<\/code> \u5e93\u751f\u6210\u968f\u673a\u7684 User-Agent\u3002<\/li>\n\n\n\n<li><strong>\u8bbe\u7f6e\u5408\u7406\u7684\u4e0b\u8f7d\u5ef6\u8fdf\uff08<code>DOWNLOAD_DELAY<\/code>\uff09\uff1a<\/strong> 
\u8fc7\u4e8e\u9891\u7e41\u7684\u8bf7\u6c42\u4e5f\u4f1a\u89e6\u53d1\u53cd\u722c\u673a\u5236\uff0c\u9002\u5f53\u589e\u52a0\u4e0b\u8f7d\u5ef6\u8fdf\u53ef\u4ee5\u964d\u4f4e\u88ab\u5c01\u7684\u98ce\u9669\u3002<\/li>\n\n\n\n<li><strong>\u5904\u7406\u4ee3\u7406\u8fde\u63a5\u9519\u8bef\u548c\u8d85\u65f6\uff1a<\/strong> \u4ee3\u7406\u670d\u52a1\u5668\u4e5f\u53ef\u80fd\u4e0d\u7a33\u5b9a\uff0c\u4f60\u7684\u4ee3\u7801\u5e94\u8be5\u80fd\u591f\u5904\u7406\u8fde\u63a5\u9519\u8bef\u548c\u8d85\u65f6\u7b49\u60c5\u51b5\uff0c\u5e76\u8fdb\u884c\u91cd\u8bd5\u6216\u8005\u5207\u6362\u5230\u5176\u4ed6\u53ef\u7528\u4ee3\u7406\u3002<\/li>\n\n\n\n<li><strong>\u9075\u5b88\u76ee\u6807\u7f51\u7ad9\u7684 <code>robots.txt<\/code> \u534f\u8bae\uff1a<\/strong> \u8fd9\u662f\u7f51\u7edc\u722c\u866b\u7684\u57fa\u672c\u9053\u5fb7\u51c6\u5219\uff0c\u52a1\u5fc5\u5c0a\u91cd\u7f51\u7ad9\u7684\u722c\u53d6\u89c4\u5219\u3002<\/li>\n\n\n\n<li><strong>\u5b9a\u671f\u68c0\u67e5\u4f60\u7684\u4ee3\u7406 IP \u7684\u6709\u6548\u6027\uff1a<\/strong> \u514d\u8d39\u7684\u4ee3\u7406 IP \u8d28\u91cf\u901a\u5e38\u4e0d\u9ad8\uff0c\u4e0d\u7a33\u5b9a\u4e14\u5bb9\u6613\u5931\u6548\u3002\u5982\u679c\u4f60\u4f9d\u8d56\u9ad8\u8d28\u91cf\u7684\u4ee3\u7406\uff0c\u6bd4\u5982\u6613\u8def\u4ee3\u7406\u63d0\u4f9b\u7684\u670d\u52a1\uff0c\u4ed6\u4eec\u901a\u5e38\u4f1a\u7ef4\u62a4 IP \u7684\u53ef\u7528\u6027\u3002\u4f46\u5982\u679c\u4f60\u4f7f\u7528\u7684\u662f\u81ea\u5df1\u6536\u96c6\u7684\u4ee3\u7406\uff0c\u9700\u8981\u5b9a\u671f\u8fdb\u884c\u68c0\u67e5\uff0c\u79fb\u9664\u65e0\u6548\u7684 IP\u3002<\/li>\n\n\n\n<li><strong>\u8003\u8651\u4f7f\u7528\u4ee3\u7406\u6c60\uff1a<\/strong> \u5bf9\u4e8e\u66f4\u590d\u6742\u7684\u722c\u866b\u9879\u76ee\uff0c\u7ef4\u62a4\u4e00\u4e2a\u52a8\u6001\u7684\u4ee3\u7406\u6c60\u53ef\u80fd\u66f4\u6709\u6548\u3002\u4f60\u53ef\u4ee5\u5b9e\u73b0\u4e00\u4e2a\u72ec\u7acb\u7684\u6a21\u5757\u6765\u7ba1\u7406\u4ee3\u7406 IP \u7684\u83b7\u53d6\u3001\u9a8c\u8bc1\u548c\u8f6e\u6362\u3002<\/li>\n<\/ul>\n\n\n\n<p 
class=\"wp-block-paragraph\">\u603b\u800c\u8a00\u4e4b\uff0c\u5728\u82f1\u56fd\u4f7f\u7528 Scrapy \u8fdb\u884c\u7f51\u7edc\u722c\u866b\u5f00\u53d1\uff0c\u5408\u7406\u5730\u8bbe\u7f6e\u8f6e\u6362 ISP \u4ee3\u7406\u662f\u81f3\u5173\u91cd\u8981\u7684\u4e00\u6b65\uff0c\u53ef\u4ee5\u6709\u6548\u5730\u63d0\u9ad8\u722c\u866b\u7684\u7a33\u5b9a\u6027\u548c\u6548\u7387\uff0c\u964d\u4f4e\u88ab\u5c01\u7684\u98ce\u9669\u3002\u9009\u62e9\u50cf<strong>\u6613\u8def\u4ee3\u7406<\/strong>\u8fd9\u6837\u62e5\u6709<strong>\u81ea\u8425\u7eaf\u51c0\u673a\u623f\u548c\u9ad8\u8d28\u91cf\u4f4f\u5b85\u7ebf\u8def<\/strong>\u7684\u9876\u7ea7 IP \u4ee3\u7406\u5e73\u53f0\uff0c\u80fd\u591f\u4e3a\u4f60\u63d0\u4f9b\u66f4\u53ef\u9760\u7684 IP \u8d44\u6e90\uff0c\u52a9\u4f60<strong>\u8f7b\u677e\u5e94\u5bf9\u5404\u79cd\u7f51\u7edc\u6311\u6218<\/strong>\uff0c\u8ba9\u4f60\u7684\u6570\u636e\u91c7\u96c6\u5de5\u4f5c\u66f4\u52a0\u987a\u7545\uff01\u5e0c\u671b\u8fd9\u7bc7\u6587\u7ae0\u80fd\u591f\u5e2e\u52a9\u4f60\u5728\u82f1\u56fd\u6210\u529f\u914d\u7f6e Scrapy \u7684\u8f6e\u6362 ISP \u4ee3\u7406\uff0c\u6293\u53d6\u5230\u4f60\u60f3\u8981\u7684\u6570\u636e\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\"><\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u8bdd\u8bf4\uff0c\u54b1\u4eec\u5728\u82f1\u56fd\u8fd9\u65ee\u65ef\uff0c\u8981\u662f\u60f3\u7528 Scrapy \u8fd9\u628a\u722c\u866b\u5229\u5668\u53bb\u7f51\u4e0a\u6e9c\u8fbe\u6e9c\u8fbe\uff0c\u6293\u70b9\u6570\u636e\u5565\u7684\uff0c\u6709\u65f6\u5019\u4f1a\u9047\u5230\u70b9\u5c0f\u9ebb\u70e6 &#8230; <a title=\"\u5982\u4f55\u5728\u82f1\u56fd\u4e3a Scrapy \u8bbe\u7f6e\u8f6e\u6362 ISP \u4ee3\u7406\" class=\"read-more\" href=\"https:\/\/www.yilus5.com\/blog\/2239.html\" aria-label=\"\u9605\u8bfb \u5982\u4f55\u5728\u82f1\u56fd\u4e3a Scrapy \u8bbe\u7f6e\u8f6e\u6362 ISP 
\u4ee3\u7406\">\u9605\u8bfb\u66f4\u591a<\/a><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[8],"tags":[],"class_list":["post-2239","post","type-post","status-publish","format-standard","hentry","category-yiluproxy6"],"_links":{"self":[{"href":"https:\/\/www.yilus5.com\/blog\/wp-json\/wp\/v2\/posts\/2239","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.yilus5.com\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.yilus5.com\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.yilus5.com\/blog\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.yilus5.com\/blog\/wp-json\/wp\/v2\/comments?post=2239"}],"version-history":[{"count":1,"href":"https:\/\/www.yilus5.com\/blog\/wp-json\/wp\/v2\/posts\/2239\/revisions"}],"predecessor-version":[{"id":2240,"href":"https:\/\/www.yilus5.com\/blog\/wp-json\/wp\/v2\/posts\/2239\/revisions\/2240"}],"wp:attachment":[{"href":"https:\/\/www.yilus5.com\/blog\/wp-json\/wp\/v2\/media?parent=2239"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.yilus5.com\/blog\/wp-json\/wp\/v2\/categories?post=2239"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.yilus5.com\/blog\/wp-json\/wp\/v2\/tags?post=2239"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}