class proxyMiddleware(object):
    """Downloader middleware that attaches a verified HTTP proxy to requests.

    Two independent proxy pools are kept, one per spider family
    ("zlwmw_cc" and "zlwmw_cq").  Each pool is refilled from its own
    provider URL whenever it holds fewer than ``MIN_POOL_SIZE`` addresses,
    and an address is only handed out after it fetched a test URL with
    HTTP status 200.

    NOTE(review): every network call and ``time.sleep`` here is blocking.
    If this runs on the crawler's event-loop thread it will stall other
    requests while a proxy is being looked up -- confirm this is acceptable.
    """

    # A pool is refilled while it holds fewer than this many addresses.
    MIN_POOL_SIZE = 5
    # Substring of the provider's "retry in 2 seconds" rate-limit reply
    # (a JSON error object rather than an address list).
    RATE_LIMIT_MARKER = "请2秒后再试"
    # Seconds to wait before each successive retry of a rate-limited fetch.
    RATE_LIMIT_DELAYS = (2, 4, 6)
    # Seconds to wait for the provider before giving up on a fetch.
    FETCH_TIMEOUT = 10
    # Seconds a candidate proxy gets to fetch the test URL.
    CHECK_TIMEOUT = 2
    # Pause after a fetch that yielded no address, so the provider is not
    # hit in a tight loop.
    EMPTY_REFILL_DELAY = 1

    def __init__(self):
        self.ip_pool_cc = []
        self.get_ip_url_cc = 'http://xxxxx'
        self.ip_pool_cq = []
        self.get_ip_url_cq = 'http://xxxxx'

    def process_request(self, request, spider):
        """Set ``request.meta['proxy']`` for spiders of a known family.

        Spiders whose name contains neither "zlwmw_cc" nor "zlwmw_cq" are
        left untouched.  Always returns None.
        """
        if "zlwmw_cc" in spider.name:
            ip_pool, get_ip_url = self.ip_pool_cc, self.get_ip_url_cc
            url_test = "http://xxxxxxxxx"
        elif "zlwmw_cq" in spider.name:
            ip_pool, get_ip_url = self.ip_pool_cq, self.get_ip_url_cq
            url_test = "http://xxxxxxxxx"
        else:
            return None
        pro_addr = self.base_proxy(ip_pool, get_ip_url, url_test)
        request.meta['proxy'] = "http://" + pro_addr
        return None

    def base_proxy(self, ip_pool, get_ip_url_0, url_test):
        """Return an address from ``ip_pool`` that can fetch ``url_test``.

        ``ip_pool`` is mutated in place: it is topped up from
        ``get_ip_url_0`` whenever it runs low, and every address that fails
        the check is removed.  The call only returns once a working address
        is found; errors raised while contacting the provider propagate.
        """
        while True:
            self._refill_pool(ip_pool, get_ip_url_0)
            pro_addr = random.choice(ip_pool)
            if self._proxy_works(pro_addr, url_test):
                return pro_addr
            # Dead proxy: drop it, then top up and draw again.
            ip_pool.remove(pro_addr)

    def _refill_pool(self, ip_pool, get_ip_url):
        """Fetch addresses until ``ip_pool`` holds at least MIN_POOL_SIZE."""
        while len(ip_pool) < self.MIN_POOL_SIZE:
            fresh = self._parse_ip_list(self._fetch_ip_text(get_ip_url))
            if fresh:
                ip_pool.extend(fresh)
            else:
                time.sleep(self.EMPTY_REFILL_DELAY)

    def _fetch_ip_text(self, get_ip_url):
        """Download the provider reply, backing off while it is rate-limited.

        After the last delay in RATE_LIMIT_DELAYS the reply is returned
        as-is, even if it is still the rate-limit message.
        """
        text = requests.get(get_ip_url, timeout=self.FETCH_TIMEOUT).text
        for delay in self.RATE_LIMIT_DELAYS:
            if self.RATE_LIMIT_MARKER not in text:
                break
            time.sleep(delay)
            text = requests.get(get_ip_url, timeout=self.FETCH_TIMEOUT).text
        return text

    @classmethod
    def _parse_ip_list(cls, text):
        """Split a provider reply into a list of non-empty, stripped lines.

        A reply that still carries the rate-limit message yields an empty
        list, so the error body is never mistaken for a proxy address.
        """
        if cls.RATE_LIMIT_MARKER in text:
            return []
        stripped = (line.strip() for line in text.splitlines())
        return [line for line in stripped if line]

    def _proxy_works(self, pro_addr, url_test):
        """Return True if ``url_test`` answers with status 200 via ``pro_addr``."""
        proxies = {"http": pro_addr}
        try:
            # The context manager closes the session so its connections do
            # not pile up across checks.
            with requests.Session() as session:
                response = session.get(url=url_test, proxies=proxies,
                                       timeout=self.CHECK_TIMEOUT, verify=False)
        except Exception as e:
            # Best-effort probe: any failure just means the proxy is unusable.
            print(e)
            return False
        return response.status_code == 200
# Source: https://www.cnblogs.com/qiaoer1993/p/11639666.html