通过爬虫从 xici 代理网站获取可以使用的代理 IP 和端口
主要代码:
#!/usr/bin/env python
#coding:utf8
import re
import socket
import telnetlib
from urllib import request
class getXici():
    """Fetch a proxy listing page and report which listed proxies accept TCP connections.

    Typical use is ``get_page()`` followed by ``get_ip_list()``.
    """

    # One table row: the flag-image cell, the next three plain <td> cells
    # (consumed below as IP, port and a third descriptive column), and the
    # following "country"-class cell.  re.S lets ``.`` span line breaks
    # between cells.
    _ROW_PATTERN = re.compile(r'<td class="country"><img src="(.*?)" alt="Cn" /></td>.*?<td>(.*?)</td>.*?<td>(.*?)</td>.*?<td>(.*?)</td>.*?<td class="country">(.*?)</td>', re.S)

    def __init__(self):
        """Set the listing URL, the request headers and an empty page buffer."""
        self.url = "http://www.xicidaili.com"
        self.header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36'}
        self.page = ""

    def get_page(self, timeout=10):
        """Download ``self.url`` and store the decoded HTML in ``self.page``.

        Args:
            timeout: Seconds to wait on the HTTP request before giving up.

        Raises:
            urllib.error.URLError: If the request fails.
            UnicodeDecodeError: If the response body is not valid UTF-8.
        """
        req = request.Request(url=self.url, headers=self.header)
        # The context manager closes the HTTP response even if decoding fails.
        with request.urlopen(req, timeout=timeout) as resp:
            self.page = resp.read().decode('utf-8')

    def is_available(self, ip, port, timeout=3):
        """Return 1 if a TCP connection to ``ip:port`` can be opened, else 0.

        Args:
            ip: Host name or address to probe.
            port: Port number, as an int or a numeric string.
            timeout: Seconds to wait for the connection before reporting 0.
        """
        try:
            # Plain TCP connect; the ``with`` block closes the socket right
            # away so probing many proxies does not leak file descriptors.
            with socket.create_connection((ip, port), timeout=timeout):
                return 1
        except (OSError, ValueError, OverflowError):
            # OSError covers refused/timeout/unresolvable; ValueError and
            # OverflowError cover malformed hosts and out-of-range ports.
            return 0

    def get_ip_list(self):
        """Probe every proxy row found in ``self.page`` and print the reachable ones.

        Returns:
            A list of ``(ip, port, third_column)`` tuples for the rows whose
            ``ip:port`` accepted a connection, in page order.
        """
        available = []
        for row in self._ROW_PATTERN.findall(self.page):
            # Cell text may carry surrounding whitespace from the markup.
            ip = row[1].strip()
            port = row[2].strip()
            if self.is_available(ip, port) == 1:
                print(" {0} {1} {2} 可以使用 ".format(ip, port, row[3]))
                available.append((ip, port, row[3]))
        return available
# Script entry point: download the listing page, then probe and print the
# proxies that accept a connection.
if __name__ == "__main__":
    crawler = getXici()
    crawler.get_page()
    crawler.get_ip_list()
运行结果:

随机抽取一个验证是否可用:

来源:https://www.cnblogs.com/pythonlx/p/8325649.html