from typing import List
from urllib.parse import urljoin

import requests
from lxml import etree

# Base URL that links extracted from listing pages are resolved against.
url_domain = "https://www.dytt8.net"

# Headers sent with every request; carries a desktop Chrome User-Agent string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"
    ),
}


def get_detail_urls(url: str, timeout: float = 10) -> List[str]:
    """Fetch a listing page and return the detail-page URLs found in it.

    The page is downloaded with the module-level ``headers``, decoded as GBK
    (undecodable bytes are dropped), and parsed as HTML. The ``href`` of the
    second ``<a>`` child under each ``table`` with class ``tbspan`` is
    collected and resolved against ``url_domain``.

    Args:
        url: Address of the listing page to download.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block forever.

    Returns:
        A list of absolute URLs, in document order. Empty when the page
        yields no parsable document or no matching links.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    text = response.content.decode("gbk", "ignore")

    html = etree.HTML(text)
    if html is None:
        # No document tree could be built (e.g. an empty response body).
        return []

    hrefs = html.xpath("//table[@class='tbspan']//a[2]/@href")
    # urljoin gives the same result as plain concatenation for site-relative
    # paths ("/html/..."), and additionally copes with hrefs that lack a
    # leading slash or are already absolute.
    # A list (not a lazy ``map``) lets callers iterate more than once.
    return [urljoin(url_domain, href) for href in hrefs]


def parse_info(info: str, rule: str) -> str:
    """Remove every occurrence of ``rule`` from ``info`` and trim whitespace.

    Args:
        info: Raw text to clean.
        rule: Substring to delete from ``info``.

    Returns:
        ``info`` without ``rule``, with leading and trailing whitespace
        stripped.
    """
    return info.replace(rule, "").strip()