lxml中的xpath股票信息提取:
import urllib.request
import lxml
import lxml.etree
import re
def download(url):
    """Fetch *url* and print the text found in the ``datalist`` table cells.

    The page is requested with a browser-like User-Agent header, parsed with
    ``lxml.etree.HTML``, and queried for every text node beneath a ``td``
    inside a ``tr`` under the element whose ``id`` is ``datalist``. The whole
    list of matches is printed first, then each match on its own line.

    Args:
        url: Address of the page to download.

    Returns:
        None. Results are only written to standard output.

    Raises:
        Nothing is caught here; errors raised by ``urllib.request.urlopen``
        (for example ``urllib.error.URLError``) propagate to the caller.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0);"
    }
    # Build the request with a custom header so it looks like a browser request.
    request = urllib.request.Request(url, headers=headers)

    # Open the request and read the body; the context manager closes the
    # connection even if reading fails.
    with urllib.request.urlopen(request) as response:
        data = response.read()

    # NOTE: the raw bytes are handed to the parser without decoding. The
    # original author had tried (and disabled) a gbk -> utf-8 re-encoding step
    # here because of garbled text.
    mytree = lxml.etree.HTML(data)
    datalist = mytree.xpath('//*[@id="datalist"]//tr//td//text()')

    print(datalist)
    for linedata in datalist:
        print(linedata)
# Run the scraper against the stockstar quote page when this file is executed.
download(url="http://quote.stockstar.com/fund/stock_3_1_2.html")
来源:https://www.cnblogs.com/my-global/p/12454078.html