题目:爬取今日头条新闻网的左侧边栏,并以 CSV 文件的形式保存
代码:
import io
import sys
import urllib.request
import pandas as pd
from pyquery import PyQuery as pq
# Re-wrap the standard output byte stream so that printed text is encoded as
# GB18030 (presumably to suit a Chinese-locale console — TODO confirm).
sys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='gb18030') # change the default encoding of standard output
# Address of the page to scrape; presumably the argument intended for
# get_Info (no call is visible in this part of the file).
url = 'https://mini.eastday.com/jrdftt/'
def get_Info(url):
    """Download a page and save its sidebar channel labels to a CSV file.

    The page at *url* is fetched, decoded as UTF-8 and parsed with PyQuery.
    The text of every element matching ``.channel-item span`` is collected
    into a pandas Series, which is written to ``列表.csv`` in the current
    working directory.

    Args:
        url: Address of the page to download.

    Returns:
        None. The result is the CSV file written to disk.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    # Use the response as a context manager so the connection is closed
    # even when reading the body fails.
    with urllib.request.urlopen(url) as response:
        html_bytes = response.read()

    doc = pq(html_bytes.decode('utf-8'))
    spans = doc(".channel-item span")
    # Iterating the selection yields the matched elements; ``.text`` may be
    # None for an element that has no leading text.
    titles = [span.text for span in spans]
    pd.Series(titles).to_csv("列表.csv")
结果:
