The script below crawls the chapter index of a novel on xbiquge.la, then downloads every chapter and saves it as a .txt file inside a desktop folder named after the novel:

```python
# coding: utf-8
import os
import re

import requests
from bs4 import BeautifulSoup

names = []  # chapter titles
urls = []   # chapter URLs

url = 'http://www.xbiquge.la/20/20948/'
response = requests.get(url)
response.encoding = 'utf-8'  # set the encoding explicitly to avoid mojibake

# Parse the index page, locate the <div id="list"> block,
# and collect every <a> tag inside it.
soup = BeautifulSoup(response.text, 'lxml')
chapter_list = soup.find('div', id='list')
links = chapter_list.find_all('a')

# Record each chapter's title and absolute URL.
for link in links:
    names.append(link.string)
    urls.append('http://www.xbiquge.la' + link.get('href'))

# The novel's title sits in <div id="info">/<h1>; create a folder
# named after it on the desktop if it does not exist yet.
info = soup.find('div', id='info')
title = info.find('h1').string
folder = os.path.join('C:\\Users\\Administrator\\Desktop', title)
if not os.path.exists(folder):
    os.mkdir(folder)

# Fetch every chapter page, cut out the <div id="content"> block,
# strip the HTML tags, and save the text to a same-named .txt file.
for name, chapter_url in zip(names, urls):
    resp = requests.get(chapter_url)
    resp.encoding = 'utf-8'
    # re.S lets '.' match newlines, since the chapter body spans many lines.
    match = re.search(r'<div id="content">(.*?)</div>', resp.text, re.S)
    if match is None:
        continue
    body = BeautifulSoup(match.group(1), 'lxml').get_text()
    filename = os.path.join(folder, name + '.txt')
    print(name + '.txt')
    # 'with' closes the file even if a later iteration raises.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(name + '\n')
        f.write(body)
```
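Two practical gaps worth closing: chapter titles scraped from the page can contain characters that Windows forbids in filenames (e.g. `?` or `*`), and firing requests back-to-back with no error handling can get the crawler blocked or crash it mid-run. The sketch below is one way to harden the loop; `sanitize_filename` and `fetch_chapter` are helpers introduced here for illustration (not part of the original post), and the one-second delay is an arbitrary politeness value.

```python
import re
import time

import requests

def sanitize_filename(name):
    # Replace characters that Windows does not allow in filenames
    # (hypothetical helper, not in the original script).
    return re.sub(r'[\\/:*?"<>|]', '_', name).strip()

def fetch_chapter(session, chapter_url, retries=3, delay=1.0):
    # Retry a few times on transient network errors, pausing between
    # attempts so the crawler stays polite to the server.
    for attempt in range(retries):
        try:
            resp = session.get(chapter_url, timeout=10)
            resp.encoding = 'utf-8'
            return resp.text
        except requests.RequestException:
            if attempt == retries - 1:
                raise
            time.sleep(delay)
```

With these in place, create one `session = requests.Session()` before the loop, fetch each page with `html = fetch_chapter(session, chapter_url)`, and build the output path as `os.path.join(folder, sanitize_filename(name) + '.txt')`.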
Source: 博客园 (cnblogs)
Author: 疾风不弃
Link: https://www.cnblogs.com/hfct/p/11652007.html