Python-爬虫-庆余年小说-词云胡乱分析
进入正题,首先要搞到资源,我先去搜索了一番,找到个网站“落霞”。一言不合就按下了F12,翻了下网页源码,超级简单。 from bs4 import BeautifulSoup from requests import Session from re import sub , DOTALL sess = Session ( ) txt = [ ] url = 'https://www.luoxia.com/qing/48416.htm' def find ( url ) : res = sess . get ( url ) soup = BeautifulSoup ( res . content , 'html.parser' ) title = soup . find ( 'title' ) div = soup . find ( 'div' , id = 'nr1' ) ps = div . find_all ( 'p' ) page = title . text + '\n' print ( page ) for p in ps : page += p . text + '\n' txt . append ( page ) try : a = soup . find ( 'a' , rel = 'next' ) href = a [ 'href' ] except :