Python 爬取链家二手房房源信息
本文介绍如何爬取链家网站的二手房房源信息。这是我第一次做,仅供参考;需要使用 scrapy 框架。 import scrapy,pypinyin,requests import bs4 from ..items import LianjiaItem class LianjiaSpider(scrapy.Spider): name = 'lianjia_dl' allowed_domains = ['www.lianjia.com'] start_urls = [] url_0 = 'https://www.lianjia.com/city/' res = requests.get(url_0) bs_cs = bs4.BeautifulSoup(res.text,'html.parser') xinxi_cs = bs_cs.find_all('div',class_='city_province') for data_cs in xinxi_cs: cs_s = data_cs.find('ul').find_all('li') for cs_1 in cs_s: yess = cs_1.find('a')['href'] if yess.find('fang')>=0: #若fang字符串在yess中,则yess.find('fang')是大于等于0的,显示在字符串中的位置 continue else: