python爬虫爬取链家二手房信息
#coding=utf-8 import requests from fake_useragent import UserAgent from bs4 import BeautifulSoup import json import csv import time # 构建请求头 userAgent = UserAgent() headers = { 'user-agent': userAgent .Chrome } # 声明一个列表存储字典 data_list = [] def start_spider(page): #设置重连次数 requests.adapters.DEFAULT_RETRIES = 15 s = requests.session() #设置连接活跃状态为False s.keep_alive = False #爬取的url,默认爬取的南京的链家房产信息 url = 'https://nj.lianjia.com/ershoufang/pg{}/'.format(page) # 请求url resp = requests.get(url, headers=headers,timeout=10) # 讲返回体转换成Beautiful soup = BeautifulSoup(resp.content, 'lxml') # 筛选全部的li标签