Python学习笔记——爬虫之Scrapy项目实战
目录
手机App抓包爬虫
阳光热线问政平台
(实战项目三)新浪网分类资讯爬虫
Cosplay图片下载爬虫
用Pymongo保存数据
三种Scrapy模拟登陆策略

手机App抓包爬虫

1. items.py

class DouyuspiderItem(scrapy.Item):
    name = scrapy.Field()# 存储照片的名字
    imagesUrls = scrapy.Field()# 照片的url路径
    imagesPath = scrapy.Field()# 照片保存在本地的路径

2. spiders/douyu.py

import scrapy
import json
from douyuSpider.items import DouyuspiderItem

class DouyuSpider(scrapy.Spider):
    name = "douyu"
    allowd_domains = ["http://capi.douyucdn.cn"]
    offset = 0
    url = "http://capi.douyucdn.cn/api/v1/getVerticalRoom?limit=20&offset="
    start_urls = [url + str(offset)]

    def parse(self, response):
        # 返回从json里获取 data段数据集合