安装 pymongo 模块
conda install pymongo 或 pip install pymongo
使用步骤
创建数据库连接
pymongo.MongoClient(host='localhost', port=27017)
本地主机地址:localhost
默认端口:27017
获得数据库对象
数据库连接.数据库名称(也可写作 数据库连接['数据库名称'])
获得 collection 对象
数据库对象.collection名称(也可写作 数据库对象['collection名称'])
爬取豆瓣Top250信息并保存到MongoDB
1 # -*- coding: utf-8 -*-
2 import pymongo
3
4 # Define your item pipelines here
5 #
6 # Don't forget to add your pipeline to the ITEM_PIPELINES setting
7 # See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
8
9
class ScrapyDoubanMongoPipeline(object):
    """Scrapy item pipeline that saves scraped Douban movie items to MongoDB.

    Each item is written as one document to the ``movies`` collection of the
    ``douban_movie`` database on the MongoDB server at ``localhost:27017``.
    """

    def open_spider(self, spider):
        """Open the MongoDB connection and resolve the target collection.

        Args:
            spider: The spider being opened (unused).
        """
        self.conn = pymongo.MongoClient(host='localhost', port=27017)
        # Attribute access is equivalent to self.conn['douban_movie'].
        self.db = self.conn.douban_movie
        self.movies = self.db.movies

    def process_item(self, item, spider):
        """Insert one movie item into the collection and pass the item on.

        Args:
            item: Mapping-like item providing the keys ``title``, ``score``,
                ``rank``, ``abstract`` and ``describe``.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``, so that later pipeline stages still receive it.

        Raises:
            KeyError: If ``item`` lacks one of the expected keys.
        """
        # Collection.insert() was deprecated in PyMongo 3 and removed in
        # PyMongo 4; insert_one() is the supported single-document API.
        self.movies.insert_one(
            {
                "title": item['title'],
                "score": item['score'],
                "rank": item['rank'],
                "abstract": item['abstract'],
                "describe": item['describe'],
            }
        )
        # Scrapy requires process_item to return the item (or raise DropItem);
        # returning None would hand None to every subsequent pipeline.
        return item

    def close_spider(self, spider):
        """Close the MongoDB connection opened in ``open_spider``.

        Args:
            spider: The spider being closed (unused).
        """
        self.conn.close()