pipelines.py
class DuoTestPipeline(object):
    """Scrapy item pipeline that writes items to MySQL through a Twisted
    adbapi connection pool, so inserts run asynchronously and do not block
    the crawl.
    """

    def __init__(self, db_pool):
        # Twisted adbapi ConnectionPool built in from_settings().
        self.db_pool = db_pool

    @classmethod
    def from_settings(cls, settings):
        """Alternate constructor called by Scrapy: read the MySQL
        connection parameters from the project settings and build the
        connection pool."""
        db_params = dict(
            db=settings.get('MYSQL_DB_NAME'),
            host=settings.get('MYSQL_HOST'),
            port=settings.get('MYSQL_PORT'),
            user=settings.get('MYSQL_USER'),
            passwd=settings.get('MYSQL_PASSWORD'),
            charset=settings.get('MYSQL_CHARSET'),
            use_unicode=True,
            # DictCursor makes fetched rows dicts instead of tuples.
            cursorclass=pymysql.cursors.DictCursor,
        )
        # Create the asynchronous connection pool.
        db_pool = adbapi.ConnectionPool('pymysql', **db_params)
        return cls(db_pool)

    def process_item(self, item, spider):
        """Queue the item's SQL for asynchronous execution and return the
        item so later pipelines still receive it."""
        # Hand the insert off to the pool; it runs in a pool thread.
        query = self.db_pool.runInteraction(self.insert_into, item)
        # If the SQL fails, the Deferred calls handle_error().
        query.addErrback(self.handle_error, item, spider)
        # BUG FIX: the original returned None, which passes None to every
        # pipeline after this one. Scrapy expects the item to be returned.
        return item

    def insert_into(self, cursor, item):
        # item['zong'] is [sql, params] prepared by the spider.
        # Parameterized execute() — values are escaped by the driver.
        cursor.execute(item['zong'][0], item['zong'][1])

    def handle_error(self, failure, item, spider):
        # NOTE(review): print() swallows the failure silently; a logger
        # would be better, but stdout is kept to preserve behavior.
        print(failure)

    def data_list(self):
        # Unused placeholder kept for interface compatibility.
        pass
items.py
storage_type = scrapy.Field()   # storage type
analysis_type = scrapy.Field()  # website being parsed
zong = scrapy.Field()           # data bundle: [sql, params] consumed by the pipeline
settings.py
# MySQL connection settings read by DuoTestPipeline.from_settings().
MYSQL_DB_NAME = '**'
MYSQL_HOST = '**'
MYSQL_USER = '**'
MYSQL_PASSWORD = '**'
# BUG FIX: this was misspelled MYSQL_POST, so the pipeline's
# settings.get('MYSQL_PORT') silently returned None and the pool
# connected with port=None.
MYSQL_PORT = 3306
MYSQL_CHARSET = 'utf8mb4'
spider.py
# Build the parameterized INSERT for table tiebadata and stash it on the
# item as item['zong'] = [sql, params] for the pipeline to execute.
sql = (
    'insert into tiebadata('
    'publish_name,publish_time,publish_url,publish_content,'
    'comment_content,comment_time,comment_name,keyword,app_name,run_time'
    ')VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
)
data = (
    item['publish_name'],
    item['publish_time'],
    item['publish_url'],
    item['publish_content'],
    item['comment_content'],
    item['comment_time'],
    item['comment_name'],
    item['keyword'],
    item['app_name'],
    item['run_time'],
)
item['zong'] = [sql, data]
如有不恰当处,请指出,谢谢。
来源:CSDN
作者:蛊i
链接:https://blog.csdn.net/qq_36197940/article/details/84788778