aiohttp爬虫的模板,类的形式
1 import asyncio 2 import aiohttp 3 import async_timeout 4 from lxml import html 5 from timeit import default_timer as timer 6 7 from db import DBData 8 9 10 class Crawler: 11 def __init__ (self, ** kwargs): 12 self.domains = kwargs[ " domains " ] 13 self.max_depth = kwargs[ " max_depth " ] 14 self.max_retries = 3 15 self.max_workers = 10 16 self.Q = asyncio.Queue() 17 self.db_Q = asyncio.Queue() 18 self.cache = set() 19 self.count = 0 20 self.loop = asyncio.get_event_loop() 21 self.db_data = DBData() 22 23 # Clear 24 self.db_data.clear_crawler() 25 26 async def get(self, url, timeout): 27