"""Download photo albums from pic.yesky.com using a pool of worker processes."""
import os
# Process pool used to crawl several listing pages in parallel.
from multiprocessing import Pool

import requests
from bs4 import BeautifulSoup

# Listing pages are fetched from f"{baseurl}_{page}.shtml".
baseurl = "http://pic.yesky.com/c/6_18332"

# Seconds to wait on any single HTTP request; without a timeout one stalled
# connection would block its worker (and therefore pool.join()) indefinitely.
REQUEST_TIMEOUT = 10


def download_img(url, dirname=""):
    """Download the image at *url* and save it inside *dirname*.

    The file name is the last path segment of *url*.

    Args:
        url: Address of the image to download.
        dirname: Existing directory to save into; the default "" saves into
            the current working directory.

    Raises:
        requests.RequestException: If the request fails or times out.
        OSError: If the file cannot be written.
    """
    res = requests.get(url, timeout=REQUEST_TIMEOUT)
    filename = url.split("/")[-1]
    # os.path.join ignores an empty dirname, so the default no longer
    # resolves to "/<filename>" at the filesystem root.
    with open(os.path.join(dirname, filename), "wb") as f:
        f.write(res.content)
    print(f"{dirname}{filename}下载成功!")


def _big_img_url(soup):
    """Return the src of the <img> inside the "l_effect_img_mid" div of *soup*."""
    div_obj = soup.find(name="div", attrs={"class": "l_effect_img_mid"})
    return div_obj.find("img").get("src")


def find_big_img(url):
    """Fetch the page at *url* and return the URL of its full-size image.

    Raises:
        requests.RequestException: If the request fails or times out.
        AttributeError: If the page lacks the expected div/img structure.
    """
    res = requests.get(url, timeout=REQUEST_TIMEOUT)
    bs = BeautifulSoup(res.content, "html.parser")
    return _big_img_url(bs)


def get_page_count(url):
    """Fetch *url* and search inside its div with class "flym".

    NOTE(review): this looks unfinished - the result of ``find_all`` is
    discarded and the function returns None. Nothing in the code visible here
    calls it; confirm the intended selector before relying on it.
    """
    res = requests.get(url, timeout=REQUEST_TIMEOUT)
    bs = BeautifulSoup(res.content, "html.parser")
    bs.find(name="div", attrs={"class": "flym"}).find_all(name="")


def _download_album(anchor):
    """Download every image of the album that *anchor* links to.

    The anchor's ``title`` is used as the target directory name and its
    ``href`` as the address of the album's first page.
    """
    dirname = anchor.get("title")
    # exist_ok avoids the check-then-create race between worker processes.
    os.makedirs(dirname, exist_ok=True)
    link = anchor.get("href")

    album_res = requests.get(link, timeout=REQUEST_TIMEOUT)
    album_bs = BeautifulSoup(album_res.content, "html.parser")
    # The first page's image is taken from the page we already fetched.
    download_img(_big_img_url(album_bs), dirname)

    div_overview = album_bs.find(name="div", attrs={"class": "overview"})
    for thumb in div_overview.find_all("a"):
        page_url = thumb.get("href")
        if page_url == link:
            # Already downloaded above.
            continue
        download_img(find_big_img(page_url), dirname)


def run(url, num):
    """Download all albums listed on listing page *num*.

    Args:
        url: Base address of the listing; the page fetched is
            f"{url}_{num}.shtml".
        num: Listing page number.

    Raises:
        requests.RequestException: If the listing page itself cannot be
            fetched. Network failures inside a single album are reported and
            skipped so the remaining albums are still processed.
        AttributeError: If a page lacks the expected HTML structure.
    """
    res = requests.get(f"{url}_{num}.shtml", timeout=REQUEST_TIMEOUT)
    bs = BeautifulSoup(res.text, "html.parser")
    albums = bs.find(name="div", attrs={"class": "lb_box"}).find_all("dd")
    print(albums)
    for album in albums:
        try:
            _download_album(album.find("a"))
        except requests.RequestException as exc:
            # One unreachable or slow album should not cost the rest of the page.
            print(f"album download failed: {exc!r}")


def _report_error(exc):
    """Print an exception that escaped ``run`` in a worker process."""
    print(f"page crawl failed: {exc!r}")


if __name__ == '__main__':
    # Start 5 worker processes.
    pool = Pool(5)
    # Crawl listing pages 1 through 7.
    for page in range(1, 8):
        # apply_async(function, args); without error_callback an exception
        # raised in the worker would be dropped silently because the result
        # object is never inspected.
        pool.apply_async(run, (baseurl, page), error_callback=_report_error)
    pool.close()
    pool.join()
效果如下:
来源:https://www.cnblogs.com/zhang-da/p/12209850.html