爬天极网韩天任进程池.py

淺唱寂寞╮ 提交于 2020-01-18 18:49:21
# Crawl the picture galleries listed on pic.yesky.com with a process pool.
import os
# Process pool used to crawl several listing pages in parallel.
from multiprocessing import Pool

import requests
from bs4 import BeautifulSoup

# Seconds to wait for any single HTTP response; without a timeout a stalled
# connection would block a worker process forever.
REQUEST_TIMEOUT = 10

# Listing pages are fetched as f"{baseurl}_{num}.shtml".
baseurl = "http://pic.yesky.com/c/6_18332"


def _fetch(url):
    """GET *url* and return the response, raising on HTTP error statuses."""
    res = requests.request("get", url, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of saving/parsing an error page as if it were data.
    res.raise_for_status()
    return res


def _extract_big_img_url(soup):
    """Return the ``src`` of the image inside the ``l_effect_img_mid`` div.

    Returns ``None`` when the div or its ``<img>`` tag is not present.
    """
    div_obj = soup.find(name="div", attrs={"class": "l_effect_img_mid"})
    if div_obj is None:
        return None
    img = div_obj.find("img")
    if img is None:
        return None
    return img.get("src")


def download_img(url, dirname=""):
    """Download the image at *url* and save it inside *dirname*.

    The file name is the last ``/``-separated component of *url*. With the
    default ``dirname=""`` the file is written to the current working
    directory.

    Raises:
        requests.RequestException: the request failed or returned an HTTP
            error status.
        OSError: the target file could not be written.
    """
    res = _fetch(url)
    filename = url.split("/")[-1]
    # os.path.join keeps an empty dirname relative; the previous
    # ``dirname + "/" + filename`` pointed at the filesystem root in that case.
    with open(os.path.join(dirname, filename), "wb") as f:
        f.write(res.content)
    print(f"{dirname}{filename}下载成功!")


def find_big_img(url):
    """Fetch the page at *url* and return the URL of its large image.

    Returns ``None`` when the page has no ``l_effect_img_mid`` image.
    """
    res = _fetch(url)
    bs = BeautifulSoup(res.content, "html.parser")
    return _extract_big_img_url(bs)


def get_page_count(url):
    """Unfinished helper; currently returns ``None`` and is not called.

    NOTE(review): the lookup result is discarded and ``find_all(name="")``
    has no real tag filter. Presumably this was meant to count the pager
    links inside the ``flym`` div -- confirm the intent before using it.
    """
    res = _fetch(url)
    bs = BeautifulSoup(res.content, "html.parser")
    bs.find(name="div", attrs={"class": "flym"}).find_all(name="")


def _download_gallery(link, dirname):
    """Download the large image of gallery page *link* and of its sibling pages.

    Sibling pages are the links found inside the page's ``overview`` div;
    the link equal to *link* itself is skipped because that page's image has
    already been downloaded.
    """
    # exist_ok avoids the check-then-create race between worker processes.
    os.makedirs(dirname, exist_ok=True)

    bs1 = BeautifulSoup(_fetch(link).content, "html.parser")
    current_img_url = _extract_big_img_url(bs1)
    if current_img_url is not None:
        download_img(current_img_url, dirname)

    div_overview = bs1.find(name="div", attrs={"class": "overview"})
    if div_overview is None:
        return
    for anchor in div_overview.find_all("a"):
        page_url = anchor.get("href")
        if not page_url or page_url == link:
            continue
        img_url = find_big_img(page_url)
        if img_url is not None:
            download_img(img_url, dirname)


def run(url, num):
    """Crawl listing page *num* of *url* and download every gallery on it.

    Each ``<dd>`` inside the ``lb_box`` div is expected to contain an ``<a>``
    whose ``title`` becomes the target directory name and whose ``href`` is
    the gallery page. A gallery that fails is reported and skipped so the
    remaining galleries on the page are still processed.

    Raises:
        requests.RequestException: the listing page itself could not be
            fetched.
    """
    page_url = f"{url}_{num}.shtml"
    res = _fetch(page_url)
    bs = BeautifulSoup(res.text, "html.parser")
    box = bs.find(name="div", attrs={"class": "lb_box"})
    if box is None:
        print(f"{page_url}: no lb_box container found, nothing to download")
        return

    lst = box.find_all("dd")
    # Debug output kept from the original script.
    print(lst)
    for entry in lst:
        anchor = entry.find("a")
        if anchor is None:
            continue
        dirname = anchor.get("title")
        link = anchor.get("href")
        if not dirname or not link:
            continue
        try:
            _download_gallery(link, dirname)
        except (requests.RequestException, OSError) as exc:
            print(f"{dirname}: skipped after error: {exc}")


def _report_error(exc):
    """Print an exception that escaped a worker task.

    ``apply_async`` otherwise stores the exception on a result object that
    this script never reads, so failures would go unnoticed.
    """
    print(f"worker failed: {exc!r}")


if __name__ == '__main__':
    # Start 5 worker processes.
    pool = Pool(5)
    # Crawl listing pages 1 through 7.
    for i in range(1, 8):
        # pool.apply_async(function, (function arguments))
        pool.apply_async(run, (baseurl, i), error_callback=_report_error)
    pool.close()
    pool.join()
效果如下:

 

 


标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!