1. 线程池的基本用法
# coding: utf-8
from concurrent.futures import ThreadPoolExecutor
import time


def spider(page):
    """Simulate crawling one page.

    Args:
        page: Page number; it is also used as the number of seconds to sleep.

    Returns:
        The ``page`` value that was passed in.
    """
    time.sleep(page)
    print(f"crawl task{page} finished")
    return page


def main():
    """Submit three spider tasks and print done() before and after a wait."""
    # Create a thread pool that holds at most 5 worker threads.
    with ThreadPoolExecutor(max_workers=5) as executor:
        # submit() schedules the function in the pool and returns a Future.
        task1 = executor.submit(spider, 1)
        task2 = executor.submit(spider, 2)
        task3 = executor.submit(spider, 3)

        # done() tells whether a task has finished.
        print(f"task1: {task1.done()}")
        print(f"task2: {task2.done()}")
        print(f"task3: {task3.done()}")

        # After 2.5 s the 1 s and 2 s tasks should have finished, while the
        # 3 s task should still be running.
        time.sleep(2.5)
        print(f"task1: {task1.done()}")
        print(f"task2: {task2.done()}")
        print(f"task3: {task3.done()}")

        # result() returns the value the task returned.
        print(task1.result())


# Run the demo only when executed as a script, so importing this module
# (e.g. to reuse spider) does not trigger several seconds of sleeping.
if __name__ == "__main__":
    main()
from concurrent.futures import ThreadPoolExecutor, as_completed


def send_data(a):
    """Print the received argument and hand it back to the caller.

    Defined before the pool is used so the name exists when tasks are
    submitted.

    Args:
        a: Value forwarded from ``executor.submit(send_data, a)``.

    Returns:
        The same value ``a``, so that ``future.result()`` has data to show.
    """
    print("a is ", a)
    return a


# The context manager shuts the pool down when the block exits.
with ThreadPoolExecutor(max_workers=5) as executor:
    # Arguments after the callable are forwarded to it; a function taking
    # several parameters is submitted as
    #     executor.submit(send_data, "b", "c", "d")
    #
    # The comprehension below is equivalent to:
    #     all_task = []
    #     for i in range(1, 2):
    #         task = executor.submit(send_data, "a")
    #         all_task.append(task)
    all_task = [executor.submit(send_data, "a") for _ in range(1, 2)]

    # as_completed() yields each future as it finishes, so the main thread
    # does not have to poll done() on every task.
    for future in as_completed(all_task):
        print("finish the task")
        obj_data = future.result()
        print("obj_data is ", obj_data)
as_completed:
上面虽然提供了判断任务是否结束的方法,但是不能在主线程中一直判断啊。最好的方法是当某个任务结束了,就给主线程返回结果,而不是一直判断每个任务是否结束。
concurrent.futures 模块中的 as_completed() 就是这样一个函数:它接收一组 Future 并返回一个迭代器,每当某个子线程中的任务执行完,就产出对应的 Future,此时可以直接用 result() 获取返回结果。
参考:
wait、map 的用法可以参考下面的链接:
https://www.jianshu.com/p/6d6e4f745c27