问题
For pool.apply_async(), what is the best practice for accumulating the results coming from each process?
Is it job.get() or job.wait()? What about job.ready() and job.successful()?
Is it possible to accumulate each result into a global variable inside each process, so that we do not end up with one process stuck in the S (sleep) state for a long time while it accumulates the results coming from all the other processes?
import multiprocessing
import os
import numpy as np
def prepare_data_fill_arrays(simNum,chrLong):
    """Build the demo payload for one (simulation, chromosome) task.

    Prints the id of the process executing the call together with the two
    arguments, then returns a four-element list
    ``[simNum, chrLong, array1, array2]``. Both arrays are distinct
    ``(1, 10)`` arrays of ones scaled by ``simNum``.
    """
    worker_id = str(os.getpid())
    print('prepare_data_fill_arrays worker id:%s simNum:%d %s' %(worker_id,simNum,chrLong))
    # Two independent arrays (not two references to one array).
    first_fill, second_fill = (np.ones((1,10))*simNum for _ in range(2))
    return [simNum, chrLong, first_fill, second_fill]
if __name__ == '__main__':
    # Demo driver: submit one task per (simulation number, chromosome) pair to
    # a process pool and collect each task's arrays into per-simulation rows.
    numofSimulations = 10
    chromNamesList = ['chr1', 'chr2', 'chr3', 'chr4', 'chr5', 'chr6', 'chr7', 'chrX', 'chr8', 'chr9', 'chr10', 'chr11',
                      'chr12', 'chr13', 'chr14', 'chr15', 'chr16', 'chr17', 'chr18', 'chr20', 'chrY', 'chr19', 'chr22',
                      'chr21', 'chrM']
    # Simulation numbers run from 0 to numofSimulations inclusive.
    sim_nums = range(0, numofSimulations + 1)
    sim_num_chr_tuples = ((sim_num, chrLong) for sim_num in sim_nums for chrLong in chromNamesList)
    jobs = []
    # One row per simulation number. Previously only numofSimulations rows were
    # allocated and rows were addressed with simNum-1, so simNum 0 wrapped
    # around to row -1 and collided with simNum == numofSimulations.
    accumulatedArray1 = np.zeros((len(sim_nums), 10))
    accumulatedArray2 = np.zeros((len(sim_nums), 10))

    def accumulateArray(arrayList):
        """Store one task's arrays in the row belonging to its simulation.

        Used as the ``callback`` of ``pool.apply_async``; ``arrayList`` is the
        list returned by ``prepare_data_fill_arrays``:
        ``[simNum, chrLong, array1, array2]``.
        """
        # The pool invokes callbacks on its own result-handling machinery, not
        # on the code path that called apply_async, so an exception escaping
        # here would not surface at job.get(). Report it instead of raising.
        try:
            simNum = arrayList[0]
            chrLong = arrayList[1]
            array1 = arrayList[2]
            array2 = arrayList[3]
            # NOTE(review): every chromosome of a simulation writes the same
            # row, so later results overwrite earlier ones. If a running sum
            # is intended, use `+= array1[0]` (the arrays are (1, 10)) -- confirm.
            accumulatedArray1[simNum] = array1
            accumulatedArray2[simNum] = array2
            print('ACCUMULATION simNum:%d chrLong:%s' %(simNum,chrLong))
        except Exception as e:
            print("Exception: %s" %(e))

    with multiprocessing.Pool() as pool:
        for simNum, chrLong in sim_num_chr_tuples:
            jobs.append(pool.apply_async(prepare_data_fill_arrays,
                                         args=(simNum,chrLong,),
                                         callback=accumulateArray))
        # Must stay inside the `with` block: leaving it terminates the pool,
        # after which unfinished jobs would never complete. get() blocks until
        # the job is done and re-raises any exception raised in the worker.
        for job in jobs:
            job.get()

    print("accumulatedArray1:%s" %(accumulatedArray1))
    print("accumulatedArray2:%s" %(accumulatedArray2))
来源:https://stackoverflow.com/questions/65375778/best-practice-for-using-python-pool-apply-async-with-callback-function