Best practice for using python pool.apply_async() with callback function

问题

For pool.apply_async(), what is the best practice for accumulating the results coming from each process? Is it job.get() or job.wait()? What about job.ready() and job.successful()?

Is it possible to accumulate each result in a global variable in each process, so that we do not end up with one process in S (sleep) mode for a long time trying to accumulate the results coming from each process?

import multiprocessing
import os
import numpy as np

def prepare_data_fill_arrays(simNum,chrLong):
    """Build the per-task payload for one (simNum, chrLong) pair.

    Logs the worker's process id together with the arguments, then returns
    a four-element list ``[simNum, chrLong, array1, array2]`` in which both
    arrays are separate ``(1, 10)`` float arrays filled with ``simNum``.
    """
    worker_pid = os.getpid()
    print('prepare_data_fill_arrays worker id:%s simNum:%d %s' % (worker_pid, simNum, chrLong))

    # Two independent arrays (not two references to one array).
    first_array, second_array = (np.ones((1, 10)) * simNum for _ in range(2))
    return [simNum, chrLong, first_array, second_array]

if __name__ == '__main__':
    # Driver: submit one task per (simulation number, chromosome) pair to a
    # process pool and collect every task's arrays via an apply_async callback.
    numofSimulations = 10
    chromNamesList = ['chr1', 'chr2', 'chr3', 'chr4', 'chr5', 'chr6', 'chr7', 'chrX', 'chr8', 'chr9', 'chr10', 'chr11',
                      'chr12', 'chr13', 'chr14', 'chr15', 'chr16', 'chr17', 'chr18', 'chr20', 'chrY', 'chr19', 'chr22',
                      'chr21', 'chrM']

    # simNum runs from 0 through numofSimulations inclusive, i.e.
    # numofSimulations + 1 distinct values.
    sim_nums = range(0, numofSimulations + 1)
    sim_num_chr_tuples = ((sim_num, chrLong) for sim_num in sim_nums for chrLong in chromNamesList)

    jobs=[]
    # One row per simNum value, indexed directly by simNum. The previous
    # layout (numofSimulations rows indexed by simNum-1) sent simNum == 0 to
    # index -1, which wraps to the last row and collides with
    # simNum == numofSimulations.
    accumulatedArray1=np.zeros((len(sim_nums),10))
    accumulatedArray2=np.zeros((len(sim_nums),10))

    def accumulateArray(arrayList):
        """Store one task's result in the accumulator arrays.

        ``arrayList`` is the list returned by ``prepare_data_fill_arrays``:
        ``[simNum, chrLong, array1, array2]``. Passed as the ``callback`` of
        ``pool.apply_async``, so it receives each task's return value.
        Any exception is printed instead of being propagated.
        """
        try:
            simNum, chrLong, array1, array2 = arrayList
            # The (1, 10) arrays are assigned into a length-10 row.
            # NOTE(review): this overwrites the row for simNum on every
            # chromosome instead of adding to it; if a per-simulation sum
            # over chromosomes is intended, use `+=` - confirm.
            accumulatedArray1[simNum]=array1
            accumulatedArray2[simNum]=array2
            print('ACCUMULATION simNum:%d chrLong:%s' %(simNum,chrLong))
        except Exception as e:
            print("Exception: %s" %(e))

    with multiprocessing.Pool() as pool:
        for simNum, chrLong in sim_num_chr_tuples:
            jobs.append(pool.apply_async(prepare_data_fill_arrays,
                                 args=(simNum,chrLong,),
                                 callback=accumulateArray))
        # Block until every task has finished; get() also re-raises any
        # exception raised inside a worker.
        for job in jobs:
            job.get()

    print("accumulatedArray1:%s" %(accumulatedArray1))
    print("accumulatedArray2:%s" %(accumulatedArray2))

来源：https://stackoverflow.com/questions/65375778/best-practice-for-using-python-pool-apply-async-with-callback-function

标签

python

multiprocessing

pool