Best practice for using python pool.apply_async() with callback function

好久不见. 提交于 2021-02-11 12:03:26

问题


For pool.apply_async(), what is the best practice for accumulating the results coming from each process? Is it job.get() or job.wait()? What about job.ready() and job.successful()?

Is it possible for each process to accumulate its result in a global variable, so that we do not end up with one process stuck in S (sleep) state for a long time while it tries to accumulate the results coming from every other process?

import multiprocessing
import os
import numpy as np

def prepare_data_fill_arrays(simNum,chrLong):
    """Build the result payload for one (simulation number, chromosome) task.

    Logs the id of the process executing the call, then creates two
    independent ``(1, 10)`` arrays whose every element equals ``simNum``.

    Args:
        simNum: Simulation number; used as the fill value of both arrays.
        chrLong: Chromosome name; passed through unchanged in the result.

    Returns:
        A list ``[simNum, chrLong, array1, array2]``.
    """
    worker_pid = str(os.getpid())
    print('prepare_data_fill_arrays worker id:%s simNum:%d %s' %(worker_pid,simNum,chrLong))

    # Two separate allocations on purpose: the caller receives distinct array objects.
    first_fill = np.ones((1,10))*simNum
    second_fill = np.ones((1,10))*simNum

    return [simNum, chrLong, first_fill, second_fill]

if __name__ == '__main__':
    # Script entry point: fan out one task per (simulation number, chromosome)
    # pair to a process pool and collect the returned arrays in the parent
    # process through an apply_async callback.
    numofSimulations = 10
    chromNamesList = ['chr1', 'chr2', 'chr3', 'chr4', 'chr5', 'chr6', 'chr7', 'chrX', 'chr8', 'chr9', 'chr10', 'chr11',
                      'chr12', 'chr13', 'chr14', 'chr15', 'chr16', 'chr17', 'chr18', 'chr20', 'chrY', 'chr19', 'chr22',
                      'chr21', 'chrM']

    # Simulation numbers run from 0 to numofSimulations inclusive, i.e.
    # numofSimulations + 1 distinct values.
    sim_nums = range(0, numofSimulations + 1)
    sim_num_chr_tuples = ((sim_num, chrLong) for sim_num in sim_nums for chrLong in chromNamesList)

    jobs=[]
    # One row per simulation number, indexed directly by simNum. Allocating
    # only numofSimulations rows and indexing with simNum-1 would map simNum 0
    # to index -1, which NumPy wraps to the last row, so simulation 0 and the
    # last simulation would silently overwrite each other.
    accumulatedArray1=np.zeros((numofSimulations + 1,10))
    accumulatedArray2=np.zeros((numofSimulations + 1,10))

    def accumulateArray(arrayList):
        """Store one worker result in the parent-side result arrays.

        Passed as the ``callback`` of ``pool.apply_async``, so it receives the
        list returned by ``prepare_data_fill_arrays``:
        ``[simNum, chrLong, array1, array2]``.

        Errors are reported and swallowed rather than propagated, so a bad
        result does not raise out of the callback.
        """
        try:
            simNum, chrLong, array1, array2 = arrayList
            # NOTE(review): this overwrites the row for simNum on every
            # chromosome instead of summing; if per-chromosome results are
            # meant to be added up, use += here. Confirm the intent.
            accumulatedArray1[simNum]=array1
            accumulatedArray2[simNum]=array2
            print('ACCUMULATION simNum:%d chrLong:%s' %(simNum,chrLong))
        except Exception as e:
            print("Exception: %s" %(e))

    with multiprocessing.Pool() as pool:
        for simNum, chrLong in sim_num_chr_tuples:
            jobs.append(pool.apply_async(prepare_data_fill_arrays,
                                 args=(simNum,chrLong,),
                                 callback=accumulateArray))
        # Wait for every task before leaving the with-block (which terminates
        # the pool); get() also re-raises any exception raised in a worker.
        for job in jobs:
            job.get()

    print("accumulatedArray1:%s" %(accumulatedArray1))
    print("accumulatedArray2:%s" %(accumulatedArray2))

来源:https://stackoverflow.com/questions/65375778/best-practice-for-using-python-pool-apply-async-with-callback-function

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!