Starmap combined with tqdm?

前端 未结 2 1872
野性不改
野性不改 2020-12-18 22:46

I am doing some parallel processing, as follows:

with mp.Pool(8) as tmpPool:
        results = tmpPool.starmap(my_function, inputs)

where i

相关标签:
2条回答
  • 2020-12-18 23:08

    The temporary solution: rewriting the method to-be-parallelized with imap.

    0 讨论(0)
  • 2020-12-18 23:33

    It's not possible with starmap(), but it's possible with a patch adding Pool.istarmap(). It's based on the code for imap(). All you have to do, is create the istarmap.py-file and import the module to apply the patch before you make your regular multiprocessing-imports.

    Python <3.8

    # istarmap.py for Python <3.8
    import multiprocessing.pool as mpp
    
    
    def istarmap(self, func, iterable, chunksize=1):
        """starmap-version of imap
        """
        if self._state != mpp.RUN:
            raise ValueError("Pool not running")
    
        if chunksize < 1:
            raise ValueError(
                "Chunksize must be 1+, not {0:n}".format(
                    chunksize))
    
        task_batches = mpp.Pool._get_tasks(func, iterable, chunksize)
        result = mpp.IMapIterator(self._cache)
        self._taskqueue.put(
            (
                self._guarded_task_generation(result._job,
                                              mpp.starmapstar,
                                              task_batches),
                result._set_length
            ))
        return (item for chunk in result for item in chunk)
    
    
    mpp.Pool.istarmap = istarmap
    

    Python 3.8+

    # istarmap.py for Python 3.8+
    import multiprocessing.pool as mpp
    
    
    def istarmap(self, func, iterable, chunksize=1):
        """starmap-version of imap
        """
        self._check_running()
        if chunksize < 1:
            raise ValueError(
                "Chunksize must be 1+, not {0:n}".format(
                    chunksize))
    
        task_batches = mpp.Pool._get_tasks(func, iterable, chunksize)
        result = mpp.IMapIterator(self)
        self._taskqueue.put(
            (
                self._guarded_task_generation(result._job,
                                              mpp.starmapstar,
                                              task_batches),
                result._set_length
            ))
        return (item for chunk in result for item in chunk)
    
    
    mpp.Pool.istarmap = istarmap
    

    Then in your script:

    import istarmap  # import to apply patch
    from multiprocessing import Pool
    import tqdm    
    
    
    def foo(a, b):
        for _ in range(int(50e6)):
            pass
        return a, b    
    
    
    if __name__ == '__main__':
    
        with Pool(4) as pool:
            iterable = [(i, 'x') for i in range(10)]
            for _ in tqdm.tqdm(pool.istarmap(foo, iterable),
                               total=len(iterable)):
                pass
    
    0 讨论(0)
提交回复
热议问题