Shared-memory objects in multiprocessing

后端 未结 4 1619
再見小時候
再見小時候 2020-11-22 17:04

Suppose I have a large in-memory numpy array, and a function func that takes this giant array as input (together with some other parameters). func ...

4条回答
  •  攒了一身酷
    2020-11-22 17:28

    I ran into the same problem and wrote a little shared-memory utility class to work around it.

    I'm using multiprocessing.RawArray (lock-free), and access to the arrays is not synchronized at all (also lock-free), so be careful not to shoot yourself in the foot.

    With this solution I get a speedup of approximately 3x on a quad-core i7.

    Here's the code. Feel free to use and improve it, and please report any bugs.

    '''
    Created on 14.05.2013
    
    @author: martin
    '''
    
    import multiprocessing
    import ctypes
    import numpy as np
    
    class SharedNumpyMemManagerError(Exception):
        """Error raised by SharedNumpyMemManager, e.g. when no free handle is left."""
    
    '''
    Singleton Pattern
    '''
    class SharedNumpyMemManager:
        """Singleton registry of numpy arrays backed by ``multiprocessing.RawArray``.

        Arrays are created through :meth:`createArray`, which returns an integer
        handle. The handle can be passed around cheaply (for example to pool
        workers) and turned back into the numpy array with :meth:`getArray`.
        :meth:`freeArray` drops the registry's reference to an array.

        Only the registry bookkeeping is guarded by a lock. Reads and writes to
        the array contents themselves are NOT synchronized (``RawArray`` carries
        no lock), so callers must coordinate concurrent element access themselves.
        """

        # Initial number of handle slots; the table doubles when it fills up.
        _initSize = 1024

        # The one shared instance, created lazily.
        _instance = None

        def __new__(cls, *args, **kwargs):
            """Return the single shared instance, creating it on first use."""
            if cls._instance is None:
                # object.__new__ accepts no extra arguments, so none are forwarded.
                cls._instance = super(SharedNumpyMemManager, cls).__new__(cls)
            return cls._instance

        def __init__(self):
            """Set up an empty registry (only the first time the singleton is built)."""
            # __init__ runs on every ``SharedNumpyMemManager()`` call even though
            # __new__ hands back the same object; without this guard a second
            # call would silently discard every registered array.
            if getattr(self, '_initialized', False):
                return
            self.lock = multiprocessing.Lock()
            self.cur = 0   # slot index where the search for a free handle starts
            self.cnt = 0   # number of slots currently in use
            self.shared_arrays = [None] * SharedNumpyMemManager._initSize
            self._initialized = True

        def __createArray(self, dimensions, ctype=ctypes.c_double):
            """Allocate a shared array and register it.

            :param dimensions: int or tuple of ints giving the array shape.
            :param ctype: ctypes element type of the underlying ``RawArray``.
            :return: integer handle to pass to ``getArray`` / ``freeArray``.
            :raises SharedNumpyMemManagerError: if no free slot can be found.
            """
            # ``with`` guarantees the lock is released even if allocation fails.
            with self.lock:
                # Double the slot table when every slot is occupied.
                if self.cnt >= len(self.shared_arrays):
                    self.shared_arrays.extend(
                        [None] * max(1, len(self.shared_arrays)))

                # Advance self.cur to a free slot and remember it while the lock
                # is still held, so a concurrent create cannot change it under us.
                self.__getNextFreeHdl()
                hdl = self.cur

                # RawArray only treats a real ``int`` as a size; np.prod returns
                # a numpy scalar, which would be mistaken for an initializer.
                n_items = int(np.prod(dimensions))
                shared_array_base = multiprocessing.RawArray(ctype, n_items)

                # Wrap the shared buffer as a numpy array (no copy) and give it
                # the requested shape; reshape returns a view on the same memory.
                flat = np.ctypeslib.as_array(shared_array_base)
                self.shared_arrays[hdl] = flat.reshape(dimensions)

                self.cnt += 1

            return hdl

        def __getNextFreeHdl(self):
            """Move ``self.cur`` forward (cyclically) to the next unused slot.

            :raises SharedNumpyMemManagerError: if a full cycle finds no free slot.
            """
            orgCur = self.cur
            while self.shared_arrays[self.cur] is not None:
                self.cur = (self.cur + 1) % len(self.shared_arrays)
                if orgCur == self.cur:
                    raise SharedNumpyMemManagerError('Max Number of Shared Numpy Arrays Exceeded!')

        def __freeArray(self, hdl):
            """Release the slot for ``hdl``; freeing the same handle twice is a no-op."""
            with self.lock:
                if self.shared_arrays[hdl] is not None:
                    self.shared_arrays[hdl] = None
                    self.cnt -= 1

        def __getArray(self, i):
            """Return the array registered under handle ``i`` (``None`` if freed)."""
            return self.shared_arrays[i]

        @staticmethod
        def getInstance():
            """Return the singleton, creating it if it does not exist yet."""
            if SharedNumpyMemManager._instance is None:
                SharedNumpyMemManager._instance = SharedNumpyMemManager()
            return SharedNumpyMemManager._instance

        @staticmethod
        def createArray(*args, **kwargs):
            """Create a shared array on the singleton; see ``__createArray``."""
            return SharedNumpyMemManager.getInstance().__createArray(*args, **kwargs)

        @staticmethod
        def getArray(*args, **kwargs):
            """Look up a shared array by handle on the singleton."""
            return SharedNumpyMemManager.getInstance().__getArray(*args, **kwargs)

        @staticmethod
        def freeArray(*args, **kwargs):
            """Free a shared array by handle on the singleton."""
            return SharedNumpyMemManager.getInstance().__freeArray(*args, **kwargs)
    
    # Create the singleton eagerly when this module is loaded, rather than on
    # the first createArray/getArray/freeArray call.
    # NOTE(review): presumably done so the instance already exists before any
    # worker processes are started -- confirm.
    SharedNumpyMemManager.getInstance()
    
    if __name__ == '__main__':
        # Demo / micro-benchmark: fill a shared array element by element, once
        # sequentially and once through a process pool, and print both timings.

        import timeit

        N_PROC = 8          # number of pool worker processes
        INNER_LOOP = 10000  # repeated writes per element, to simulate work
        N = 1000            # length of the shared array

        def propagate(t):
            """Write ``i`` into slot ``i`` of the shared array INNER_LOOP times.

            ``t`` is a tuple ``(i, shm_hdl, evidence)``; ``evidence`` is carried
            along but not used by this dummy workload.
            """
            i, shm_hdl, evidence = t
            a = SharedNumpyMemManager.getArray(shm_hdl)
            for _ in range(INNER_LOOP):
                a[i] = i

        class Parallel_Dummy_PF:
            """Toy workload owning one shared array and one worker pool."""

            def __init__(self, N):
                self.N = N
                # The array is created before the pool on purpose.
                # NOTE(review): workers can only resolve the handle if they
                # inherit the manager's state from this process (fork start
                # method) -- confirm behaviour on spawn-based platforms.
                self.arrayHdl = SharedNumpyMemManager.createArray(self.N, ctype=ctypes.c_double)
                self.pool = multiprocessing.Pool(processes=N_PROC)

            def update_par(self, evidence):
                """Run ``propagate`` for every index using the worker pool."""
                tasks = [(i, self.arrayHdl, evidence) for i in range(self.N)]
                self.pool.map(propagate, tasks)

            def update_seq(self, evidence):
                """Run ``propagate`` for every index in this process."""
                for i in range(self.N):
                    propagate((i, self.arrayHdl, evidence))

            def getArray(self):
                """Return the numpy view of this object's shared array."""
                return SharedNumpyMemManager.getArray(self.arrayHdl)

            def close(self):
                """Shut down the worker pool and release the shared-array handle."""
                self.pool.close()
                self.pool.join()
                SharedNumpyMemManager.freeArray(self.arrayHdl)

        def parallelExec():
            pf = Parallel_Dummy_PF(N)
            try:
                print(pf.getArray())
                pf.update_par(5)
                print(pf.getArray())
            finally:
                # Without this the pool's worker processes are never cleaned up.
                pf.close()

        def sequentialExec():
            pf = Parallel_Dummy_PF(N)
            try:
                print(pf.getArray())
                pf.update_seq(5)
                print(pf.getArray())
            finally:
                pf.close()

        t1 = timeit.Timer("sequentialExec()", "from __main__ import sequentialExec")
        t2 = timeit.Timer("parallelExec()", "from __main__ import parallelExec")

        print("Sequential: ", t1.timeit(number=1))
        print("Parallel: ", t2.timeit(number=1))
    

提交回复
热议问题