How do I pass large numpy arrays between python subprocesses without saving to disk?

后端 未结 6 1479
说谎
说谎 2020-11-29 21:21

Is there a good way to pass a large chunk of data between two python subprocesses without using the disk? Here's a cartoon example of what I'm hoping to accomplish:

<
6条回答
  •  臣服心动
    2020-11-29 21:46

    Basically, you just want to share a block of memory between processes and view it as a numpy array, right?

    In that case, have a look at this (posted to numpy-discussion by Nadav Horesh a while back; not my work). There are a couple of similar implementations (some more flexible), but they all essentially use this principle.

    #    "Using Python, multiprocessing and NumPy/SciPy for parallel numerical computing"
    # Modified and corrected by Nadav Horesh, Mar 2010
    # No rights reserved
    
    
    import numpy as N
    import ctypes
    import multiprocessing as MP
    
    # Map every supported ctypes scalar type to the NumPy dtype of the same
    # width.  The width is taken from ctypes.sizeof() instead of being
    # hard-coded because c_wchar, c_long and c_ulong differ between platforms
    # (c_wchar is 4 bytes on Linux, c_long is 4 bytes on Windows); a dtype
    # whose itemsize disagrees with the ctypes size would view the wrong
    # amount of shared memory.  The kind codes are 'i' signed integer,
    # 'u' unsigned integer and 'f' floating point.
    _ctypes_to_numpy = {
        c_type: N.dtype('%s%d' % (kind, ctypes.sizeof(c_type)))
        for c_type, kind in (
            (ctypes.c_char, 'u'),
            (ctypes.c_wchar, 'i'),
            (ctypes.c_byte, 'i'),
            (ctypes.c_ubyte, 'u'),
            (ctypes.c_short, 'i'),
            (ctypes.c_ushort, 'u'),
            (ctypes.c_int, 'i'),
            (ctypes.c_uint, 'u'),
            (ctypes.c_long, 'i'),
            (ctypes.c_ulong, 'u'),
            # Guarantees a 64-bit integer entry where c_long is only 32 bits.
            (ctypes.c_longlong, 'i'),
            (ctypes.c_ulonglong, 'u'),
            (ctypes.c_float, 'f'),
            (ctypes.c_double, 'f'),
        )
    }

    # Reverse lookup: NumPy dtype -> ctypes type.  Several ctypes types can
    # share one dtype; the entry listed last above wins, so the character
    # types (c_char, c_wchar) are never chosen over the plain integer types.
    _numpy_to_ctypes = {
        dtype: c_type for c_type, dtype in _ctypes_to_numpy.items()
    }
    
    
    def shmem_as_ndarray(raw_array, shape=None):
        """Return a NumPy array that views the memory of a shared array.

        No data is copied: writes through the returned array are writes to
        the shared buffer.

        Parameters
        ----------
        raw_array
            An array created with ``multiprocessing.Array`` (with or without
            a lock) or ``multiprocessing.RawArray``.
        shape : int or sequence of int, optional
            Desired shape of the result.  If omitted, or if its element count
            differs from ``len(raw_array)``, a one-dimensional array of
            length ``len(raw_array)`` is returned instead.

        Returns
        -------
        numpy.ndarray
            A view onto the shared buffer with the dtype that
            ``_ctypes_to_numpy`` associates with the array's element type.

        Raises
        ------
        KeyError
            If the element type has no entry in ``_ctypes_to_numpy``.
        """
        # Lock-wrapped arrays expose the underlying ctypes array through
        # get_obj(); arrays created with lock=False already are that ctypes
        # array.  Going through the public accessor avoids the private
        # _obj._wrapper.get_address() chain.
        get_obj = getattr(raw_array, 'get_obj', None)
        buf = get_obj() if get_obj is not None else raw_array
        size = len(buf)

        if shape is None or N.asarray(shape).prod() != size:
            # Missing or incompatible shape: fall back to a flat view.
            shape = (size,)
        elif N.ndim(shape) == 0:
            # Scalar shape (Python int or NumPy integer).
            shape = (int(shape),)
        else:
            shape = tuple(shape)

        dtype = _ctypes_to_numpy[buf._type_]
        # frombuffer() keeps a reference to `buf`, so the shared block stays
        # alive for as long as the returned array (or a view of it) exists.
        return N.frombuffer(buf, dtype=dtype, count=size).reshape(shape)
    
    def empty_shared_array(shape, dtype, lock=True):
        """Create a multiprocessing shared array described by ndarray parameters.

        Parameters
        ----------
        shape : int or sequence of int
            Number of elements, or a shape whose product gives that number.
            The result is always one-dimensional.
        dtype
            A NumPy dtype, or anything ``numpy.dtype()`` accepts.
        lock
            Passed through unchanged as the ``lock`` argument of
            ``multiprocessing.Array``.

        Returns
        -------
        The object returned by ``multiprocessing.Array``.

        Raises
        ------
        KeyError
            If ``dtype`` has no entry in ``_numpy_to_ctypes``.
        """
        # multiprocessing.Array treats a second argument that is not a plain
        # int as an initializer sequence, and prod() returns a NumPy scalar,
        # so convert the element count to a built-in int explicitly.
        size = int(N.asarray(shape).prod())

        np_dtype = N.dtype(dtype)
        try:
            c_type = _numpy_to_ctypes[np_dtype]
        except KeyError:
            raise KeyError('no ctypes equivalent for dtype %r' % (np_dtype,))
        return MP.Array(c_type, size, lock=lock)
    
    def emptylike_shared_array(ndarray, lock=True):
        """Create a shared array with the element count and dtype of `ndarray`.

        Only ``ndarray.size`` and ``ndarray.dtype`` are read; the contents
        of `ndarray` are not copied.  `lock` is forwarded to
        ``empty_shared_array``.
        """
        n_items, item_dtype = ndarray.size, ndarray.dtype
        return empty_shared_array(n_items, item_dtype, lock)
    

提交回复
热议问题