Efficient outer product in python

后端 未结 3 1688
刺人心
刺人心 2020-11-30 12:41

The outer product in Python seems quite slow when we have to deal with vectors whose dimension is on the order of 10k. Could someone please give me some idea how I could speed up this operation?

3条回答
  •  臣服心动
    2020-11-30 13:18

    It doesn't really get any faster than that; these are your options:

    numpy.outer

    >>> %timeit np.outer(a,b)
    100 loops, best of 3: 9.79 ms per loop
    

    numpy.einsum

    >>> %timeit np.einsum('i,j->ij', a, b)
    100 loops, best of 3: 16.6 ms per loop
    

    numba

    from numba.decorators import autojit
    
    @autojit
    def outer_numba(a, b):
        """Return the outer product of the 1-D arrays ``a`` and ``b``.

        Args:
            a: 1-D array of length ``m``.
            b: 1-D array of length ``n``.

        Returns:
            A newly allocated ``(m, n)`` float64 array where
            ``result[i, j] == a[i] * b[j]``.
        """
        m = a.shape[0]
        n = b.shape[0]
        # ``np.float`` was only an alias for the builtin ``float`` and was
        # removed in NumPy 1.24; ``np.float64`` names the same dtype.
        result = np.empty((m, n), dtype=np.float64)
        # The explicit nested loops are intentional: this is the form the
        # JIT decorator applied to this function is meant to compile.
        for i in range(m):
            for j in range(n):
                result[i, j] = a[i] * b[j]
        return result
    
    >>> %timeit outer_numba(a,b)
    100 loops, best of 3: 9.77 ms per loop
    

    parakeet

    from parakeet import jit
    
    @jit
    def outer_parakeet(a, b):
       ... same as numba
    
    >>> %timeit outer_parakeet(a, b)
    100 loops, best of 3: 11.6 ms per loop
    

    cython

    cimport numpy as np
    import numpy as np
    cimport cython
    ctypedef np.float64_t DTYPE_t
    
    @cython.boundscheck(False)
    @cython.wraparound(False)
    def outer_cython(np.ndarray[DTYPE_t, ndim=1] a, np.ndarray[DTYPE_t, ndim=1] b):
        """Return the outer product of two 1-D float64 arrays.

        The result is a newly allocated ``(m, n)`` float64 array where
        ``result[i, j] == a[i] * b[j]``.  Bounds checking and negative-index
        wraparound are disabled by the decorators, so every index used
        below must stay within ``[0, m)`` / ``[0, n)``.
        """
        # Py_ssize_t is the signed type used for array lengths and indices;
        # a plain C ``int`` could overflow for very long arrays.
        cdef Py_ssize_t m = a.shape[0]
        cdef Py_ssize_t n = b.shape[0]
        # Typed loop indices let the buffer accesses compile to direct C
        # indexing instead of going through Python integer objects.
        cdef Py_ssize_t i, j
        cdef np.ndarray[DTYPE_t, ndim=2] result = np.empty((m, n), dtype=np.float64)
        for i in range(m):
            for j in range(n):
                result[i, j] = a[i] * b[j]
        return result
    
    >>> %timeit outer_cython(a, b)
    100 loops, best of 3: 10.1 ms per loop
    

    theano

    from theano import tensor as T
    from theano import function
    
    # Two symbolic vector inputs for the compiled graph.
    x, y = T.vector(), T.vector()

    # Compile a callable that takes both vectors and returns their outer product.
    outer_theano = function(inputs=[x, y], outputs=T.outer(x, y))
    
    >>> %timeit outer_theano(a, b)
    100 loops, best of 3: 17.4 ms per loop
    

    pypy

    # Same code as the `outer_numba` function
    >>> timeit.timeit("outer_pypy(a,b)", number=100, setup="import numpy as np;a = np.random.rand(128,);b = np.random.rand(32000,);from test import outer_pypy;outer_pypy(a,b)")*1000 / 100.0
    16.36 # ms
    

    Conclusions:

    ╔═══════════╦═══════════╦═════════╗
    ║  method   ║ time(ms)* ║ version ║
    ╠═══════════╬═══════════╬═════════╣
    ║ numba     ║ 9.77      ║ 0.16.0  ║
    ║ np.outer  ║ 9.79      ║ 1.9.1   ║
    ║ cython    ║ 10.1      ║ 0.21.2  ║
    ║ parakeet  ║ 11.6      ║ 0.23.2  ║
    ║ pypy      ║ 16.36     ║ 2.4.0   ║
    ║ np.einsum ║ 16.6      ║ 1.9.1   ║
    ║ theano    ║ 17.4      ║ 0.6.0   ║
    ╚═══════════╩═══════════╩═════════╝
    * less time = faster
    

提交回复
热议问题