Efficient way to compute the Vandermonde matrix

后端 未结 2 2197
温柔的废话
温柔的废话 2020-12-16 17:03

I\'m calculating Vandermonde matrix for a fairly large 1D array. The natural and clean way to do this is using np.vander(). However, I found that this is approx. 2.5

2条回答
  •  忘掉有多难
    2020-12-16 17:25

    Here are some more methods some of which are quite a bit faster (on my computer) than what has been posted so far.

    The most important observation I think is that it really depends a lot on how many degrees you want. Exponentiation (which I believe is special cased for small integer exponents) only makes sense for small exponent ranges. The more exponents the better multiplication based approaches fare.

    I'd like to highlight a multiply.accumulate based method (ma) which is similar to numpy's builtin approach but faster (and not because I skimped on checks - nnc, numpy-no-checks demonstrates this). For all but the smallest exponent ranges it is actually the fastest for me.

    For reasons I do not understand, the numpy implementation does three things that are to the best of my knowledge slow and unnecessary: (1) It makes quite a few copies of the base vector. (2) It makes them non-contiguous. (3) It does the accumulation in-place which I believe forces buffering.

    Another thing I'd like to mention is that the fastest for small ranges of ints (out_e_1 essentially a manual version of ma), is slowed down by a factor of more than two by the simple precaution of promoting to a larger dtype (safe_e_1 arguably a bit of a misnomer).

    The broadcasting based methods are called bc_* where * indicates the broadcast axis (b for base, e for exp) 'cheat' means the result is noncontiguous.

    Timings (best of 3):

    rep=100 n_b=5000 n_e=4 b_tp= e_tp=
    vander                0.16699657 ms
    bc_b                  0.09595204 ms
    bc_e                  0.07959786 ms
    ma                    0.10755240 ms
    nnc                   0.16459018 ms
    out_e_1               0.02037535 ms
    out_e_2               0.02656622 ms
    safe_e_1              0.04652272 ms
    safe_e_2              0.04081079 ms
    cheat bc_e_cheat            0.04668466 ms
    rep=100 n_b=5000 n_e=8 b_tp= e_tp=
    vander                0.25086462 ms
    bc_b             apparently failed
    bc_e             apparently failed
    ma                    0.15843041 ms
    nnc                   0.24713077 ms
    out_e_1          apparently failed
    out_e_2          apparently failed
    safe_e_1              0.15970622 ms
    safe_e_2              0.19672418 ms
    bc_e_cheat       apparently failed
    rep=100 n_b=5000 n_e=4 b_tp= e_tp=
    vander                0.16225773 ms
    bc_b                  0.53315020 ms
    bc_e                  0.56200830 ms
    ma                    0.07626799 ms
    nnc                   0.16059748 ms
    out_e_1               0.03653416 ms
    out_e_2               0.04043702 ms
    safe_e_1              0.04060494 ms
    safe_e_2              0.04104209 ms
    cheat bc_e_cheat            0.52966076 ms
    rep=100 n_b=5000 n_e=8 b_tp= e_tp=
    vander                0.24542852 ms
    bc_b                  2.03353578 ms
    bc_e                  2.04281270 ms
    ma                    0.11075758 ms
    nnc                   0.24212880 ms
    out_e_1               0.14809043 ms
    out_e_2               0.19261359 ms
    safe_e_1              0.15206112 ms
    safe_e_2              0.19308420 ms
    cheat bc_e_cheat            1.99176601 ms
    

    Code:

    import numpy as np
    import types
    from timeit import repeat
    
    prom={np.dtype(np.int32): np.dtype(np.int64), np.dtype(float): np.dtype(float)}
    
    def RI(k, N, dt, top=100):
        return np.random.randint(0, top if top else N, (k, N)).astype(dt)
    
    def RA(k, N, dt, top=None):
        return np.add.outer(np.zeros((k,), int), np.arange(N)%(top if top else N)).astype(dt)
    
    def RU(k, N, dt, top=100):
        return (np.random.random((k, N))*(top if top else N)).astype(dt)
    
    def data(k, N_b, N_e, dt_b, dt_e, b_fun=RI, e_fun=RA):
        b = list(b_fun(k, N_b, dt_b))
        e = list(e_fun(k, N_e, dt_e))
        return b, e
    
    def f_vander(b, e):
        return np.vander(b, len(e), increasing=True)
    
    def f_bc_b(b, e):
        return b[:, None]**e
    
    def f_bc_e(b, e):
        return np.ascontiguousarray((b**e[:, None]).T)
    
    def f_ma(b, e):
        out = np.empty((len(b), len(e)), prom[b.dtype])
        out[:, 0] = 1
        np.multiply.accumulate(np.broadcast_to(b, (len(e)-1, len(b))), axis=0, out=out[:, 1:].T)
        return out
    
    def f_nnc(b, e):
        out = np.empty((len(b), len(e)), prom[b.dtype])
        out[:, 0] = 1
        out[:, 1:] = b[:, None]
        np.multiply.accumulate(out[:, 1:], out=out[:, 1:], axis=1)
        return out
    
    def f_out_e_1(b, e):
        out = np.empty((len(b), len(e)), b.dtype)
        out[:, 0] = 1
        out[:, 1] = b
        out[:, 2] = c = b*b
        for i in range(3, len(e)):
            c*=b
            out[:, i] = c
        return out
    
    def f_out_e_2(b, e):
        out = np.empty((len(b), len(e)), b.dtype)
        out[:, 0] = 1
        out[:, 1] = b
        out[:, 2] = b*b
        for i in range(3, len(e)):
            out[:, i] = out[:, i-1] * b
        return out
    
    def f_safe_e_1(b, e):
        out = np.empty((len(b), len(e)), prom[b.dtype])
        out[:, 0] = 1
        out[:, 1] = b
        out[:, 2] = c = (b*b).astype(prom[b.dtype])
        for i in range(3, len(e)):
            c*=b
            out[:, i] = c
        return out
    
    def f_safe_e_2(b, e):
        out = np.empty((len(b), len(e)), prom[b.dtype])
        out[:, 0] = 1
        out[:, 1] = b
        out[:, 2] = b*b
        for i in range(3, len(e)):
            out[:, i] = out[:, i-1] * b
        return out
    
    def f_bc_e_cheat(b, e):
        return (b**e[:, None]).T
    
    for params in [(100, 5000, 4, np.int32, np.int32),
                   (100, 5000, 8, np.int32, np.int32),
                   (100, 5000, 4, float, np.int32),
                   (100, 5000, 8, float, np.int32)]:
        k = params[0]
        dat = data(*params)
        ref = f_vander(dat[0][0], dat[1][0])
        print('rep={} n_b={} n_e={} b_tp={} e_tp={}'.format(*params))
        for name, func in list(globals().items()):
            if not name.startswith('f_') or not isinstance(func, types.FunctionType):
                continue
            try:
                assert np.allclose(ref, func(dat[0][0], dat[1][0]))
                if not func(dat[0][0], dat[1][0]).flags.c_contiguous:
                    print('cheat', end=' ')
                print("{:16s}{:16.8f} ms".format(name[2:], np.min(repeat(
                    'f(b.pop(), e.pop())', setup='b, e = data(*p)', globals={'f':func, 'data':data, 'p':params}, number=k)) * 1000 / k))
            except:
                print("{:16s} apparently failed".format(name[2:]))
    

提交回复
热议问题