Calculate special correlation distance matrix faster

ぃ、小莉子 提交于 2019-12-04 16:58:32

I think you need to do this with Cython, here is an example:

#cython: boundscheck=False, wraparound=False, cdivision=True

import numpy as np

cdef extern from "math.h":
    bint isnan(double x)
    double sqrt(double x)

def pair_correlation(double[:, ::1] x):
    cdef double[:, ::] res = np.empty((x.shape[0], x.shape[0]))
    cdef double u, v
    cdef int i, j, k, count
    cdef double du, dv, d, n, r
    cdef double sum_u, sum_v, sum_u2, sum_v2, sum_uv

    for i in range(x.shape[0]):
        for j in range(i, x.shape[0]):
            sum_u = sum_v = sum_u2 = sum_v2 = sum_uv = 0.0
            count = 0            
            for k in range(x.shape[1]):
                u = x[i, k]
                v = x[j, k]
                if u == u and v == v:
                    sum_u += u
                    sum_v += v
                    sum_u2 += u*u
                    sum_v2 += v*v
                    sum_uv += u*v
                    count += 1
            if count == 0:
                res[i, j] = res[j, i] = -9999
                continue

            um = sum_u / count
            vm = sum_v / count
            n = sum_uv - sum_u * vm - sum_v * um + um * vm * count
            du = sqrt(sum_u2 - 2 * sum_u * um + um * um * count) 
            dv = sqrt(sum_v2 - 2 * sum_v * vm + vm * vm * count)
            r = 1 - n / (du * dv)
            res[i, j] = res[j, i] = r
    return res.base

To check the output without NAN:

import numpy as np
from scipy.spatial.distance import pdist, squareform, correlation
x = np.random.rand(2000, 20)
np.allclose(pair_correlation(x), squareform(pdist(x, "correlation")))

To check the output with NAN:

x = np.random.rand(2000, 20)
x[x < 0.3] = np.nan
r = pair_correlation(x)

i, j = 200, 60 # change this
mask = ~(np.isnan(x[i]) | np.isnan(x[j]))
u = x[i, mask]
v = x[j, mask]
assert abs(correlation(u, v) - r[i, j]) < 1e-12
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!