I have two collections. One consists of m1 points in k dimensions and another one of m2 points in k dimensions. I n
This computes the distances for tensors of arbitrary rank (i.e. tensors of shape (..., N, d), holding N vectors of dimension d). Note that it does not compute distances between two collections (i.e. it is not like scipy.spatial.distance.cdist); instead, it computes them within a single batch of vectors (i.e. like scipy.spatial.distance.pdist).
import tensorflow as tf
import string
def pdist(arr):
    """Pairwise Euclidean distances between vectors contained at the back of tensors.

    Uses the expansion: (x - y)^T (x - y) = x^Tx - 2x^Ty + y^Ty

    Everything is expressed with broadcasting and a batched matmul, so the
    leading (batch) dimensions may be of any number and the sizes do not need
    to be statically known.

    NOTE: exact-zero distances (e.g. the diagonal) sit at the point where the
    derivative of sqrt is unbounded, so gradients taken through those entries
    are not finite.

    :param arr: (..., N, d) floating-point tensor, rank >= 2
    :returns: (..., N, N) tensor of pairwise distances between vectors in the second-to-last dim.
    :rtype: tf.Tensor
    """
    # Squared norm of every vector. (..., N)
    sq_norms = tf.reduce_sum(tf.square(arr), axis=-1)
    # Gram matrix of all pairwise inner products x^Ty. (..., N, N)
    gram = tf.matmul(arr, arr, transpose_b=True)
    # x^Tx varies along rows and y^Ty along columns; broadcasting pairs them
    # up without materialising tiled copies.
    sq_dists = sq_norms[..., :, None] - 2 * gram + sq_norms[..., None, :]
    # Round-off in the expansion can leave tiny negative values (typically on
    # the diagonal); clamp them so the square root never yields NaN.
    return tf.sqrt(tf.maximum(sq_dists, 0))