I need to find unique rows in a numpy.array
.
For example:
>>> a # I have
array([[1, 1, 1, 0, 0, 0],
[0, 1, 1, 1, 0, 0],
I've compared the suggested alternative for speed and found that, surprisingly, the void view unique
solution is even a bit faster than numpy's native unique
with the axis
argument. If you're looking for speed, you'll want
numpy.unique(
a.view(numpy.dtype((numpy.void, a.dtype.itemsize*a.shape[1])))
).view(a.dtype).reshape(-1, a.shape[1])
Code to reproduce the plot:
import numpy
import perfplot
def unique_void_view(a):
return numpy.unique(
a.view(numpy.dtype((numpy.void, a.dtype.itemsize*a.shape[1])))
).view(a.dtype).reshape(-1, a.shape[1])
def lexsort(a):
ind = numpy.lexsort(a.T)
return a[ind[
numpy.concatenate((
[True], numpy.any(a[ind[1:]] != a[ind[:-1]], axis=1)
))
]]
def vstack(a):
return numpy.vstack({tuple(row) for row in a})
def unique_axis(a):
return numpy.unique(a, axis=0)
perfplot.show(
setup=lambda n: numpy.random.randint(2, size=(n, 20)),
kernels=[unique_void_view, lexsort, vstack, unique_axis],
n_range=[2**k for k in range(15)],
logx=True,
logy=True,
xlabel='len(a)',
equality_check=None
)