I am having trouble storing a numpy csr_matrix with PyTables. I\'m getting this error:
TypeError: objects of type ``csr_matrix`` are not supported in this co
I have updated Pietro Battiston's excellent answer for Python 3.6 and PyTables 3.x, as some PyTables function names have changed in the upgrade from 2.x.
import numpy as np
from scipy import sparse
import tables
def store_sparse_mat(M, name, filename='store.h5'):
"""
Store a csr matrix in HDF5
Parameters
----------
M : scipy.sparse.csr.csr_matrix
sparse matrix to be stored
name: str
node prefix in HDF5 hierarchy
filename: str
HDF5 filename
"""
assert(M.__class__ == sparse.csr.csr_matrix), 'M must be a csr matrix'
with tables.open_file(filename, 'a') as f:
for attribute in ('data', 'indices', 'indptr', 'shape'):
full_name = f'{name}_{attribute}'
# remove existing nodes
try:
n = getattr(f.root, full_name)
n._f_remove()
except AttributeError:
pass
# add nodes
arr = np.array(getattr(M, attribute))
atom = tables.Atom.from_dtype(arr.dtype)
ds = f.create_carray(f.root, full_name, atom, arr.shape)
ds[:] = arr
def load_sparse_mat(name, filename='store.h5'):
"""
Load a csr matrix from HDF5
Parameters
----------
name: str
node prefix in HDF5 hierarchy
filename: str
HDF5 filename
Returns
----------
M : scipy.sparse.csr.csr_matrix
loaded sparse matrix
"""
with tables.open_file(filename) as f:
# get nodes
attributes = []
for attribute in ('data', 'indices', 'indptr', 'shape'):
attributes.append(getattr(f.root, f'{name}_{attribute}').read())
# construct sparse matrix
M = sparse.csr_matrix(tuple(attributes[:3]), shape=attributes[3])
return M