I want to compute an md5 hash not of a string, but of an entire data structure. I understand the mechanics of a way to do this (dispatch on the type of the value, canonical
I ended up writing it myself as I thought I would have to:
class Hasher(object):
    """Hashes Python data into md5.

    Feed values with `update()` and read the result with `digest()`.
    Strings, numbers, tuples/lists, sets, dicts and arbitrary objects
    (through their non-routine, non-dunder attributes) are supported,
    recursively.

    Every chunk written to the underlying md5 is length-prefixed and every
    container writes its element count first, so the byte stream is
    unambiguous: `[[1, 2]]` and `[[1], 2]` produce different digests, and
    string content can never be mistaken for a type tag.

    Limitations:
      * Self-referential (cyclic) data recurses until the interpreter's
        recursion limit is hit; no cycle detection is done.
      * Objects exposing no data attributes via `dir()` are distinguished
        only by their type.
      * The type tag is `str(type(v))`, whose text differs between
        Python 2 and Python 3, so digests are not portable across them.
    """

    # Text and byte strings: (str, unicode) on Python 2, (bytes, str) on
    # Python 3.  Spelled this way so no version-specific name is needed.
    _STRING_TYPES = (bytes, type(u""))
    # Plain numbers.  `type(1 << 64)` is `long` on Python 2 and just `int`
    # again on Python 3.
    _NUMBER_TYPES = (int, type(1 << 64), float)

    def __init__(self):
        self.md5 = md5()

    def _feed(self, chunk):
        """Write one length-prefixed chunk (text or bytes) to the md5."""
        if not isinstance(chunk, bytes):
            # md5 only accepts bytes; encode text deterministically.
            chunk = chunk.encode("utf-8")
        prefix = str(len(chunk)).encode("ascii") + b":"
        self.md5.update(prefix + chunk)

    def _digest_of(self, v):
        """Return the standalone digest of `v` without touching the main hash.

        The running md5 is swapped out temporarily so that `self.update`
        (including any subclass override) is reused for the sub-value.
        """
        outer = self.md5
        self.md5 = md5()
        try:
            self.update(v)
            return self.md5.digest()
        finally:
            self.md5 = outer

    def update(self, v):
        """Add `v` to the hash, recursively if needed.

        Raises whatever `getattr` raises if reading an attribute of an
        arbitrary object fails.
        """
        # The type tag keeps e.g. 1, 1.0, "1" and (1,) apart.
        self._feed(str(type(v)))
        if isinstance(v, self._STRING_TYPES):
            self._feed(v)
        elif isinstance(v, self._NUMBER_TYPES):
            # repr() keeps full float precision; str() may round it away.
            self._feed(repr(v))
        elif isinstance(v, (tuple, list)):
            self._feed(str(len(v)))
            for e in v:
                self.update(e)
        elif isinstance(v, (set, frozenset)):
            # Sets are unordered and their elements may not be comparable
            # with each other, so order by each element's own digest.
            digests = sorted(self._digest_of(e) for e in v)
            self._feed(str(len(digests)))
            for d in digests:
                self._feed(d)
        elif isinstance(v, dict):
            # Order entries by the digest of the key rather than by the key
            # itself: keys of mixed types cannot always be sorted.
            entries = sorted(
                ((self._digest_of(k), k) for k in v),
                key=lambda entry: entry[0],
            )
            self._feed(str(len(entries)))
            for key_digest, k in entries:
                self._feed(key_digest)
                self.update(v[k])
        else:
            # Arbitrary object: hash its data attributes.  dir() returns
            # names in sorted order, which makes this deterministic.
            attrs = []
            for k in dir(v):
                if k.startswith('__'):
                    continue
                a = getattr(v, k)
                if inspect.isroutine(a):
                    continue
                attrs.append((k, a))
            self._feed(str(len(attrs)))
            for k, a in attrs:
                self._feed(k)
                self.update(a)

    def digest(self):
        """Retrieve the digest of the hash."""
        return self.md5.digest()