I haven't found anything relevant on Google, so I'm hoping to find some help here :)
I've got a Python list as follows:
[['hoose', 200], ["Ba
import Levenshtein
import operator
import cluster
class Item(object):
    """A named entry carrying a value and a score, comparable by name distance."""

    @classmethod
    def fromList(cls, lst):
        """Build an Item from a nested list shaped ``[[name, val], score]``."""
        return cls(lst[0][0], lst[0][1], lst[1])

    def __init__(self, name, val=0, score=0):
        """Store ``name``, ``val`` and ``score`` on the instance."""
        super(Item, self).__init__()
        self.name = name
        self.val = val
        self.score = score

    def dist(self, other):
        """Return the Levenshtein distance between this name and ``other.name``.

        Comparing an item with itself returns the fixed value 100 rather
        than an edit distance.
        """
        # NOTE(review): the 100 for self-comparison looks like a sentinel that
        # keeps an item from being treated as close to itself -- confirm
        # against how the clustering code calls this.
        return 100 if other is self else Levenshtein.distance(self.name, other.name)

    def __str__(self):
        """Format the item as ``('name', val)``; the score is not included."""
        return "('{0}', {1})".format(self.name, self.val)
def main():
    """Cluster a small sample of items by name distance and print each group.

    Each printed line holds the summed score of one group, a colon, and the
    comma-separated string form of the group's members.
    """
    myList = [
        [['hoose', 5], 200],
        [['House', 5], 200],
        [["Bananaphone", 5], 10],
        [['trousers', 5], 100],
    ]
    items = [Item.fromList(i) for i in myList]
    # The clustering library is handed Item.dist as its pairwise distance.
    cl = cluster.HierarchicalClustering(items, lambda x, y: x.dist(y))
    # NOTE(review): 5 is presumably the distance threshold at which groups
    # are cut -- confirm against the cluster package's getlevel() docs.
    for group in cl.getlevel(5):
        groupScore = sum(item.score for item in group)
        groupStr = ', '.join(str(item) for item in group)
        # A parenthesised single-argument print produces the same output on
        # Python 2 and also runs on Python 3.
        print("{0}: {1}".format(groupScore, groupStr))
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
When run, this prints:
10: ('Bananaphone', 5)
500: ('trousers', 5), ('hoose', 5), ('House', 5)