I'm building an app that gets incoming SMSs, then based on a keyword, it looks to see if that keyword is associated with any campaigns that it is running. The way I'm doin
I use the Levenshtein distance to solve a similar problem; see http://en.wikipedia.org/wiki/Levenshtein_distance
def distance(u1, u2):
    """Return the Levenshtein (edit) distance between ``u1`` and ``u2``.

    The distance is the minimum number of single-item insertions,
    deletions and substitutions needed to turn one sequence into the
    other.  Only two rows of the dynamic-programming table are kept at
    any time.

    On Python 2 both arguments are first converted with ``unicode`` so
    that characters, not bytes, are compared.  If that conversion is not
    possible (including on Python 3, where ``unicode`` does not exist)
    the arguments are used exactly as given.

    Args:
        u1: First string (or other sized, iterable sequence).
        u2: Second string (or other sized, iterable sequence).

    Returns:
        The edit distance as a non-negative int; 0 when both are empty
        or equal.
    """
    try:
        s1 = unicode(u1)  # noqa: F821 - Python 2 builtin
        s2 = unicode(u2)  # noqa: F821 - Python 2 builtin
    except Exception:
        # Best effort, all-or-nothing: if either value cannot be
        # converted (or `unicode` is unavailable), compare the raw
        # inputs so both sides stay the same type.
        s1 = u1
        s2 = u2

    # Keep s2 as the shorter sequence so each row is as short as possible.
    if len(s1) < len(s2):
        s1, s2 = s2, s1

    if not s1:
        # s1 is the longer of the two, so both sequences are empty.
        return 0

    # Row 0 of the table: distance from the empty prefix of s1 to every
    # prefix of s2.  list(range(...)) works on both Python 2 and 3.
    previous_row = list(range(len(s2) + 1))
    for i, c1 in enumerate(s1):
        current_row = [i + 1]
        for j, c2 in enumerate(s2):
            # j + 1 instead of j since previous_row and current_row are
            # one element longer than s2.
            insertions = previous_row[j + 1] + 1
            deletions = current_row[j] + 1
            # (c1 != c2) is a bool: adds 1 on mismatch, 0 on match.
            substitutions = previous_row[j] + (c1 != c2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row

    return previous_row[-1]
# Example session: each expression is followed by the value it evaluated to.
# A threshold of 3 is used here to decide whether two keywords are "close enough".
# A near-miss spelling falls under the threshold.
distance("hamstir", "hamster") < 3
True
# An unrelated word does not.
distance("god", "hamster") < 3
False