from glob import glob
# Glob pattern selecting every .txt file directly inside D:\report\shakeall.
# Backslashes are escaped once so the literal holds single path separators.
pattern = "D:\\report\\shakeall\\*.txt"
# Paths of all files matching the pattern; an empty list when nothing matches.
filelist = glob(pattern)
def countwords(fp):
with open(fp) as fh:
return len(fh.rea
import re
Then replace
[uniquewords.add(x) for x in open(os.path.join(root,name)).read().split()]
with
[uniquewords.add(re.sub('[^a-zA-Z0-9]*$', '', x)) for x in open(os.path.join(root,name)).read().split()]
This will strip all trailing non-alphanumeric characters from each word before adding it to the set.