I need to make a program in Python that prints out the frequency of letters in a text file and compares that frequency with the letter frequency of another text file.
So far I am able to prin
I think this is a really straightforward way to do it:
# Print the relative frequency of each ASCII letter in a user-chosen file.
#
# The loop prompts for a file name forever; it ends when the prompt or the
# open() call raises (for example on Ctrl-C, end of input, or a bad path).

# Python 2 calls the line-reading prompt raw_input; Python 3 renamed it input.
try:
    read_line = raw_input
except NameError:
    read_line = input

alphas = 'abcdefghijklmnopqrstuvwxyz'

while True:
    speech = read_line("Enter file name:")
    # The file is only read, so plain 'r' is enough ('r+' would also demand
    # write permission), and the with-block closes the handle right away.
    with open(speech, 'r') as handle:
        wholeFile = handle.read()
    lowlet = wholeFile.lower()

    # lets set default values first
    # (for python<=2.6 use: dict(zip(alphas, [0]*len(alphas))))
    occurrences = {letter: 0 for letter in alphas}
    # total number of valid letters
    total = 0
    # iter everything in the text
    for letter in lowlet:
        # if it is a valid letter then it is in occurrences
        if letter in occurrences:
            # update counts
            total += 1
            occurrences[letter] += 1

    # now print the results:
    for letter, count in occurrences.items():
        # A file without any letters has total == 0; report 0.0 for every
        # letter instead of dividing by zero.
        frequency = (1.0 * count / total) if total else 0.0
        print("%s %s" % (letter, frequency))
As you noticed, you need the total number of valid letters in the text before you can calculate the frequency. Either you filter the text before processing it, or you combine the filtering with the processing, which is what I do here.
You could use the translator recipe to drop all characters not in `alpha`.
Since doing so makes `letters` contain nothing but characters from `alpha`, `n` is now the correct denominator.
You could then use a `collections.defaultdict(int)` to count the occurrences of the letters:
import collections
import string
class _DropUnlisted(dict):
    """Translation table that deletes every character it has no entry for."""

    def __missing__(self, key):
        # str.translate removes a character whose table lookup yields None.
        return None


def translator(frm='', to='', delete='', keep=None):
    """Build a one-argument function that translates and filters a string.

    Args:
        frm: Characters to replace.
        to: Replacement characters, matched to ``frm`` by position. A
            single character is used as the replacement for all of ``frm``.
        delete: Characters to remove from the input.
        keep: If not None, every character that is not in
            ``set(keep) - set(delete)`` is removed.

    Returns:
        A function ``translate(s)`` that returns the processed copy of ``s``.

    Raises:
        ValueError: If ``frm`` and ``to`` end up with different lengths.
    """
    # Python Cookbook Recipe 1.9
    # Chris Perkins, Raymond Hettinger
    if len(to) == 1:
        to = to * len(frm)

    if hasattr(string, 'maketrans'):
        # Python 2: byte strings accept a 256-character table plus a string
        # of characters to delete.
        trans = string.maketrans(frm, to)
        if keep is not None:
            allchars = string.maketrans('', '')
            # delete is expanded to delete everything except
            # what is mentioned in set(keep)-set(delete)
            delete = allchars.translate(
                allchars, keep.translate(allchars, delete))

        def translate(s):
            return s.translate(trans, delete)
        return translate

    # Python 3: string.maketrans no longer exists and str.translate takes a
    # single mapping in which a None value means "delete this character".
    if keep is None:
        table = str.maketrans(frm, to, delete)
    else:
        remap = str.maketrans(frm, to)
        kept = set(keep) - set(delete)
        # Only the kept characters get an entry; everything else falls
        # through to _DropUnlisted.__missing__ and is removed.
        table = _DropUnlisted(
            (ord(char), remap.get(ord(char), ord(char))) for char in kept)

    def translate(s):
        return s.translate(table)
    return translate
# Count how often each lowercase ASCII letter occurs in a user-chosen file
# and print the letter, its count and its share of all letters.
alpha = 'abcdefghijklmnopqrstuvwxyz'
# Built once: the returned function drops every character not in alpha.
keep_alpha=translator(keep=alpha)

# Python 2 calls the line-reading prompt raw_input; Python 3 renamed it input.
try:
    read_line = raw_input
except NameError:
    read_line = input

while True:
    speech = read_line("Enter file name:")
    # The file is only read, so plain 'r' is enough ('r+' would also demand
    # write permission), and the with-block closes the handle right away.
    with open(speech, 'r') as handle:
        wholeFile = handle.read()
    lowlet = wholeFile.lower()
    letters = keep_alpha(lowlet)
    # letters holds nothing but characters from alpha, so its length is the
    # denominator for the frequencies.
    n = len(letters)
    occurrences = collections.defaultdict(int)
    for x in letters:
        occurrences[x] += 1
    # Only letters that actually occurred are listed, so n is never 0 here.
    for x in occurrences:
        print("%s %s %s" % (x, occurrences[x], occurrences[x] / float(n)))
import collections
import re
from __future__ import division
file1 = re.subn(r"\W", "", open("file1.txt", "r").read())[0].lower()
counter1 = collections.Counter(file1)
for k, v in counter1.iteritems():
counter1[k] = v / len(file1)
file2 = re.subn(r"\W", "", open("file2.txt", "r").read())[0].lower()
counter2 = collections.Counter(file2)
for k, v in counter2.iteritems():
counter2[k] = v / len(file2)
Note: requires Python 2.7 (`collections.Counter` was added in that version).