I want to take every word from a text file, and count the word frequency in a dictionary.
Example: 'this is the textfile, and it is used to take words and count'
My first approach is to build everything from the ground up:
def split_into_words(text):
    """Split *text* into words made of ASCII letters and digits.

    Any whitespace character (space, tab, newline, ...) ends the current
    word. Every other character that is not an ASCII letter or digit is
    dropped without ending the word, so ``"textfile,"`` yields
    ``"textfile"``. Empty words (from leading, trailing or repeated
    whitespace, or from empty input) are never emitted.

    Returns the words as a list, in order of appearance, with their
    original letter case.
    """
    words = []
    current = []  # characters of the word currently being built
    for ch in text:
        if 'a' <= ch <= 'z' or 'A' <= ch <= 'Z' or '0' <= ch <= '9':
            current.append(ch)
        elif ch.isspace():
            # Only flush a non-empty word; a run of whitespace must not
            # produce empty "words" that would later be counted.
            if current:
                words.append(''.join(current))
                current = []
    if current:  # the loop ends before the last word has been flushed
        words.append(''.join(current))
    return words


def count_word_frequencies(words):
    """Return a dict mapping each lower-cased word to how often it occurs.

    Keys appear in the order in which each word is first seen.
    """
    counts = {}
    for w in words:
        key = w.lower()  # counting is case-insensitive
        counts[key] = counts.get(key, 0) + 1
    return counts


text = '''this is the textfile, and it is used to take words and count'''
wordList = split_into_words(text)  # every word of the text, in order
print(wordList)
wordCountDict = count_word_frequencies(wordList)  # word -> frequency
print(wordCountDict)
Another approach:
def split_on_punctuation(text, separators='.\'!")(,;:?-\n'):
    """Split *text* into words, treating *separators* and whitespace as breaks.

    Every character in *separators* is replaced by a space in a single
    pass, then the result is split on runs of whitespace. Splitting on
    whitespace in general (rather than on a single ``' '``) means tabs and
    carriage returns also separate words, and no empty strings are
    produced for repeated, leading or trailing separators.

    Returns the words as a list, in order of appearance, with their
    original letter case.
    """
    to_space = str.maketrans(separators, ' ' * len(separators))
    return text.translate(to_space).split()


def tally_words(words):
    """Return a dict mapping each lower-cased word to how often it occurs.

    Keys appear in the order in which each word is first seen.
    """
    tally = {}
    for w in words:
        key = w.lower()  # counting is case-insensitive
        tally[key] = tally.get(key, 0) + 1
    return tally


text = '''this is the textfile, and it is used to take words and count'''
wordsArray = split_on_punctuation(text)  # words only, no empty entries
wordDict = tally_words(wordsArray)  # word -> frequency
print(wordDict)