Python - Finding word frequencies of list of words in text file

后端 未结 4 1026
渐次进展
渐次进展 2020-12-01 12:46

I am trying to speed up my project to count word frequencies. I have 360+ text files, and I need to get the total number of words and the number of times each word from anot

4条回答
  •  佛祖请我去吃肉
    2020-12-01 13:49

    One possible implementation (using Counter)...

    Instead of printing the output, I think it would be simpler to write it to a CSV file and import that into Excel. See the csv module documentation at https://docs.python.org/3/library/csv.html and replace print_summary with a function that writes one row per file.

    import os
    from collections import Counter
    import glob
    
    def word_frequency(fileobj, words):
        """Count how often each of the specified words occurs in fileobj.

        Args:
            fileobj: An iterable of text lines, e.g. an open text file.
            words: An iterable of the words to count. Tokens are obtained by
                whitespace splitting and compared exactly (case-sensitive,
                punctuation is not stripped).

        Returns:
            A Counter with one key per entry in ``words``. Words that never
            occur in ``fileobj`` are present with a count of 0, so every
            result has the same set of keys.
        """
        # Materialise once: gives O(1) membership tests even when the caller
        # passes a list, and tolerates a one-shot iterable.
        wanted = frozenset(words)
        # Seed every requested word with 0 so absent words still appear in
        # the result (the original built this counter but then discarded it).
        ct = Counter(dict.fromkeys(wanted, 0))
        ct.update(
            token
            for line in fileobj
            for token in line.split()
            if token in wanted
        )
        return ct
    
    
    def count_words_in_dir(dirpath, words, action=None):
        """Tally the given words in every ``*.txt`` file directly inside dirpath.

        Args:
            dirpath: Directory whose ``*.txt`` files are scanned (not recursive).
            words: The words to count; passed through to ``word_frequency``.
            action: Optional callable invoked as ``action(filepath, counter)``
                for each file while that file is still open. When falsy, the
                counts are computed and dropped.
        """
        pattern = os.path.join(dirpath, '*.txt')
        for txt_path in glob.iglob(pattern):
            with open(txt_path) as handle:
                tally = word_frequency(handle, words)
                if not action:
                    continue
                action(txt_path, tally)
    
    
    def print_summary(filepath, ct):
        """Print a file's word counts as three lines followed by blank lines.

        The output is the file path, then the counted words in sorted order
        joined by ", ", then the matching counts in the same order.

        Args:
            filepath: Path of the file the counts belong to.
            ct: Mapping of word -> count (e.g. a Counter).
        """
        ordered = sorted(ct)
        word_line = ', '.join(ordered)
        count_line = ', '.join(str(ct[word]) for word in ordered)
        print('{0}\n{1}\n{2}\n\n'.format(filepath, word_line, count_line))
    
    
    # Example run: count three economic terms in every .txt file of the
    # current directory and print a summary per file.
    words = {'inflation', 'jobs', 'output'}
    count_words_in_dir('./', words, action=print_summary)
    

提交回复
热议问题