Count letters in a text file

后端未结

关注

 8  1092

I am a beginner Python programmer and I am trying to write a program that counts the number of letters in a text file. Here is what I've got so far:

import


                      
              相关标签:


      
      
        
          8条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  醉话见心        
                
              
                            
                2020-12-06 21:54
              
            
            
                                                                       
You could split the problem into two simpler tasks:

#!/usr/bin/env python
import fileinput # accept input from stdin and/or files specified at command-line
from collections import Counter
from itertools import chain
from string import ascii_lowercase

# Step 1: tally every character seen in the input (these are bytes on Python 2).
freq = Counter()
for line in fileinput.input():  # the input is consumed lazily, one line at a time
    freq.update(line)

# Step 2: report each ASCII letter together with its case-insensitive total.
for letter in ascii_lowercase:
    total = freq[letter.upper()] + freq[letter]  # upper- and lower-case are merged
    if not total:
        continue  # letters that never occur are not printed
    print(letter, total)

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  天命终不由人        
                
              
                            
                2020-12-06 21:56
              
            
            
                                                                       
import sys

def main():
    """Print how many letters the file named on the command line contains.

    The path is taken from ``sys.argv[1]``.  A missing argument or a file
    that cannot be opened is reported on stdout instead of raising.
    """
    try:
        # ``open`` replaces the Python-2-only ``file`` builtin, and ``with``
        # guarantees the handle is closed even if reading fails.
        with open(sys.argv[1], 'r') as fileCountAllLetters:
            text = fileCountAllLetters.read()
    except IndexError:
        print("You forget add file in argument!")
    except IOError:
        print("File like this not your folder!")
    else:
        # Count alphabetic characters only; len(text) would also count
        # whitespace, digits and punctuation.
        letter_count = sum(1 for ch in text if ch.isalpha())
        # A single pre-formatted argument prints identically whether print
        # is the Python 2 statement or the Python 3 function.
        print("%s %d" % ("Count all your letters: ", letter_count))

main()



  python file.py countlettersfile.txt

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  囚心锁ツ        
                
              
                            
                2020-12-06 22:04
              
            
            
                                                                       
Yet another way:

import sys
from collections import defaultdict

# Number of characters requested from stdin per read() call.
read_chunk_size = 65536

# Maps each lower-cased character to the number of times it was seen.
freq = defaultdict(int)
while True:
    # Keep reading until read() returns an empty string (end of input);
    # a single read() call would only ever see the first chunk.
    chunk = sys.stdin.read(read_chunk_size)
    if not chunk:
        break
    for c in chunk:
        # Key on the lowered string itself: ord() raises TypeError for
        # characters whose lowercase form is longer than one character
        # (for example U+0130).
        freq[c.lower()] += 1

# Most frequent symbols first.
for symbol, count in sorted(freq.items(), key=lambda kv: kv[1], reverse=True):
    print(symbol, count)



It outputs the symbols from the most frequent to the least frequent.

The character counting loop uses O(1) memory and can handle arbitrarily large files because it reads the file in read_chunk_size chunks.
                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  旧时难觅i        
                
              
                            
                2020-12-06 22:05
              
            
            
                                                                       
You have to use collections.Counter

from collections import Counter
text = 'aaaaabbbbbccccc'
c = Counter(text)  # maps each character to its number of occurrences
print(c)  # parenthesised so it runs on both Python 2 and Python 3


It prints:

Counter({'a': 5, 'c': 5, 'b': 5})


Your text variable should be:

import string
# Read the whole file; the with-block closes it as soon as the read is done.
with open('text.txt') as source:
    text = source.read()
# Keep only ASCII letters, lower-cased.  string.ascii_letters is used because
# string.letters does not exist on Python 3, and the result is joined back
# into a str because filter() returns a lazy iterator there.
text = ''.join(ch for ch in text.lower() if ch in string.ascii_letters)


For getting the output you need:

# items() exists on both Python 2 and Python 3 (iteritems() is Python 2 only),
# and the pre-formatted string prints the same under either print form.
for letter, repetitions in c.items():
    print("%s %s" % (letter, repetitions))


In my example it prints:

a 5
c 5
b 5


For more information, see the Counter documentation.
                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  庸人自扰        
                
              
                            
                2020-12-06 22:06
              
            
            
                                                                       
Using re:

import re

# Sample text to scan, and the dict that receives the per-letter counts.
context, m = 'some file to search or text', {}
letters = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
for letter in letters:
    # A single letter is a regex that matches itself, so findall() returns
    # one entry per occurrence of that letter.
    m[letter] = len(re.findall(letter, context))
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print('{0} -> {1}'.format(letter, m[letter]))


It is much more elegant and clean with Counter nonetheless.
                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  小鲜肉        
                
              
                            
                2020-12-06 22:07
              
            
            
                                                                       
import string
# Read every line up front; the with-block closes the file afterwards.
with open('text.txt', 'r') as fp:
    file_list = fp.readlines()
print(file_list)
freqs = {}  # lower-case letter -> number of occurrences
for line in file_list:
    for char in line.lower():
        # string.ascii_letters replaces string.letters, which does not
        # exist on Python 3; non-letters are skipped.
        if char in string.ascii_letters:
            freqs[char] = freqs.get(char, 0) + 1

print(freqs)

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
   
          
     1
2
下一页
           
           
        
                                  
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复