What is the fastest way to count lines and words in a text file in ANSI C?

前端 未结 3 1559
甜味超标
甜味超标 2020-12-04 03:29

What is the fastest way to count lines and words in a text file in pure ANSI C?

A word is terminated by a space or a period. A line is terminated by '\n'.

相关标签:
3条回答
  • 2020-12-04 03:46

    Here is an explicit answer that counts the number of lines (extension to the number of words is trivial à la the C++ version linked to in OP). This version is buffered. Another answer suggests reading the entire file in first, which is simpler, but the below is more in line with what your C++ example does.

    #include <stdio.h>
    #include <string.h>
    
    #define BUFSIZE 1024
    
    /* Count the lines of the text file named by argv[1] and print the total.

       The file is read in chunks of at most BUFSIZE-1 characters with fgets.
       A chunk that ends in '\n' completes a line.  A chunk that does not is
       the middle of a line longer than the buffer or an unterminated last
       line; it is remembered and only counted once, at end of file, if no
       newline ever followed it.

       Returns 0 on success and 1 on a usage error, when the file cannot be
       opened, or when reading fails.  Because fgets/strlen are used, a NUL
       byte inside the file hides the rest of that chunk. */
    int main(int argc, char** argv)
    {
      int newlines = 0;
      int partial = 0;   /* nonzero while the current line has no '\n' yet */
      char buf[BUFSIZE];
      FILE* file;
    
      if (argc != 2)
        return 1;
    
      file = fopen(argv[1], "r");
      if (file == NULL)
      {
        perror(argv[1]);
        return 1;
      }
    
      while (fgets(buf, BUFSIZE, file))
      {
        size_t len = strlen(buf);
    
        /* len can be 0 if the chunk starts with a NUL byte; guard the index. */
        if (len > 0 && buf[len - 1] == '\n')
        {
          newlines++;
          partial = 0;
        }
        else
        {
          partial = 1;
        }
      }
    
      if (ferror(file))
      {
        perror(argv[1]);
        fclose(file);
        return 1;
      }
    
      /* The last line had no terminating newline: still count it. */
      if (partial)
        newlines++;
    
      fclose(file);
    
      printf("Number of lines in %s: %d\n", argv[1], newlines);
    
      return 0;
    }
    

    The BUFSIZE macro can be tweaked to maximize performance (since you say you want the fastest way). 1024 is simply a guess. Another possibility is probably to read the file memory mapped, but I didn't try since mmap is not ANSI C.

    0 讨论(0)
  • 2020-12-04 03:49

    Maybe take a look at the source code of the GNU wc utility as this utility does exactly what you want.

    #include <ctype.h>
    #include <errno.h>
    #include <stdarg.h>
    #include <stdio.h>
    #include <stdlib.h>
    
    typedef unsigned long count_t;  /* Counter type used by every tally below */
    
    /* Counters for the file currently being processed: chars, words, lines.
       counter() resets them to zero before scanning a file; the COUNT macro
       and getword() increment them while scanning. */
    count_t ccount;
    count_t wcount;
    count_t lcount;
    
    /* Totals over all processed files: chars, words, lines.  counter() adds
       the per-file counters to these after each file is scanned. */
    count_t total_ccount = 0;
    count_t total_wcount = 0;
    count_t total_lcount = 0;
    
    /* Write the message described by FMT and AP to stderr and terminate the
       program with exit status 1; this function never returns.  If PERR is
       not 0, the message is followed by perror's description of errno,
       otherwise by a newline.

       errno is sampled on entry and restored before perror is called,
       because vfprintf is allowed to modify errno and perror would then
       describe the wrong error. */
    static void
    error_print (int perr, char *fmt, va_list ap)
    {
      int saved_errno = errno;
    
      vfprintf (stderr, fmt, ap);
      if (perr)
        {
          errno = saved_errno;
          perror (" ");
        }
      else
        fprintf (stderr, "\n");
      exit (1);
    }
    
    /* Report a printf-style formatted message on stderr and terminate the
       program with an error status.  The errno description is not
       appended. */
    static void
    errf (char *fmt, ...)
    {
      va_list args;
    
      va_start (args, fmt);
      error_print (0, fmt, args);   /* exits; control does not come back */
      va_end (args);
    }
    
    /* Report a printf-style formatted message on stderr, append the
       description of the current errno value, and terminate the program
       with an error status. */
    static void
    perrf (char *fmt, ...)
    {
      va_list args;
    
      va_start (args, fmt);
      error_print (1, fmt, args);   /* exits; control does not come back */
      va_end (args);
    }
    
    /* Print one result line on standard output for FILE: the line, word and
       character counts (in that order), followed by the file name. */
    void
    report (char *file, count_t ccount, count_t wcount, count_t lcount)
    {
      fprintf (stdout, "%6lu %6lu %6lu %s\n", lcount, wcount, ccount, file);
    }
    
    /* Tell whether C may be part of a word: the result is nonzero when
       isalpha accepts C and zero otherwise. */
    static int
    isword (unsigned char c)
    {
      int alphabetic = isalpha (c);
    
      return alphabetic;
    }
    
    /* Increase the character counter and, when C is a newline, the line
       counter as well.  Operates on the ccount and lcount variables that
       are in scope at the point of use.

       The body is wrapped in do { ... } while (0) so that `COUNT (c);'
       behaves as a single statement: without the wrapper, using the macro
       as the unbraced body of an if/for/while would only put `ccount++'
       under that control statement. */
    #define COUNT(c)           \
      do                       \
        {                      \
          ccount++;            \
          if ((c) == '\n')     \
            lcount++;          \
        }                      \
      while (0)
    
    /* Consume the next word from FP, updating the character, line and word
       counters on the way.  Returns 1 when a word and the character that
       ends it were read; returns 0 when end of file or a read error was
       reached first. */
    int
    getword (FILE *fp)
    {
      int ch;
    
      if (feof (fp))
        return 0;
    
      /* Skip everything up to the first word character, counting each
         skipped character.  The word character itself is counted below. */
      for (;;)
        {
          ch = getc (fp);
          if (ch == EOF)
            return 0;
          if (isword (ch))
            break;
          COUNT (ch);
        }
      wcount++;
    
      /* Count the characters of the word and the single character that
         terminates it. */
      do
        {
          COUNT (ch);
          if (!isword (ch))
            return 1;
          ch = getc (fp);
        }
      while (ch != EOF);
    
      return 0;
    }
    
    /* Count the characters, words and lines of the file named FILE, print
       its result line, and add its counts to the running totals.  If the
       file cannot be opened, perrf reports the failure and ends the
       program. */
    void
    counter (char *file)
    {
      FILE *stream;
    
      stream = fopen (file, "r");
      if (stream == NULL)
        perrf ("cannot open file `%s'", file);
    
      /* Start this file from zero. */
      lcount = 0;
      wcount = 0;
      ccount = 0;
    
      /* getword does all the counting; just keep calling it until it
         reports end of input. */
      while (getword (stream) != 0)
        continue;
      fclose (stream);
    
      report (file, ccount, wcount, lcount);
    
      total_lcount += lcount;
      total_wcount += wcount;
      total_ccount += ccount;
    }
    
    /* Entry point: process every file named on the command line and, when
       more than one file was given, print a final "total" line.  With no
       file argument a usage message is reported through errf. */
    int
    main (int argc, char **argv)
    {
      int arg;
    
      if (argc <= 1)
        errf ("usage: wc FILE [FILE...]");
    
      arg = 1;
      while (arg < argc)
        {
          counter (argv[arg]);
          arg++;
        }
    
      if (argc >= 3)
        report ("total", total_ccount, total_wcount, total_lcount);
      return 0;
    }
    

    Found at: http://www.gnu.org/software/cflow/manual/html_node/Source-of-wc-command.html

    0 讨论(0)
  • 2020-12-04 03:53
    • Read the file in.
    • Iterate over the characters, incrementing the character counter for each one.
    • If the character is a space or an end of line, increment the word counter.
    • Repeat the second and third steps until EOF.
    0 讨论(0)
提交回复
热议问题