C Program to count the word frequency in a text file

三世轮回 提交于 2020-01-03 04:46:15

问题


I need to be able to write a code in C programming that can read the text file and find how many of each word there is and output the word and how many times it occurs. Right now I have code that will print out each word and how many times it occurs, but I need it to print in alphabetical order and to ignore the uppercase letters. For example, "It" and "it" should be counted as the same word. I'm not sure where in my code to include the revisions. Below is an example of my code.

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int main(int argc, char *argv[])
{
    if (argc == 1) {
        printf("The input file name has not been provided\n");
    } else if (argc == 2) {
        FILE *f = fopen(argv[1], "rb");
        fseek(f, 0, SEEK_END);
        long fsize = ftell(f);
        fseek(f, 0, SEEK_SET);

        char *str = malloc(fsize + 1);
        fread(str, fsize, 1, f);
        fclose(f);

        str[fsize] = 0;
        int count = 0, c = 0, i, j = 0, k, space = 0;
        char p[1000][512], str1[512], ptr1[1000][512];
        char *ptr;
        for (i = 0; i < strlen(str); i++) {
            if ((str[i] == ' ') || (str[i] == ',') || (str[i] == '.')) {
                space++;
            }
        }
        for (i = 0, j = 0, k = 0; j < strlen(str); j++) {
            if ((str[j] == ' ') || (str[j] == 44) || (str[j] == 46)) {
                p[i][k] = '\0';
                i++;
                k = 0;
            } else
                p[i][k++] = str[j];
        }
        k = 0;
        for (i = 0; i <= space; i++) {
            for (j = 0; j <= space; j++) {
                if (i == j) {
                    strcpy(ptr1[k], p[i]);
                    k++;
                    count++;
                    break;
                } else {
                    if (strcmp(ptr1[j], p[i]) != 0)
                        continue;
                    else
                        break;
                }
            }
        }
        for (i = 0; i < count; i++) {
            for (j = 0; j <= space; j++) {
                if (strcmp(ptr1[i], p[j]) == 0)
                    c++;
            }
            printf("%s %d \n", ptr1[i], c);
            c = 0;
        }
    }
    return 0;
}

回答1:


Here is a minimal proposition, your code would probably need to be broken down into functions, but consider this is just some kind of draft proposition. You can simply replace your strcmp but strcasecmp for the case sensitive part.

Then for sorting, you can use qsort: define a function for comparison like:

int compar(const void *a, const void *b)
{
        return *(char *)a - *(char *)b;
}

and apply it on you word array. As far as I understood, ptr1 seems to hold your words, so you may add

   qsort(ptr1, count, sizeof(ptr1[0]), compar);

before your last for loop.

Nevertheless it seems to me that you need to fix you extracting loop as valgrind reports some errors in your code.




回答2:


Here is sample function using milkstrings functions

    typedef struct tWord {
      tXt tx ;
      int count ;
      struct tWord *   next ; } tWord;
    typedef struct tWord *pWord;

    void wordfrequency(void) {
      pWord np,prev,bprev,most,allword ;
      allword = NULL ;
      FILE * fi = fopen("sample.txt","r") ;
      if (fi == NULL)
        return ;
      tXt rlin = " " ;
      while (rlin != txtEndFile) {
        tXt lin = txtUpcase(txtTrim(rlin = txtFromFile(fi))) ;
        while (lin[0]) {
          tXt wrd = txtTrim(txtEat(&lin,' ')) ;
          np = allword ;
          while (np) {
            if (strcmp(np->tx,wrd) == 0) {
              np->count++ ;
              wrd = "" ; 
              np = NULL ;
              }
            else
              np = np->next ;}
          if (wrd[0]) {
            np = (pWord) malloc(sizeof(tWord)) ;
            np->tx = fridge(wrd) ;
            np->count = 1 ;
            np->next = allword ;
            allword = np ; } } }
      while (allword) {
        prev = bprev = NULL ;
        np = most = allword ;
        while (np) {
          if (strcmp(most->tx , np->tx) >0 ) {
            most = np ;
            bprev = prev ; }
          prev = np ;
          np = np->next ; }
        printf("%5d %s\n",most->count,unfridge(most->tx)) ;
        if (bprev)
          bprev->next = most->next ;
        else
          allword = most->next ;
        free (most) ;  }
      fclose(fi) ;
      if (txtAnyError())
        printf("%s\n",txtLastError()) ;

    }


来源:https://stackoverflow.com/questions/34070657/c-program-to-count-the-word-frequency-in-a-text-file

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!