want to read more than 50,000 txt files and save them in linked list in C++

假装没事ソ 提交于 2019-12-26 15:07:56

问题


#include<iostream>
#include<windows.h>
#include<string>
#include<fstream>
using namespace std;
class linklist     //linked list class
{
    struct main_node;
    struct sub_node;

    struct main_node   // main node that only have head pointers in it
    {
        sub_node *head;
        main_node()
        {   head=NULL;  }
    };
    main_node array[26];
    struct sub_node
    {
        double frequency;
        string word;
        sub_node *next;
        sub_node()
        {   frequency=1;    word="";    next=NULL;  }
    };

public:
    void add_node(string phrase)
    {
        char alphabat1=phrase[0];
        if(isupper(alphabat1))
        {
            alphabat1=tolower(alphabat1);
        }
        if(!isalpha(alphabat1))
            return;

        sub_node*temp = new sub_node;
        temp->word = phrase;

        sub_node*current = array[alphabat1-97].head;

        if(current == NULL)
        array[alphabat1-97].head = temp;

        else
        {
            while(current -> next != NULL && phrase != current-> word)
            {   current= current->next; }

            if(current->word == phrase)
                current->frequency++;
            else
                current->next  = temp; //adding words to linklist
        }
    }

    void display()
    {
        for(int i=0;i<26;i++)
        {
        sub_node *temp=array[i].head;
        cout<<char(i+97)<<" -> ";
        while(temp!=NULL)
        {
            cout<<temp->word<<" ("<<temp->frequency<<")  ";
            temp=temp->next;
        }
        cout<<"\n";
        }
    }
void parsing_documents(char *path)
{
    char token[100];
    ifstream read;
    read.open(path);
    do
    {
        read>>token;    // parsing words
        add_node(token);    //sending words to linked list

    }
    while(!read.eof());
        read.clear();
        read.close();
}
void reading_directory()
{
 // code to read multiple files

   HANDLE          hFile;                // Handle to file
   WIN32_FIND_DATA FileInformation;      // File information
   char tempPattern[90];
   strcpy(tempPattern,"*.txt");
   hFile = ::FindFirstFile(tempPattern, &FileInformation);
   long count=0;
   if(hFile != INVALID_HANDLE_VALUE)
   {
        do
        {
            count++;
            cout<<"."<<count;
            this->parsing_documents( FileInformation.cFileName);
        }
        while(TRUE == ::FindNextFile(hFile, &FileInformation));
   } 
    ::FindClose(hFile);

}
};
void main()
{
    linklist member;
    member.reading_directory();
    member.display();
}

I am working on a project in which I have to read more than 50,000 text files parse their words and save them in a linked list in a sorted manner , i have made the code in C++. it's working quite efficiently but I have one problem in this regard that it is not reading the files correctly sometimes 3000 sometimes 4000. I have searched for it a lot but i couldn't succeed to find my fault . . here is my code in C++ if any body help me in this regard i would be very thankful


回答1:


!read.eof() only checks for end of file, not errors reading the file, such as a networked mounted file system not being ready, disk error, or lack of permission to read the file. You should check for all failures, with while(read) which has an overloaded operator to check everything for you. So, if the file fails, you stop trying to read from it. You should also check the status before trying to read from the file. As such, while(read) { ... } is preferable to the do/while loop. After the loop, you might issue a warning or error to the user of you did not reach the end of file !read.eof() so they can investigate that specific file.

Try to avoid char * and char [] as much possible as this is highly error prone. You have a char[100]. What happens if the string is longer than 100 characters? read >> token may overwrite the stack -- such as to damage the ifstream read.

Consider using std::list<sub_node> to avoid having to re-invent and re-debug the wheel? You would no longer need the next pointer as std::list already does that for you. This would leave far less code to debug.



来源:https://stackoverflow.com/questions/12429621/want-to-read-more-than-50-000-txt-files-and-save-them-in-linked-list-in-c

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!