c++ - 想要读取超过 50,000 个 txt 文件并将它们保存在 C++ 中的链表中

Question

#include<iostream>
#include<windows.h>
#include<string>
#include<fstream>
using namespace std;
class linklist     //linked list class
{
    struct main_node;
    struct sub_node;

    struct main_node   // main node that only have head pointers in it
    {
        sub_node *head;
        main_node()
        {   head=NULL;  }
    };
    main_node array[26];
    struct sub_node
    {
        double frequency;
        string word;
        sub_node *next;
        sub_node()
        {   frequency=1;    word="";    next=NULL;  }
    };

public:
    void add_node(string phrase)
    {
        char alphabat1=phrase[0];
        if(isupper(alphabat1))
        {
            alphabat1=tolower(alphabat1);
        }
        if(!isalpha(alphabat1))
            return;

        sub_node*temp = new sub_node;
        temp->word = phrase;

        sub_node*current = array[alphabat1-97].head;

        if(current == NULL)
        array[alphabat1-97].head = temp;

        else
        {
            while(current -> next != NULL && phrase != current-> word)
            {   current= current->next; }

            if(current->word == phrase)
                current->frequency++;
            else
                current->next  = temp; //adding words to linklist
        }
    }

    void display()
    {
        for(int i=0;i<26;i++)
        {
        sub_node *temp=array[i].head;
        cout<<char(i+97)<<" -> ";
        while(temp!=NULL)
        {
            cout<<temp->word<<" ("<<temp->frequency<<")  ";
            temp=temp->next;
        }
        cout<<"\n";
        }
    }
void parsing_documents(char *path)
{
    char token[100];
    ifstream read;
    read.open(path);
    do
    {
        read>>token;    // parsing words
        add_node(token);    //sending words to linked list

    }
    while(!read.eof());
        read.clear();
        read.close();
}
void reading_directory()
{
 // code to read multiple files

   HANDLE          hFile;                // Handle to file
   WIN32_FIND_DATA FileInformation;      // File information
   char tempPattern[90];
   strcpy(tempPattern,"*.txt");
   hFile = ::FindFirstFile(tempPattern, &FileInformation);
   long count=0;
   if(hFile != INVALID_HANDLE_VALUE)
   {
        do
        {
            count++;
            cout<<"."<<count;
            this->parsing_documents( FileInformation.cFileName);
        }
        while(TRUE == ::FindNextFile(hFile, &FileInformation));
   } 
    ::FindClose(hFile);

}
};
// Program entry point: fills a linklist via reading_directory() and then
// prints its contents with display().
// Standard C++ requires main to return int; `void main()` is a
// compiler-specific extension.
int main()
{
    linklist member;
    member.reading_directory();
    member.display();
    return 0;
}

我正在做一个项目，需要读取超过 50,000 个文本文件，解析其中的单词，并将它们以排序方式保存在链表中。我已经用 C++ 编写了代码，它运行得很快，但有一个问题：它无法正确读完所有文件，有时只读到 3000 个，有时只读到 4000 个。我搜索了很多资料，但仍然找不到错误所在。以上是我的 C++ 代码，如果有人能在这方面帮助我，我将非常感激。

score 1 · Accepted Answer

!read.eof() 仅检查是否到达文件结尾，而不检查读取文件时的错误，例如网络挂载的文件系统未准备好、磁盘错误或缺少读取文件的权限。您应该检查所有故障：while(read) 使用流对象重载的运算符，会为您检查所有错误状态。这样，如果文件读取失败，您就会停止继续从中读取。您还应该在尝试从文件中读取之前检查流的状态，因此 while(read) { ... } 比 do/while 循环更可取。在循环之后，如果没有到达文件末尾（即 !read.eof()），您可以向用户发出警告或错误，以便他们调查该特定文件。

尽量避免使用 char * 和 char []，因为它们很容易出错。你有一个 char[100]，如果字符串长度超过 100 个字符会怎样？read >> token 可能会覆盖堆栈——例如损坏 ifstream read。

可以考虑使用 std::list<sub_node>，以避免重新发明并重新调试轮子。这样您就不再需要 next 指针，因为 std::list 已经为您处理好了，需要调试的代码也会更少。

c++ - 想要读取超过 50,000 个 txt 文件并将它们保存在 C++ 中的链表中

1 回答 1

Related

Reference