-2

我需要读取两个文本文件,然后将第二个文件中的单词与第一个文件中的单词进行比较。两个文件中都出现的单词归入 KnownWords,其余不同的单词归入 UnknownWords。下一步,我需要在 DisplayMostFreqKnownWords() 函数中显示最常见的已知词,并在 DisplayMostFreqUnknownWords() 函数中显示最常见的未知词。我已经成功完成了 DisplayMostFreqKnownWords(),到目前为止输出正常。我把 DisplayMostFreqKnownWords() 中的代码原样复制到了 DisplayMostFreqUnknownWords() 中,但这个函数在输出中没有显示任何内容。我不知道哪里出了问题,有人能帮忙找出原因吗?

输出是:

Displaying most frequent known words
       Word      Count
        the        19
          a        14
         of        11
 artificial        11
       that        10
         to         7
     signal         7
        and         7
         in         6
       they         5
Displaying most frequent unknown words
       Word      Count

头文件:

// Maps a word to a vector of ints. The DisplayMostFreq* functions use the
// size of that vector as the word's occurrence count.
// NOTE(review): what each stored int represents is not visible in this
// header - confirm against ReadTxtFile().
typedef std::map<std::string, std::vector<int> > WordMap;
typedef WordMap::iterator WordMapIter;

// Holds a dictionary plus two word maps (known / unknown words) and offers
// member functions to read the inputs and print statistics about them.
class WordStats
{
public:
    WordStats();
    void ReadDictionary();
    void DisplayDictionary();
    void ReadTxtFile();
    void DisplayKnownWordStats();
    void DisplayUnknownWordStats();
    // Prints the most frequent words found in KnownWords.
    void DisplayMostFreqKnownWords();
    // Prints the most frequent words found in UnknownWords.
    void DisplayMostFreqUnknownWords();

private:
    WordMap KnownWords;                 // words classified as known
    WordMap UnknownWords;               // words classified as unknown
    WordMapIter Paragraph;              // shared cursor reused by the display loops
    std::set<std::string> Dictionary;   // presumably the reference word list - TODO confirm
    char Filename[256];                 // presumably the current input file name - TODO confirm
};  // a class definition must be terminated with a semicolon

我的程序:

// Displays the 10 most frequent words in KnownWords, highest count first.
//
// A word's frequency is the size of its vector in KnownWords. The words are
// re-keyed by that count in a multimap (several words can share one count),
// and the multimap is walked in reverse so the largest counts are printed
// first.
//
// Side effect: the member iterator Paragraph is used as the loop cursor and
// is left equal to KnownWords.end().
void WordStats::DisplayMostFreqKnownWords(){
    // count -> word; a multimap because the same count can occur more than once
    std::multimap<int, std::string> displayFreqWords;
    for (Paragraph = KnownWords.begin(); Paragraph != KnownWords.end(); ++Paragraph) {
        const int cnt = static_cast<int>(Paragraph->second.size());
        displayFreqWords.insert(std::pair<int, std::string>(cnt, Paragraph->first));
    }

    std::cout <<"           Word      Count\n";

    // The row counter must start at 0: reading an uninitialized int is
    // undefined behaviour and can make the loop condition fail immediately.
    // "< 10" (not "<= 10") limits the output to exactly 10 rows.
    int count = 0;
    for (std::multimap<int, std::string>::reverse_iterator rit = displayFreqWords.rbegin();
         count < 10 && rit != displayFreqWords.rend(); ++rit, ++count) {
        std::cout << std::setw(15) << rit->second << std::setw(10) << rit->first << '\n';
    }
}
// Displays the 10 most frequent words in UnknownWords, highest count first.
//
// A word's frequency is the size of its vector in UnknownWords. The words are
// re-keyed by that count in a multimap (several words can share one count),
// and the multimap is walked in reverse so the largest counts are printed
// first.
//
// Side effect: the member iterator Paragraph is used as the loop cursor and
// is left equal to UnknownWords.end().
void WordStats::DisplayMostFreqUnknownWords(){
    // count -> word; a multimap because the same count can occur more than once
    std::multimap<int, std::string> displayFreqUnknownWords;
    for (Paragraph = UnknownWords.begin(); Paragraph != UnknownWords.end(); ++Paragraph) {
        const int cnt = static_cast<int>(Paragraph->second.size());
        displayFreqUnknownWords.insert(std::pair<int, std::string>(cnt, Paragraph->first));
    }

    std::cout <<"           Word      Count\n";

    // The row counter must start at 0: reading an uninitialized int is
    // undefined behaviour, and an indeterminate value above 10 makes the loop
    // print nothing at all. "< 10" limits the output to exactly 10 rows.
    int count = 0;
    for (std::multimap<int, std::string>::reverse_iterator rrit = displayFreqUnknownWords.rbegin();
         count < 10 && rrit != displayFreqUnknownWords.rend(); ++rrit, ++count) {
        std::cout << std::setw(15) << rrit->second << std::setw(10) << rrit->first << '\n';
    }
}
4

1 回答 1

0

下面是一种实现方式,对应我所理解的您的用例。我使用了 C++17 的元组展开(即结构化绑定)。

我使用 unordered_map 来推断哪些词是已知的、哪些是未知的,并使用两个 multimap 来确定已知词和未知词的词频。

希望它有帮助。

#include <algorithm>
#include <fstream>
#include <functional>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <map>
#include <sstream>
#include <string>
#include <tuple>
#include <unordered_map>


// Build switch: 1 feeds the program from two in-memory strings,
// 0 (the default) reads left_file.txt and right_file.txt instead.
#define TESTING 0


#if TESTING

using input_type = std::istringstream;

// Returns the pair of fixed in-memory inputs used for the static test.
std::tuple<input_type, input_type> open_inputs() {
    return std::make_tuple(
            input_type("the big black cat sat on the grey mat"),
            input_type("the gold small cat lay on the purple mat"));
}

#else

using input_type = std::ifstream;

// Returns the pair of file streams, constructed from the two fixed file
// names. No check is made here that either file could be opened.
std::tuple<input_type, input_type> open_inputs() {
    return std::make_tuple(
            input_type("left_file.txt"),
            input_type("right_file.txt"));
}

#endif

// Per-word tally: how often the word was counted on the "left" side and on
// the "right" side.
struct Counts {
    int left_count = 0;   // occurrences counted for the left input
    int right_count = 0;  // occurrences counted for the right input

    // Combined number of occurrences across both sides.
    int total() const {
        const int sum = left_count + right_count;
        return sum;
    }

    // True only when both counters are non-zero, i.e. the word was counted
    // on each side.
    bool is_known() const {
        if (left_count == 0) {
            return false;
        }
        return right_count != 0;
    }

};

// Calls `f` once for every whitespace-delimited token extracted from `is`,
// in stream order, until extraction fails (normally at end of input).
template<class F>
void for_each_word_in_file(std::istream &is, F f) {
    using WordIterator = std::istream_iterator<std::string>;

    const WordIterator end_of_input;  // default-constructed: end-of-stream sentinel
    for (WordIterator current(is); current != end_of_input; ++current) {
        f(*current);
    }
}

int main() {

    // open files
    auto[left, right] = open_inputs();

    auto known_words = std::unordered_map<std::string, Counts>();

    // count words in each file

    for_each_word_in_file(left, [&known_words](auto &&word) {
        ++known_words[word].left_count;
    });

    for_each_word_in_file(right, [&known_words](auto &&word) {
        ++known_words[word].right_count;
    });

    // map counts to words, in descending order, allowing multiple entries of the same count

    std::multimap<int, std::string, std::greater<>> known_ordered, unknown_ordered;

    // iterate all words seen, putting into appropriate map

    for (auto&&[word, counts] : known_words) {
        (counts.is_known() ? known_ordered : unknown_ordered)
                .emplace(counts.total(), word);
    }

    // emit results

    std::cout << "Known words by frequency\n";
    for (auto&&[freq, word] : known_ordered) {
        std::cout << std::setw(15) << word << " " << freq << '\n';
    }

    std::cout << "\nUmknown words by frequency\n";
    for (auto&&[freq, word] : unknown_ordered) {
        std::cout << std::setw(15) << word << " " << freq << '\n';
    }
}
于 2018-05-09T12:23:04.893 回答