c++ - c++中的HTML词法分析器如何打印结果

Question

我希望有人能再次帮助我解决这个问题。我正在用 C++ 创建一个 HTML 词法分析器。据老师说我应该有3个文件。一个标头和 2 个主 .cpp，它应该能够读取文件 这是我的文件 try.txt

<<<<<Hello there <H1 style=”BOLD”&gt;header!!</H1> 
<<
<< =

这是我的标题

#ifndef tokens_h
#define tokens_h
#include <string>
#include <iostream>

            enum tokens {TEXT, LANGLE = 60, RANGLE = 62, SLASH = 47, ID, EQ = 61, QSTRING = 34, OTHER, END};

/* TEXT    = 0
   LANGLE  = 60
   RANGLE  = 62
   SLASH   = 47
   ID      = 48
   EQ      = 61
   QSTRING = 34
   OTHER   = 36
   END     = 36

*/
            int getToken(std::istream *br, std::string a);

#endif

这是我的 main.cpp

#include <iostream>
#include <fstream>
#include <vector>
#include "tokens.h"


using namespace std;

int main(int argc, char *argv[])
{
    //defineTokens();
    istream *br;
    ifstream infile;
    string output;
    int a;
    vector<int> count;
    int langle = 0;

            string line;
    if(argc == 1){
        while(cin.good() ){     //Get continous input
                br = &cin;

           getline(cin,line);
           getToken(br,line);
       }
    }
    else if(argc != 2){
        return 1;
    }else{
        infile.open(argv[1]);
        if( infile.is_open()){
            br = &infile;
            while(!infile.eof()){
            getline(infile,output);
        getToken(br,output);
            }
        }

        else{
            cout << argv[1] << "Can't Be Opened" << endl;
            return 1;
        }
    }
}

这是我的 tokens.cpp 我打印结果的地方

#include <iostream>
#include <stdio.h>
#include <string>
#include <vector>
#include <algorithm>
#include <numeric>
#include <map>
#include <utility>
#include "tokens.h"



using namespace std;

void compar(int ch)
{
    vector<int> text;
    vector<int> langle;
    //string langle;
    vector<int> rangle;
    vector<int> slash;
    vector<int> id;
    vector<int> eq;
    vector<int> qstring;
    vector<int> other;
    map <string, int> result;
    int c=0;
    int d=0;
    int sum;
    string r;

    switch(ch){
        case 60:static int countlangle = 0;
                countlangle ++;
                result["LANGLE"]= countlangle; 
                cout << "LANGLE: " << result["LANGLE"] << " ";
                break;

        case 62:static int countrangle = 0;
                countrangle ++;
                result["RANGLE"]= countrangle; 
                cout << "RANGLE: " << result["RANGLE"] << " ";
                break;

        case 47:static int countslash = 0;
                countslash ++;
                result["SLASH"]= countslash; 
                cout << "SLASH: " << result["SLASH"] << " ";
                break;      

        case 61:static int counteq = 0;
                counteq ++;
                result["EQ"]= counteq; 
                cout << "EQ: " << result["EQ"] << " ";
                break;                                      

        case 34:static int countqstring = 0;
                countqstring ++;
                result["QSTRING"]= countqstring; 
                cout << "QSTRING: " << result["QSTRING"] << " ";
                break;  
    }


}
int getToken(istream *br, string a)
{

    int b;
    string d = "no";
    string f = "no";
    string r;
    vector<char> st;
    vector<string> trial;
    vector<int> countr;
    vector<int> countl;
    vector<char> quotes;
    string ans;
    int x=0;

    r = a;
    cout << a[27];


     int found;
            found = a.find('\"');
                        cout << found<<"XXxxxxxX";  


        for(int i = 0; i< a.length();i++){  //read entire string
        if(a[i] == '<'){
            // cout << LANGLE << " "; 
            d="yes";
            x +=1;
            countr.push_back(LANGLE);
            //cout << count.size();
            //cout << x;
            compar(LANGLE);
            b =LANGLE;

    //    return LANGLE;
        }
        else if(a[i]== '>' && d == "yes"){
            f = "yes";
            b = RANGLE;  //assing to the int variable the value from the enum header
            compar(RANGLE);

        }
        else if(a[i]== '/' && d == "yes"){
            compar(SLASH);

        }
        else if(a[i] == '=' && d == "yes"){
            compar(EQ);

        }
        else if(a[found] == '\"' && d == "yes"){

           //   for(int k =0;k < quotes.size();k++)
            //cout << r[found] <<"XXX"; 
            compar(QSTRING);

        }   

        }
    return 0;
}

老师希望程序只打印每个案例的最终值。但是，当我打印时，我得到了这个

LANGLE: 1 ID: 1 EQ: 1 ID: 2 RANGLE: 1 ID: 3 LANGLE: 2 SLASH: 1 RANGLE: 2 LANGLE: 3 ID: 4 RANGLE: 3 LANGLE: 4 LANGLE: 5 LANGLE: 6 ID: 5 RANGLE: 4 LANGLE: 7 LANGLE: 8 EQ: 2

有没有办法得到这样的东西？

LANGLE = 8
RANGLE = 4
EQ = 2
SLASH = 1
ID = 4

提前致谢

score 0 · Accepted Answer

您compar()多次调用该函数，并在每次调用中打印当前令牌计数。这将为您提供每个令牌计数的每个中间值。

尝试修改您的compar()函数以仅增加计数。不要在其中打印值。解析完全部内容后，通过迭代映射来打印标记值对。

c++ - c++中的HTML词法分析器如何打印结果

1 回答 1

Related

Reference