1

我希望有人可以帮助我解决这个问题。我正在用 C++ 编写一个 HTML 词法分析器。按照老师的要求,我应该有 3 个文件:一个头文件和 2 个 .cpp 源文件,并且程序应该能够读取文件。这是我的文件 try.txt:

<<<<<Hello there <H1 style=”BOLD”>header!!</H1> 
<<
<< =

这是我的头文件:

#ifndef tokens_h
#define tokens_h
#include <string>
#include <iostream>



        /*
         * Token codes used by the HTML lexer.
         *
         * The punctuation tokens carry the ASCII code of the character they
         * stand for. An enumerator without an explicit value is one more than
         * the enumerator written before it, which gives the values noted below.
         */
        enum tokens {
            TEXT,           //  0
            LANGLE  = 60,   // '<'
            RANGLE  = 62,   // '>'
            SLASH   = 47,   // '/'
            ID,             // 48 (SLASH + 1)
            EQ      = 61,   // '='
            QSTRING = 34,   // '"'
            OTHER,          // 35 (QSTRING + 1)
            END             // 36 (OTHER + 1)
        };

        /*
         * Scans one line of text (a) for tokens; br points to the stream the
         * line was read from. Defined in tokens.cpp.
         */
        int getToken(std::istream *br, std::string a);

#endif

这是我的 main.cpp

#include <iostream>
#include <fstream>
#include <vector>
#include "tokens.h"


using namespace std;

/**
 * Entry point: hands the input to getToken() one line at a time.
 *
 * With no command-line argument the lines are read from standard input;
 * with exactly one argument they are read from the file it names.
 *
 * @param argc  Number of command-line arguments, program name included.
 * @param argv  Command-line arguments; argv[1], when present, is the input file.
 * @return 0 on success, 1 when the argument count is wrong or the file
 *         cannot be opened.
 */
int main(int argc, char *argv[])
{
    std::string line;

    if (argc == 1) {
        // Loop on the result of the read itself. Testing good()/eof() before
        // reading lets one extra iteration through after the last line, which
        // would pass an empty string to getToken().
        while (std::getline(std::cin, line)) {
            getToken(&std::cin, line);
        }
        return 0;
    }

    if (argc != 2) {
        return 1;
    }

    std::ifstream infile(argv[1]);
    if (!infile.is_open()) {
        std::cout << argv[1] << "Can't Be Opened" << std::endl;
        return 1;
    }

    // Same read-then-test pattern as above; a final line without a trailing
    // newline is still delivered because getline() succeeds on it.
    while (std::getline(infile, line)) {
        getToken(&infile, line);
    }
    return 0;
}

这是我的 tokens.cpp,也就是我打印结果的地方:

#include <iostream>
#include <stdio.h>
#include <string>
#include <vector>
#include <algorithm>
#include <numeric>
#include <map>
#include <utility>
#include "tokens.h"



using namespace std;

/**
 * Counts one occurrence of a token and prints its running total.
 *
 * For a recognised code the output is "<NAME>: <count> " (note the trailing
 * space), where <count> is how many times that code has been passed to this
 * function so far. Any other code is ignored and prints nothing.
 *
 * @param ch  Token code: 60, 62, 47, 61 or 34.
 */
void compar(int ch)
{
    struct Tally {
        int code;
        const char* label;
        int count;
    };

    // Function-local static, so the counts persist from one call to the next.
    static Tally tallies[] = {
        {60, "LANGLE", 0},
        {62, "RANGLE", 0},
        {47, "SLASH", 0},
        {61, "EQ", 0},
        {34, "QSTRING", 0}
    };
    Tally* const last = tallies + sizeof(tallies) / sizeof(tallies[0]);

    for (Tally* t = tallies; t != last; ++t) {
        if (t->code != ch) {
            continue;
        }
        ++t->count;
        std::cout << t->label << ": " << t->count << " ";
        return;
    }
}
int getToken(istream *br, string a)
{

    int b;
    string d = "no";
    string f = "no";
    string r;
    vector<char> st;
    vector<string> trial;
    vector<int> countr;
    vector<int> countl;
    vector<char> quotes;
    string ans;
    int x=0;

    r = a;
    cout << a[27];


     int found;
            found = a.find('\"');
                        cout << found<<"XXxxxxxX";  


        for(int i = 0; i< a.length();i++){  //read entire string
        if(a[i] == '<'){
            // cout << LANGLE << " "; 
            d="yes";
            x +=1;
            countr.push_back(LANGLE);
            //cout << count.size();
            //cout << x;
            compar(LANGLE);
            b =LANGLE;

    //    return LANGLE;
        }
        else if(a[i]== '>' && d == "yes"){
            f = "yes";
            b = RANGLE;  //assing to the int variable the value from the enum header
            compar(RANGLE);

        }
        else if(a[i]== '/' && d == "yes"){
            compar(SLASH);

        }
        else if(a[i] == '=' && d == "yes"){
            compar(EQ);

        }
        else if(a[found] == '\"' && d == "yes"){

           //   for(int k =0;k < quotes.size();k++)
            //cout << r[found] <<"XXX"; 
            compar(QSTRING);

        }   

        }
    return 0;
}

程序读取 <、>、= 没有问题,但是当我尝试用 cout << a[27]; 读取 '\"' 时,得到的是:? 如果我用 cout << a; 打印,得到的是 <<<<<Hello there <H1 style=”BOLD”>header!!</H1> // 这就是我想要读取的字符串

当我使用 found = a.find('\"'); 时,它返回 -1。我的问题是:为什么我的程序无法识别引号?是我读取文件的方式有问题吗?

提前致谢

4

1 回答 1

4

您的文件包含:

”</h1>

而您的词法分析器寻找:

"

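这两个字符并不相同:文件中的 ” 是 Unicode 右双引号(U+201D),而词法分析器查找的是 ASCII 直引号 "(0x22)。因此 a.find('\"') 找不到匹配而返回 -1;如果文件是 UTF-8 编码,” 还会占用多个字节,a[27] 只取到其中的第一个字节,所以打印出来是 ?。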

于 2014-10-18T20:39:06.497 回答