我开始编写一个词法分析器,并制作了以下文件来测试到目前为止一切正常:
主文件
#include <iostream>
#include "Lexer.h"
#include "Token.h"
int main(void)
{
std::string str(""); // I'll use this to test expressions
Lexer lexer(str);
std::vector<Token> tokens = lexer.lex();
for(auto it = tokens.begin(); it != tokens.end(); ++it)
{
std::string str;
switch(it->type)
{
case TokenType::_EOF:
str = "EOF";
break;
case TokenType::ERROR:
str = "ERROR";
break;
case TokenType::SEMICOLON:
str = "SEMICOLON";
break;
case TokenType::PLUS:
str = "PLUS";
break;
case TokenType::LESS_THAN:
str = "LESS_THAN";
break;
case TokenType::GREATER_THAN:
str = "GREATER_THAN";
break;
case TokenType::INT:
str = "INT";
break;
case TokenType::ID:
str = "ID";
break;
case TokenType::WHITESPACE:
str = "WHITESPACE";
break;
default:
str = "<Unknown Token>";
}
std::cout << str << ", detail='" << it->detail << "'" << std::endl;
}
return 0;
}
lexer.lex() 行抛出异常。在看Lexer.h
:
std::vector<Token> Lexer::lex(void)
{
// Reset input pointer
inputPtr = 0;
updateCurrentChar();
// Read tokens until EOF
std::vector<Token> tokens;
Token *token = nullptr;
do
{
token = getNext();
tokens.push_back(*token);
} while(token->type != TokenType::_EOF);
return tokens;
}
该行tokens.push_back(*token)
抛出异常:
我尝试查看push_back()
此处的信息,并看到:
如果发生重新分配,则使用容器的分配器分配存储,这可能会在失败时抛出异常(对于默认分配器,如果分配请求不成功,则会抛出 bad_alloc)。
这似乎是我的问题,但我不明白为什么分配请求不会成功。
为了完整起见,以下是所有文件:
令牌.h
#pragma once
#include <string>
enum class TokenType
{
_EOF,
ERROR,
EQUALS,
SEMICOLON,
PLUS,
LESS_THAN,
GREATER_THAN,
INT,
ID,
WHITESPACE
};
struct Token
{
TokenType type;
std::string detail;
};
词法分析器
#pragma once
#include <string>
#include <vector>
#include "Token.h"
class Lexer
{
public:
Lexer(std::string);
~Lexer(void);
std::vector<Token> lex(void);
private:
Token* getNext(void);
void updateCurrentChar(void);
void increment(void);
bool matchCurrent(char);
bool isWhitespace(char) const;
bool isDigit(char) const;
bool isLetter(char) const;
bool isLowercaseLetter(char) const;
bool isUppercaseLetter(char) const;
std::string readWhitespace(void);
std::string readInt(void);
std::string readId(void);
std::string input;
int inputPtr;
char currentChar;
const char EOF_CHAR;
};
词法分析器.cpp
#include "Lexer.h"
Lexer::Lexer(std::string _input)
: input(_input), EOF_CHAR(-1)
{
}
Lexer::~Lexer(void)
{
}
std::vector<Token> Lexer::lex(void)
{
// Reset input pointer
inputPtr = 0;
updateCurrentChar();
// Read tokens until EOF
std::vector<Token> tokens;
Token *token = nullptr;
do
{
token = getNext();
tokens.push_back(*token);
} while(token->type != TokenType::_EOF);
return tokens;
}
void Lexer::updateCurrentChar(void)
{
currentChar = inputPtr < input.length()
? input[inputPtr]
: EOF_CHAR;
}
void Lexer::increment(void)
{
inputPtr++;
updateCurrentChar();
}
bool Lexer::matchCurrent(char toMatch)
{
if(toMatch == currentChar)
{
increment();
return true;
}
return false;
}
Token* Lexer::getNext(void)
{
Token token;
if(isWhitespace(currentChar))
{
token.type = TokenType::WHITESPACE;
token.detail = readWhitespace();
return &token;
}
if(isDigit(currentChar))
{
token.type = TokenType::INT;
token.detail = readInt();
return &token;
}
if(isLetter(currentChar))
{
token.type = TokenType::ID;
token.detail = readId();
return &token;
}
if(currentChar == EOF_CHAR)
{
token.type = TokenType::_EOF;
return &token;
}
switch(currentChar)
{
case ';':
token.type = TokenType::SEMICOLON;
case '=':
token.type = TokenType::EQUALS;
case '<':
token.type = TokenType::LESS_THAN;
case '>':
token.type = TokenType::GREATER_THAN;
case '+':
token.type = TokenType::PLUS;
default:
token.type = TokenType::ERROR;
token.detail = currentChar;
}
increment();
return &token;
}
std::string Lexer::readWhitespace(void)
{
std::string ws;
while(isWhitespace(currentChar))
{
ws += currentChar;
increment();
}
return ws;
}
std::string Lexer::readInt(void)
{
std::string ws;
while(isDigit(currentChar))
{
ws += currentChar;
increment();
}
return ws;
}
std::string Lexer::readId(void)
{
std::string ws;
while(isWhitespace(currentChar))
{
ws += currentChar;
increment();
}
return ws;
}
bool Lexer::isDigit(char c) const
{
return c >= '0' && c <= '9';
}
bool Lexer::isLetter(char c) const
{
return isLowercaseLetter(c)
|| isUppercaseLetter(c);
}
bool Lexer::isLowercaseLetter(char c) const
{
return c >= 'a' && c <= 'z';
}
bool Lexer::isUppercaseLetter(char c) const
{
return c >= 'A' && c <= 'Z';
}
bool Lexer::isWhitespace(char c) const
{
switch(c)
{
case ' ':
case '\n':
case '\t':
case '\r':
return true;
default:
return false;
}
}