I have written a lexical analyzer that parses a file and displays each token, its lexeme, and whether it is a value, valuer, or literal (int, float, or string literal).
I am fairly sure the analyzer code works, but I can't figure out how to write displayToken so that it shows the output in the following format:
Token Lexeme Value/valuer/literal
numt 1234 value
Here is my code for the lexical.cpp file:
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include "lexical.h"
// Shared scanner state; the matching extern declarations live in lexical.h.
// Objects with static storage duration start out zeroed, which the explicit
// initializers below simply spell out.
int         value   = 0;        // integer value of the current numeric token
float       valuer  = 0.0f;     // floating-point value of the current numeric token
char        ch      = '\0';     // most recently read input character
std::string lexeme;             // text of the current token
SYMBOL      Token   = begint;   // kind of the current token (begint is enumerator 0)
std::string Literal;            // contents of the current string literal
std::string reswords[15];       // reserved-word spellings, indexed by SYMBOL
using namespace std;
// Constructor: prepares the scanner for use.
//
// The input stream `fin` is a private member and the class exposes no way to
// open it from outside, so it has to be opened here. The file name matches
// the one the driver program uses ("test.txt").
//
// InitResWords() is private and has no other caller in the code shown, so
// the reserved-word table is also filled in here.
Lexical::Lexical()
{
    const char* const sourceFile = "test.txt";

    fin.open(sourceFile);
    if (!fin)
    {
        // A stream that failed to open makes every later read fail.
        cerr << "Error: cannot open " << sourceFile << endl;
    }

    InitResWords();
}
// Destructor: intentionally empty. The only data member is an ifstream,
// which releases its file on its own when the object is destroyed.
Lexical::~Lexical() {}
void Lexical::GetNextToken()
{
// getting the lexeme *******************
// **************************************
value = 0 ;
valuer = 0.0 ;
int i = 1 ; // line tracker
while(!(fin.eof()))
{
lexeme[i] = ch ;
fin.get(ch);
}
if(fin.peek() == '\n')
{
i++ ; // increment the line !
}
if(!fin.eof())
{
ProcessToken() ;
}
else
{
fin.close() ;
Token = eofilet ; // set to end of file
}
}
//rules not implemented yet
void Lexical::displayToken()
{
cout << "Token" << " " << "Lexeme" << " " << " value/valuer/Literal " << endl ;
cout << Token << " " << lexeme << " " << value ;
}
// Fills the reserved-word table so that reswords[symbol] is the spelling of
// that reserved word. The entries cover the enumerators begint..endt.
//
// Plain assignment is used on purpose: std::string::copy() copies characters
// OUT of the string into the buffer passed as its first argument, so it can
// never be used to store text into the string.
void Lexical::InitResWords()
{
    reswords[begint]     = "BEGIN";
    reswords[programt]   = "PROGRAM";
    reswords[constt]     = "CONST";
    reswords[vart]       = "VAR";
    reswords[proceduret] = "PROCEDURE";
    reswords[ift]        = "IF";
    reswords[whilet]     = "WHILE";
    reswords[thent]      = "THEN";
    reswords[elset]      = "ELSE";
    reswords[realt]      = "REAL";
    reswords[integert]   = "INTEGER";
    reswords[booleant]   = "BOOLEAN";
    reswords[chart]      = "CHAR";
    reswords[arrayt]     = "ARRAY";
    reswords[endt]       = "END";
}
void Lexical::ProcessToken()
{
lexeme.at(0) = ch ; // 1 character at a time
fin.get(ch) ;
if((lexeme.at(0) >= 'A' && lexeme.at(0) <= 'Z') || (lexeme.at(0) >= 'a' && lexeme.at(0) <= 'z')) // if alphabets
{
int counter = 0 ;
//match word token
if(!isdigit(lexeme.at(0)) && !isalpha(lexeme.at(0)) && lexeme.at(0) != '_')
{
//*********** Working with reserved words !!! **************************************************
int j = 0 ;
bool flag = false ;
while(j < endt)
{
if(lexeme.compare(reswords[j]) == 0)
{
Token = (SYMBOL)j;
displayToken();
flag = true ;
}
}
//**********************************************************************************************
// if not a token , then we are alrea
Token = idt ;// then an identifier token
displayToken();
return ;
}// if ends
lexeme.at(counter) = ch ;// keep proceeding
fin.get(ch) ;
}
else if (lexeme.at(0) >= '0' && lexeme.at(0) <= '9') // if numbers
{
NumToken() ;
}
else if (lexeme.at(0) == '\"') // for string literal
{
ProcessLiteralToken();
}
else if (lexeme.at(0) == '/') // entering comment section
{
if (ch == '/' || ch == '*')
{
// MatchComment();
if (ch == '/') // start of a comment maybe ?
{
//Line comment
while(ch != '\n')
fin.get(ch);
}
else if(ch == '*') // end of a comment ?
{
while(true)
{
fin.get(ch);
if (ch == '*')
{
char peek_value = fin.peek();
if (peek_value == '/')
{
fin.get(ch);
// fin.get(ch);
return;
}
else
continue;
}
} // while ends
}
else
{
cout << "ERROR !!!" ;
}
GetNextToken();
} // comment analyzer then moves to next token !
else
{
OpToken();
}
}
else if ((lexeme.at(0) == '<') || (lexeme.at(0) == '>') || (lexeme.at(0) == '='))
{
if (ch == '=')
{
lexeme.at(1) = ch;
Token = relop ;
fin.get(ch);
}
else
OpToken() ; // process the final token
}
else if ((lexeme.at(0)) == ':')
{
if (ch == '=')
{
lexeme.at(1) = ch;
Token = relop ;
fin.get(ch);
}
}
else
OpToken();
}
void Lexical::OpToken()
{
//Need to detect +, -, ||, *, /, &&, =, (), {}, comma, semicolon, period, quotation("), and []
if(lexeme.at(0) == '+' || lexeme.at(0) == '-' || lexeme.at(0) == 'OR')
{
Token = addop ;
return;
}
else if(lexeme.at(0) == '*' || lexeme.at(0) == '/' || lexeme.at(0) == 'DIV' || lexeme.at(0) == 'MOD' || lexeme.at(0) == 'AND')
{
Token = mulop ;
return;
}
else if(lexeme.at(0) == '<' || lexeme.at(0) == '>' || lexeme.at(0) == '=')
{
Token = relop ;
return;
}
else if(lexeme.at(0) == '(')
{
Token = lparen;
return;
}
else if(lexeme.at(0) == ')')
{
Token = rparen;
return;
}
else if(lexeme.at(0) == '{')
{
Token = clbrat;
return;
}
else if(lexeme.at(0) == '}')
{
Token = crbrat;
return;
}
else if(lexeme.at(0) == ',')
{
Token = comma;
return;
}
else if(lexeme.at(0) == ';')
{
Token = semicolon ;
return;
}
else if(lexeme.at(0) == '.')
{
Token = period ;
return;
}
else if(lexeme.at(0) == '~')
{
Token = tildat;
return;
}
else if(lexeme.at(0) == '[')
{
Token = lbrat;
return;
}
else if(lexeme.at(0) == ']')
{
Token = rbrat;
return;
}
else if(lexeme.at(0) == '#')
{
Token = nott ;
return;
}
else
{
Token = unknownt ;
cout << "Error !!" ;
exit(0);
}
}
// Scans the remainder of a numeric token.
//
// On entry `lexeme` already holds the first digit and `ch` is the character
// after it. Further characters are appended to `lexeme`.
//   digits             -> Token = numt, value  = integer value
//   digits '.' digits  -> Token = numt, valuer = floating-point value
//   digits '.' other   -> Token = unknownt, prints "Error." and terminates
// On a successful return `ch` holds the first character after the number
// ('\0' once the input is exhausted).
void Lexical::NumToken()
{
    // Integer part. A failed read leaves `ch` untouched, so it is reset to
    // '\0' to end the loop at end of file.
    while (isdigit(static_cast<unsigned char>(ch)))
    {
        lexeme += ch;
        if (!fin.get(ch))
        {
            ch = '\0';
        }
    }

    if (ch != '.')
    {
        // No fractional part: an integer.
        Token = numt;
        value = atoi(lexeme.c_str());
        return;
    }

    // A period was seen; it must be followed by at least one digit.
    lexeme += ch;
    if (!fin.get(ch))
    {
        ch = '\0';
    }

    if (!isdigit(static_cast<unsigned char>(ch)))
    {
        Token = unknownt;
        cout << "Error." << endl;
        exit(EXIT_FAILURE); // an error must not report success to the shell
    }

    // Fractional part.
    while (isdigit(static_cast<unsigned char>(ch)))
    {
        lexeme += ch;
        if (!fin.get(ch))
        {
            ch = '\0';
        }
    }

    Token = numt;
    valuer = static_cast<float>(atof(lexeme.c_str()));
}
void Lexical::ProcessLiteralToken()
{
int i = 0;
while (ch != '\"')
{
if (ch == '\n')
{
Token = unknownt ;
cout << "Error!" << endl ;
exit(0);
}
Literal.at(i++) = ch ; // advance !
fin.get(ch);
}
//Literal[i++] = ch;
Token = literalt;
fin.get(ch);
return;
}
My lexical.h (header file) is this:
#ifndef _LEXICAL_H
#define _LEXICAL_H
#include <iostream>
#include <fstream>
using namespace std ;
// enumerated data type
// Token kinds produced by the scanner. Enumerator values are implicit
// (0, 1, 2, ...), so the order below is significant: the first fifteen
// entries (begint..endt) double as indices into the reswords array.
enum SYMBOL
{
    // reserved words
    begint, programt, constt, vart, proceduret,
    ift, whilet, thent, elset, realt,
    integert, booleant, chart, arrayt, endt,
    // word operators
    divt, modt, andt, nott, ort,
    // operator classes
    addop, mulop, assignop,
    // punctuation
    lparen, rparen, comma, semicolon, period,
    // values and names
    numt, idt, literalt,
    // scanner status
    unknownt, eofilet,
    // remaining operators and brackets
    relop, clbrat, crbrat, tildat, lbrat, rbrat, colon
};
//extern int size = 15 ;
// Scanner state shared between the lexer and its clients; the definitions
// are in lexical.cpp.
extern int         value;         // integer value of a numeric token
extern float       valuer;        // floating-point value of a numeric token
extern char        ch;            // most recently read input character
extern std::string lexeme;        // text of the current token
extern SYMBOL      Token;         // kind of the current token
extern std::string Literal;       // contents of a string literal
extern std::string reswords[15];  // reserved-word spellings
// Lexical analyzer. Results of a scan are reported through the global
// variables declared above (Token, lexeme, value, valuer, Literal).
class Lexical
{
public:
    Lexical();   // constructor
    ~Lexical();  // destructor

    // Scans the next token from the input.
    void GetNextToken();

    // Prints the current token.
    void displayToken();

private:
    std::ifstream fin;  // input file being scanned

    // Sets up the reserved-word table.
    void InitResWords();

    // Classifies the token starting at the current character.
    void ProcessToken();

    // Handles operator and punctuation tokens.
    void OpToken();

    // Handles numeric tokens.
    void NumToken();

    // Handles string literals.
    void ProcessLiteralToken();
};
#endif // !_lexical_H
The full code is given above in case anyone wants to go through any of the functions.
I just cannot figure out how to display my tokens correctly (displayToken). Can someone help me with this function, so that for every token it parses, it displays it in this format: Token ----- Lexeme ------- Value/valuer/literal numt ----- 1234 ------ value
I just want to write the displayToken function. How would you display the lexeme, the token, and whether it is a value, valuer, or literal after each token is processed?
This is my driver file:
#include "lexical.h"
#include <iostream>
#include <fstream>
using namespace std;
// Driver: scans tokens one at a time and prints each of them, stopping once
// the end-of-file token has been produced (and printed).
int main()
{
    cout << "creating the constructor" << endl << endl ;
    Lexical myLex ;

    // No ifstream is opened here: Lexical reads through its own private
    // stream member, so a stream local to main() would never be seen by it.
    while (Token != eofilet)
    {
        myLex.GetNextToken();
        myLex.displayToken();
    }

    cout << endl << "success" << endl ;
    //system("pause");
    return 0;
}