0

我编写了这个程序,对一个包含代码的文件进行词法分析(tokenize),把每个标识符、关键字、数字和符号转换成对应的数字标记。我遇到的问题是:它能为每行的第一个单词分配正确的标记,但该行后面的每个标记都被当作标识符。下面是我的分词器代码,我认为问题出在 tokenize 函数里:

    /**
     * Line-oriented lexer for a small C-like language: reads a text file line by line and
     * prints a numeric token code for each keyword, identifier, number and symbol it
     * recognizes. This is the listing as posted in the question; the review notes below
     * describe what the code does as written.
     */
    public class cmmLex {

    /** Returns true when {@code b} is an ASCII letter (either case) or an underscore. */
    public static boolean isLetter(char b){        
        char[] letters_ = {'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','A','B','C','D',
            'E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','_'};
        for (int i = 0; i < letters_.length; i++) {
            if(b == letters_[i])
                return true;
        }
        return false;
    }

    /** Returns true when {@code b} is one of the decimal digits '0' through '9'. */
    public static boolean isNumber(char b){        
        char[] numbers = {'0','1','2','3','4','5','6','7','8','9'};
        for (int i = 0; i < numbers.length; i++) {
            if(b == numbers[i])
                return true;
        }
        return false;
    }

    /** Returns true when the contents of {@code str} exactly equal one of the seven keywords. */
    public static boolean isKeyword(StringBuffer str){
        String[] keywords = {"int", "double", "if", "while","return","void","else"};
        for (int i = 0; i < keywords.length; i++) {
            if (keywords[i].equals(str.toString()))
                return true;
        }

        return false;
    }

    /**
     * Returns true when {@code a} is one of the listed symbol characters.
     * NOTE(review): ';' is not in this list although tokenize has a case for it.
     */
    public static boolean isSymbol(char a){
        char[] symbols = {'+','-','*','/','<','>','!','=',',','.','(',')','[',']','{','}'};
        for (int i = 0; i < symbols.length; i++) {
            if(a == symbols[i])
                return true;
        }

        return false;
    }

    /**
     * Prints the token code for a word: 0-6 for the keywords (in alphabetical order),
     * otherwise 27, which is used for identifiers. Each code is followed by a space.
     */
    public static void lexMe(StringBuffer string)
    {   

        if(isKeyword(string)){
            switch(string.toString()){
                case "double":
                    System.out.print("0 ");
                    break;
                case "else":
                    System.out.print("1 ");
                    break;
                case "if":
                    System.out.print("2 ");
                    break;
                case "int":
                    System.out.print("3 ");
                    break;
                case "return":
                    System.out.print("4 ");
                    break;
                case "void":
                    System.out.print("5 ");
                    break;
                case "while":
                    System.out.print("6 ");
                    break;
            }  
        }else{
            // Anything that is not a keyword is reported as an identifier.
            System.out.print("27 ");
        }
    }


    /**
     * Scans one line of source text and prints the token codes found on it, then a newline.
     * Words go through lexMe, numbers print 28, and symbols print the codes in the switch below.
     *
     * @param line a single line of input, without its line terminator
     */
    public static void tokenize(String line){
        // Accumulates the characters of the word or number currently being read.
        StringBuffer consumed = new StringBuffer();
        outerloop:
        for (int i = 0; i < line.length(); i++) {
            char ch = line.charAt(i);
            // A word or number is only started while the buffer is empty.
            if(isLetter(ch) && consumed.length() == 0){
                consumed.append(line.charAt(i));
                for (int j = i+1; j < line.length(); j++) {
                    ch = line.charAt(j);
                    if(isLetter(ch) || isNumber(ch)){
                        consumed.append(ch);
                    }else{
                        //call lexme to tokenize string

                        lexMe(consumed);

                        // NOTE(review): the buffer reset below is commented out and there is no
                        // break here, so this inner loop runs on to the end of the line,
                        // keeps appending to the same buffer and calls lexMe again for every
                        // non-alphanumeric character it meets.
                        //consumed.setLength(0);
                        i = j;
                    }

                }
            }else if(isNumber(ch) && consumed.length() == 0){
                consumed.append(line.charAt(i) );
                for (int j = i+1; j < line.length(); j++) {
                    ch = line.charAt(j);
                    // Digits and '.' are both accepted as part of a number.
                    if(isNumber(ch) || line.charAt(j) == '.'){
                        consumed.append(ch);
                    }else{
                        // 28 is the code printed for a number; as in the word branch above,
                        // the loop is not left and the buffer is not cleared here.
                        System.out.print("28 ");
                        i = j;
                    }
                }
            }else if (isSymbol(ch)){
                switch(ch){
                    case '+':
                        System.out.print("7 ");
                        break;
                    case '-':
                        System.out.print("8 ");
                        break;
                    case '*':
                        // A '*' directly after '/' abandons the rest of the line.
                        // NOTE(review): i-1 is -1 when '*' is the first character of the line.
                        if(line.charAt(i-1) == '/'){
                            break outerloop;
                        }else{
                            // NOTE(review): println (not print) ends the output line here.
                            System.out.println("9 ");
                        }                       
                        break;
                    case '/':
                        // "//" abandons the rest of the line.
                        // NOTE(review): i+1 is past the end when '/' is the last character.
                        if(line.charAt(i+1) == '/')
                            break outerloop;
                        else if((ch = line.charAt(i+1)) == '*'){
                            consumed.append(ch);
                            for (int j = i; j < line.length(); j++) {
                                ch = line.charAt(j);
                                if(ch == '*'){
                                    // NOTE(review): ch was just found equal to '*', so this
                                    // comparison with '/' can never be true.
                                    if(ch == '/'){
                                        break outerloop;
                                    }
                                }else{
                                    consumed.append(ch);
                                }        

                            }
                        }else{
                            // NOTE(review): println (not print) ends the output line here.
                            System.out.println("10 ");

                        } 
                        break;
                    case '<':
                        // "<=" prints 12, a lone '<' prints 11. i is not advanced past the
                        // '=', so the next iteration sees it again.
                        if(line.charAt(i+1) == '='){
                            System.out.print("12 ");
                            break;
                        }
                        System.out.print("11 ");
                        break;                 
                    case '>':
                        // ">=" prints 14, a lone '>' prints 13.
                        if(line.charAt(i+1) == '='){
                            System.out.print("14 ");
                            break;
                        }
                        System.out.print("13 ");
                        break;
                    case '!':
                        // "!=" prints 16; a lone '!' prints nothing.
                        if(line.charAt(i+1) == '='){
                            System.out.print("16 ");
                            break;
                        }
                        break;
                    case '=':
                        System.out.print("15 ");
                        break;
                    case ';':
                        // NOTE(review): not reached, because isSymbol does not list ';'.
                        System.out.print("18 ");
                        break;
                    case ',':
                        System.out.print("19 ");
                        break;
                    case '.':
                        System.out.print("20 ");
                        break;
                    case '(':
                        System.out.print("21 ");
                        break;
                    case ')':
                        System.out.print("22 ");
                        break;
                    case '[':
                        System.out.print("23 ");
                        break;
                    case ']':
                        System.out.print("24 ");
                        break;
                    case '{':
                        System.out.print("25 ");
                        break;
                    case '}':
                        System.out.print("26 ");
                        break;
                }
            }
        }
        // Terminates the output line for this input line.
        System.out.println("");
    }

    /**
     * Reads src\testCode.txt line by line and tokenizes each line.
     * A missing file is silently ignored by the empty catch block below.
     */
    public static void main(String[] args) throws FileNotFoundException, IOException {
        File file = new File("src\\testCode.txt");
        // NOTE(review): testCode is declared but never used.
        String testCode;

        try {

            Scanner scanner = new Scanner(file);

            while (scanner.hasNextLine()) {
                String line = scanner.nextLine();
                tokenize(line);
            }
            scanner.close();
        }catch (FileNotFoundException e) {

        }

    }
}

我遇到的另一个问题是无法正确地忽略注释块。当我遇到“/*”并跳出循环时,我尝试设置一个表示“处于注释块中”的布尔标志;只要该标志仍为 true,我就想继续扫描后面的各行,直到遇到“*/”,再把标志设为 false。然而这并没有奏效。有什么想法吗?

这是我文件的第一行:

int fact(int x){

它应该打印出这一行:

3 27 21 3 27 22 25

这就是它目前的表现:

3 27 27 27 27 27

也许我没有正确处理空格?

4

3 回答 3

1

您的lexMe方法的开关正在检查一个字符串,该字符串在您超过第一个单词后收集的内容比应有的要多。

使用调试器查看它,或者将该值的调试打印出来,您就会看到问题所在。

这个问题似乎是因为这条线

  consumed.setLength(0);

在您发布的代码中被注释掉了。

把它放回去之后仍然有问题:处理完 “fact” 这个字符串后,代码会继续执行 `if(isLetter(ch) && consumed.length() == 0)` 分支内部的 for 循环,而此时它本应跳出内层循环并重新检查该条件。

我建议使用调试器来了解您的代码当前正在做什么,然后进行一些重大的重构。

笔记

我刚看了另一个答案,它通过添加 break 语句解决了这些问题(这是我的推测,我没有实际运行过)。

但我强烈建议重新考虑整个事情,因为嵌套循环和条件以及使用break会使代码非常混乱。

于 2012-05-13T20:25:39.237 回答
1

您在 tokenize() 中的循环中有问题。这是您的代码的更正版本:

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Scanner;

/**
 * Line-oriented lexer for a small C-like language.
 *
 * <p>Each input line is scanned left to right and one numeric token code, followed by a
 * space, is printed per lexeme; every input line produces exactly one output line.
 * Token codes: keywords 0-6 ({@code double, else, if, int, return, void, while}),
 * symbols 7-26 (code 17 is never emitted), identifiers 27, numeric literals 28.
 *
 * <p>{@code //} comments run to the end of the line. Block comments may span several
 * lines: the "inside a comment" state is kept in a static flag between calls to
 * {@link #tokenize(String)}, so this class is not safe for concurrent use.
 */
public class cmmLex {

    /** Reserved words of the language. */
    private static final String[] KEYWORDS = {"int", "double", "if", "while", "return", "void", "else"};

    /** Every character that starts a symbol token (';' included, it has token code 18). */
    private static final String SYMBOLS = "+-*/<>!=;,.()[]{}";

    /** True while the scanner is inside a block comment that has not been closed yet. */
    private static boolean inBlockComment = false;

    /**
     * Tests whether a character may start or continue an identifier.
     *
     * @param b the character to test
     * @return true for ASCII letters of either case and for the underscore
     */
    public static boolean isLetter(char b) {
        return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_';
    }

    /**
     * Tests whether a character is a decimal digit.
     *
     * @param b the character to test
     * @return true for '0' through '9'
     */
    public static boolean isNumber(char b) {
        return b >= '0' && b <= '9';
    }

    /**
     * Tests whether a word is a reserved word.
     *
     * @param str the word to look up
     * @return true when the contents of {@code str} exactly equal a keyword
     */
    public static boolean isKeyword(StringBuffer str) {
        String word = str.toString();
        for (String keyword : KEYWORDS) {
            if (keyword.equals(word)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tests whether a character starts a symbol token.
     *
     * @param a the character to test
     * @return true when {@code a} is one of the operator or punctuation characters
     */
    public static boolean isSymbol(char a) {
        return SYMBOLS.indexOf(a) >= 0;
    }

    /**
     * Prints the token code for a word: 0-6 for keywords, 27 for any other word.
     *
     * @param string the complete word that was read
     */
    public static void lexMe(StringBuffer string) {
        switch (string.toString()) {
            case "double":
                System.out.print("0 ");
                break;
            case "else":
                System.out.print("1 ");
                break;
            case "if":
                System.out.print("2 ");
                break;
            case "int":
                System.out.print("3 ");
                break;
            case "return":
                System.out.print("4 ");
                break;
            case "void":
                System.out.print("5 ");
                break;
            case "while":
                System.out.print("6 ");
                break;
            default:
                // Not a keyword, so it is an identifier.
                System.out.print("27 ");
                break;
        }
    }

    /**
     * Scans one line and prints its token codes on a single output line.
     *
     * <p>A token is emitted as soon as its last character has been found, including when
     * the token is the last thing on the line. Whitespace and unknown characters are skipped.
     *
     * @param line one line of source text, without its line terminator
     */
    public static void tokenize(String line) {
        int length = line.length();
        int i = 0;
        while (i < length) {
            if (inBlockComment) {
                int close = line.indexOf("*/", i);
                if (close < 0) {
                    // The comment continues on the next line.
                    break;
                }
                inBlockComment = false;
                i = close + 2;
                continue;
            }

            char ch = line.charAt(i);
            // Look-ahead that is safe at the end of the line.
            char next = (i + 1 < length) ? line.charAt(i + 1) : '\0';

            if (isLetter(ch)) {
                int end = i + 1;
                while (end < length && (isLetter(line.charAt(end)) || isNumber(line.charAt(end)))) {
                    end++;
                }
                lexMe(new StringBuffer(line.substring(i, end)));
                i = end;
            } else if (isNumber(ch)) {
                int end = i + 1;
                // Digits and '.' both belong to a numeric literal.
                while (end < length && (isNumber(line.charAt(end)) || line.charAt(end) == '.')) {
                    end++;
                }
                System.out.print("28 ");
                i = end;
            } else if (ch == '/' && next == '/') {
                // Line comment: ignore the rest of the line.
                break;
            } else if (ch == '/' && next == '*') {
                inBlockComment = true;
                i += 2;
            } else {
                i += printSymbol(ch, next);
            }
        }
        System.out.println("");
    }

    /**
     * Prints the code of the symbol starting with {@code ch}, if it has one.
     *
     * @param ch   the current character
     * @param next the character after it, or '\0' at the end of the line
     * @return how many characters were consumed (2 for {@code <=}, {@code >=} and {@code !=}, else 1)
     */
    private static int printSymbol(char ch, char next) {
        boolean withEquals = next == '=';
        switch (ch) {
            case '+':
                System.out.print("7 ");
                return 1;
            case '-':
                System.out.print("8 ");
                return 1;
            case '*':
                System.out.print("9 ");
                return 1;
            case '/':
                System.out.print("10 ");
                return 1;
            case '<':
                System.out.print(withEquals ? "12 " : "11 ");
                return withEquals ? 2 : 1;
            case '>':
                System.out.print(withEquals ? "14 " : "13 ");
                return withEquals ? 2 : 1;
            case '!':
                // A lone '!' has no token code and is skipped.
                if (withEquals) {
                    System.out.print("16 ");
                    return 2;
                }
                return 1;
            case '=':
                System.out.print("15 ");
                return 1;
            case ';':
                System.out.print("18 ");
                return 1;
            case ',':
                System.out.print("19 ");
                return 1;
            case '.':
                System.out.print("20 ");
                return 1;
            case '(':
                System.out.print("21 ");
                return 1;
            case ')':
                System.out.print("22 ");
                return 1;
            case '[':
                System.out.print("23 ");
                return 1;
            case ']':
                System.out.print("24 ");
                return 1;
            case '{':
                System.out.print("25 ");
                return 1;
            case '}':
                System.out.print("26 ");
                return 1;
            default:
                // Whitespace or a character the language does not use.
                return 1;
        }
    }

    /**
     * Tokenizes a file line by line.
     *
     * @param args optional: the path of the file to read; defaults to {@code src\testCode.txt}
     * @throws IOException if reading the file fails part-way through
     */
    public static void main(String[] args) throws FileNotFoundException, IOException {
        File file = new File(args.length > 0 ? args[0] : "src\\testCode.txt");
        // Every file starts outside of any comment.
        inBlockComment = false;

        try (Scanner scanner = new Scanner(file)) {
            while (scanner.hasNextLine()) {
                tokenize(scanner.nextLine());
            }
            // Scanner hides read errors; surface them instead of stopping silently.
            IOException failure = scanner.ioException();
            if (failure != null) {
                throw failure;
            }
        } catch (FileNotFoundException e) {
            System.err.println("Cannot open input file: " + file.getPath());
        }
    }
}
于 2012-05-13T20:26:14.630 回答
0

Handwritten lexical analyzers are always a pain to write and debug. May I suggest that you use a higher level tool to do this, eg: JLex, or JFlex. This will save you a lot of pain.

于 2012-05-13T20:56:16.090 回答