-1

谁能告诉我下面这段代码有什么问题?它本应从输入中删除注释和字符串(准确地说,注释本身并不删除,这也是它需要识别注释的原因)。这与我之前的一个问题有关:Removing comments with a rolling window without nested while loops

#include <stdio.h>

// Reads stdin one byte at a time through a two-byte window (c1 = previous
// byte, c2 = current byte) and drives a small state machine:
//   0: initial state, and the state the other states return to
//   1: entered after "/*", left after "*/"
//   2: entered after '"', left after a '"' not preceded by a backslash
//   3: entered after "//", left after a newline
//   4: entered after '\'', left after a '\'' not preceded by a backslash
// Bytes are written to stdout only while state is 1, 2 or 3.
// NOTE(review): `state` is assigned only in the final else branch of the
// loop body, so once it becomes 1, 2 or 3 it is never changed again.
int main()
{
    int c, c1 = 0, c2 = 0 ,state = 0, next = 0;
    while(1)
    {
        // Compute the transition from the window as it stood BEFORE this
        // iteration's byte is read (the read happens after the switch).
        switch(state)
        {
           case 0: next = ((c2 == '*' && c1 == '/') ? 1 : (c2 == '\"') ? 2 : (c2 == '/' && c1 == '/') ? 3 : (c2 == '\'') ? 4: 0); break; 
           case 1: next = ((c2 == '/' && c1 == '*') ? 0 : 1); break; 
           case 2: next = ((c2 == '\"' && c1 != '\\') ? 0 : 2); break;
           case 3: next = ((c2 == '\n') ? 0 : 3); break;
           case 4: next = ((c2 == '\'' && c1 != '\\') ? 0 : 4); break;
           default: next = state; 
        }
        // Stop on a negative getchar() result (EOF).
        c = getchar(); if( c < 0) break;
        c1 = c2; c2 = c; // slide window
        if(state == 1)
        {
            if(c2 == '*')
            {
                // Look one byte ahead after a '*'. The '*' is printed only
                // when the following byte is not '/'; the look-ahead byte
                // itself is not printed here.
                // NOTE(review): this getchar() result is not checked for EOF.
                c = getchar();
                c1 = c2; c2 = c;
                if(c2 != '/')
                   putchar(c1);
            }
            else
                putchar(c2);
        }
        else if(state == 2)
        {
            // Prints every byte except a '"' that directly follows a backslash.
            if(c2 != '"' || (c2 == '\"' && c1 != '\\'))
                putchar(c2);
        }
        else if(state == 3)
        {
                putchar(c2);
        }
        else
        // Any state other than 1, 2 or 3: print nothing and take the
        // transition computed by the switch above.
        state = next;
        // c2 is the current input byte and c1 is the previous input byte
    }
    return 0;
}
4

2 回答 2

1

我认为您实际上并不需要滑动窗口来完成删除 C 和 C++ 注释的任务。您可以扩展状态机,增加一些用于跟踪转义等情况的附加状态……状态变多后代码会变长,但在概念上可能更简单,因为您只需要跟踪一个状态变量。因此,按照您原有代码的思路改写成我建议的新状态机形式,就得到下面的代码(我也同意 Basile 关于使用枚举的建议,并已采纳)。

#include <stdio.h>

// Copies stdin to stdout with C block comments and C++ line comments
// removed. String and character literals are passed through unchanged,
// including backslash escapes, so comment markers inside them are not
// treated as comments. A single state variable replaces the sliding window.
//
// Returns 0.
int main()
{
    enum {
        START,                      // ordinary code
        SLASH,                      // saw one '/', not yet written out
        STRING, CHAR,               // inside a string / character literal
        STRING_ESCAPE, CHAR_ESCAPE, // byte right after a backslash in a literal
        SINGLE_LINE_COMMENT,        // after "//", up to the newline
        MULTI_LINE_COMMENT,         // after "/*"
        MULTI_LINE_END,             // inside a block comment, just saw '*'
    } state = START;
    int c;

    while ((c = getchar()) != EOF) {
        switch (state) {
        case START:
        state_START:
            // Hold a '/' back until the next byte shows whether it opens a comment.
            if (c == '/') { state = SLASH; break; }
            putchar(c);
            if (c == '\"') state = STRING;
            else if (c == '\'') state = CHAR;
            break;
        case SLASH:
            if (c == '/') state = SINGLE_LINE_COMMENT;
            else if (c == '*') state = MULTI_LINE_COMMENT;
            else {
                // Not a comment after all (e.g. a division operator): emit the
                // held-back '/' and then treat c as ordinary code.
                putchar('/');
                state = START;
                goto state_START;
            }
            break;
        case STRING:
            putchar(c);
            if (c == '"') state = START;
            else if (c == '\\') state = STRING_ESCAPE;
            break;
        case CHAR:
            putchar(c);
            if (c == '\'') state = START;
            else if (c == '\\') state = CHAR_ESCAPE;
            break;
        case SINGLE_LINE_COMMENT:
            if (c == '\n') {
                // The newline terminates the comment but is not part of it;
                // keep it so the following line is not joined to this one.
                putchar(c);
                state = START;
            }
            break;
        case MULTI_LINE_COMMENT:
        state_MULTI_LINE_COMMENT:
            if (c == '*') state = MULTI_LINE_END;
            break;
        case STRING_ESCAPE:
            // The escaped byte is copied verbatim and cannot end the literal.
            putchar(c);
            state = STRING;
            break;
        case CHAR_ESCAPE:
            putchar(c);
            state = CHAR;
            break;
        case MULTI_LINE_END:
            if (c == '/') state = START;
            // Re-examine c as comment text so that a run of '*' followed by
            // '/' still closes the comment.
            else { state = MULTI_LINE_COMMENT; goto state_MULTI_LINE_COMMENT; }
            break;
        }
    }
    // Input ended right after a lone '/': it was held back, so flush it now.
    if (state == SLASH)
        putchar('/');
    return 0;
}
于 2013-04-19T16:58:08.343 回答
0

在不清楚您具体目的的情况下,我只能给一个建议……您是否考虑过用正则表达式来解决这个问题?如果您熟悉正则表达式,这样写起来可能更快,代码也会更简洁。

顺便说一句,我为您的问题找到了一个简洁的站点...它解释了如何从代码中获取这些注释...

如何使用正则表达式匹配注释

这是 C 中的正则表达式库

于 2013-04-19T13:05:02.550 回答