6

我正在从 K&R 书中学习 C,对于第一章的练习 1.23,我必须编写一个程序,在给定用户输入的一些 C 代码的情况下删除所有注释。这是我到目前为止完成的程序。我可以对其进行任何改进吗?

/**
 Tuesday, 10/07/2013

 Exercise 1.23
 Write a program to remove all comments from a C 
 program. Don't forget to handle quoted strings 
 and character constants properly. C comments   
 don't nest.
**/

#include <stdio.h>
#define MAX_LENGTH 65536
#define NOT_IN_COMMENT 0
#define SINGLE_COMMENT 1
#define MULTI_COMMENT  2

main()
{
    char code[MAX_LENGTH];        /* Buffer that stores the inputted code */
    int size = 0;                 /* Length of the inputted code */
    int loop;                     /* Integer used for the for loop */
    char c;                       /* Character to input into */
    int status = NOT_IN_COMMENT;  /* Are we in a comment? What type? */
    int in_string = 0;            /* Are we inside of a string constant? */
    char last_character;          /* Value of the last character */


    /* Input all code into the buffer until escape sequence pressed */
    while ((c = getchar()) != EOF)
        code[size++] = c; 
    code[size] = '\0'; 


    /* Remove all comments from the code and display results to user */
    for (loop = 0; loop < size; loop++) {
        char current = code[loop]; 

        if (in_string) {
            if (current == '"') in_string = 0; 
            putchar(current);
        }

        else {
            if (status == NOT_IN_COMMENT) {
                if (current == '"') {
                    putchar(current);
                    in_string = 1; 
                    continue; 
                }

                if (current == '/' && last_character == '/') status = SINGLE_COMMENT;
                else if (current == '*' && last_character == '/') status = MULTI_COMMENT; 
                else if (current != '/' || (current == '/' && loop < size-1 && !(code[loop+1] == '/' || code[loop+1] == '*'))) putchar(current); 
            }

            else if (status == SINGLE_COMMENT) {
                if (current == '\n') {
                    status = NOT_IN_COMMENT; 
                    putchar('\n');
                }
            }

            else if (status == MULTI_COMMENT) {
                if (current == '/' && last_character == '*') status = NOT_IN_COMMENT; 
            }
        }

        last_character = current; 
    }
}
4

6 回答 6

4

将您的注释剥离移动到一个函数中(更有用),并使用 fgets() 一次读取一行,last_character 是模棱两可的(是最后一个还是上一个?),这使用的 putchar() 调用要少得多,只有每行一个 printf(可以使用 puts),保留您所做的大部分工作,

#include <stdio.h>
#include <string.h>
#define MAX_LENGTH 65536

#define NOT_IN_COMMENT 0
#define SINGLE_COMMENT 1
#define MULTI_COMMENT  2
int status = NOT_IN_COMMENT;  /* Are we in a comment? What type? */
int in_string = 0;            /* Are we inside of a string constant? */
char* stripcomments(char* stripped,char* code)
{
    int ndx;                      /* index for code[] */
    int ondx;                     /* index for output[] */
    char prevch;                  /* Value of the previous character */
    char ch;                      /* Character to input into */

    /* Remove all comments from the code and display results to user */
    for (ndx=ondx=0; ndx < strlen(code); ndx++)
    {
        char current = code[ndx];

        if (in_string) {
            if (current == '"') in_string = 0;
            stripped[ondx++] = current;
        }
        else {
            if (status == NOT_IN_COMMENT) {
                if (current == '"') {
                    stripped[ondx++] = current;
                    in_string = 1;
                    continue;
                }

                if (current == '/' && prevch == '/') status = SINGLE_COMMENT;
                else if (current == '*' && prevch == '/') status = MULTI_COMMENT;
                else if (current != '/' || (current == '/' && ndx < strlen(code)-1 && !(code[ndx+1] == '/' || code[ndx+1] == '*'))) stripped[ondx++] = current;
            }

            else if (status == SINGLE_COMMENT) {
                if (current == '\n') {
                    status = NOT_IN_COMMENT;
                    stripped[ondx++] = '\n';
                }
            }

            else if (status == MULTI_COMMENT) {
                if (current == '/' && prevch == '*') status = NOT_IN_COMMENT;
            }
        }
        prevch = current;
    }
    stripped[ondx] = '\0';
    return(stripped);
}

int main(void)
{
    char code[MAX_LENGTH];        /* Buffer that stores the inputted code */
    char stripped[MAX_LENGTH];

    while( fgets(code,sizeof(code),stdin) )
    {
        //printf("%s\n",code);
        //strip comments...
        stripcomments(stripped,code);
        if( strlen(stripped) > 0 ) printf("%s",stripped);
    }
}

我会留给你删除多余的空行。

于 2013-10-08T03:39:22.813 回答
1

当您处理带引号的字符串时,您应该检测转义的引号 ( \")。eg"\"/* not a comment */\""是一个有效的字符串,但我认为你的代码会从它的中间去掉错误的注释。

如果您想真正正确,您还应该处理行继续(以 a 结尾的行\在下一行继续)。为了增加毛羽,您还应该处理三元组。??/"是转义引号,??/行尾是续行。

代码的风格看起来很不错,虽然 main 应该更恰当地声明为int main(void).

于 2013-10-08T03:18:54.270 回答
1

在我看来不错,干得好!

也许可以通过添加一些评论来改进它:) 作为一个粗略的指南,为每个条件添加一个。您正在评论,但只是停在循环内部的多汁部分。但是代码看起来很可读。

它有效吗?你测试过吗?

如果我有一个包含转义双引号的字符串,看起来它可能会失败......例如"He said, \"Hello, World!\""

于 2013-10-08T03:18:54.630 回答
0

我喜欢这个线程在我的项目中包含一个“评论剥离器”,然后再将它交给 JSON 解析器。只有我更喜欢 FSM 方法。希望我的实现对任何人都是可以理解和有用的:

#include <stdio.h>
#include <string.h>

void strip(int ch, FILE *stream)
{
    static enum strip_states {
        STRIP_STATE_PUTC = 0,
        STRIP_STATE_SINGLE,
        STRIP_STATE_MULTI,
        STRIP_STATE_STRING,
    } state = STRIP_STATE_PUTC;
    static char _ch = 0;
    static unsigned char _nestlevel = 0;

    /* String */
    if (state == STRIP_STATE_PUTC &&
        ch == '"') {
        state = STRIP_STATE_STRING;
    } else if (state == STRIP_STATE_STRING &&
        ch == '"') {
        state = STRIP_STATE_PUTC;
    /* Multiline */
    } else if (_ch == '/' && ch == '*') {
        if (state == STRIP_STATE_PUTC)
            state = STRIP_STATE_MULTI;
        else if (state == STRIP_STATE_MULTI)
            _nestlevel++;
    } else if (_ch == '*' && ch == '/') {
        if (state == STRIP_STATE_MULTI &&
            _nestlevel > 0)
            _nestlevel--;
        else if (state == STRIP_STATE_MULTI &&
            _nestlevel == 0)
            state = STRIP_STATE_PUTC;
    /* Singleline */
    } else if (state == STRIP_STATE_PUTC &&
            _ch == '/' && ch == '/') {
        state = STRIP_STATE_SINGLE;
    } else if (state == STRIP_STATE_SINGLE &&
            ch == '\n') {
        state = STRIP_STATE_PUTC;
    }

    /* Put character */
    if ((state == STRIP_STATE_PUTC && ch != '/') ||
        state == STRIP_STATE_STRING)
        fputc(ch, stream);

    _ch = ch;
}

int main(void)
{
    int ch;
    while ((ch = fgetc(stdin)) != EOF)
        strip(ch, stdout);

    return 0;
}

什么有效:

  • 单行注释"xxx // comment"
  • 普通多行注释"xxx /* comment\n another comment */ yyy"
  • 嵌套评论"xxx /* comment /* nested comment */ end of comment */ yyy

目前尚未实施和测试:

  • 单行注释后面的多行注释
  • 续行
  • 转义字符

亲切的问候,杰瑞

于 2015-04-29T18:06:23.493 回答
0
//G H PATEL COLLEGE OF ENGINEERING & TECHNOLOGY.    
//c program to remove comments from given src.txt file, and write back to dest.txt file.
#include <stdio.h>
int main()
{
    FILE *src,*dest;
    char ch,pre,line[100];
    int nflag,qflag,index;

    src=fopen("src.txt","r+");
    dest=fopen("dest.txt","w+");

    nflag=1;

    while(!feof(src))
    {
        index=0;
        for(ch=fgetc(src);ch!=EOF && ch!='\n';)
        {
            if(ch=='"'&&pre!='\\')
            {
                qflag=0;
                for(;ch!='\n' && qflag==0;)
                {
                    line[index++]=ch;
                    pre=ch;
                    ch=fgetc(src);
                    if(ch=='"'&&pre!='\\')
                    {
                        qflag=1;
                        line[index++]=ch;
                        pre=ch;
                        ch=fgetc(src);
                        break;
                    }
                }
            }
            else if(ch=='/')
            {
                pre=ch;
                ch=fgetc(src);
                if(ch=='/')
                {
                    for(;fgetc(src)!='\n';);
                    break;
                }
                if(ch=='*')
                {
                    nflag=1;
                    for(ch=fgetc(src);nflag==1;)
                    {
                        if(ch=='*')
                        {
                            pre=ch;
                            ch=fgetc(src);
                            if(ch=='/')
                            {
                                nflag=0;
                            }
                        }
                        else
                        {
                            pre=ch;
                            ch=fgetc(src);
                        }
                    }
                }
            }
            else
            {
                line[index++]=ch;
                pre=ch;
                ch=fgetc(src);
            }
        }
        line[index]='\0';
        if(index>0)
        {
            line[index] = '\0';
            fprintf(dest,"%s\n",line);
            fflush(stdin);
        }
    }
    getch();
    fclose(src);
    fclose(dest);
    return 0;
}
于 2014-12-28T05:06:24.397 回答
0

您可以参考下面的简单代码:

#include <stdio.h>

int main(int argc, char **argv) {
    char code[1000];
    char output[1000];
    char ch;
    int i = 0;
    //store code in array
    while ((ch = getchar()) != EOF) {
        code[i++] = ch;
    }
    code[i] = '\0';
    int index = 0;
    i = 0;
    //store removed comment code in output
    while (code[i] != EOF) {
        if (code[i] == '/' && code[i + 1] == '/') {        //to remove single line comments
            while (code[i] != '\n')
                i++;
        } else if (code[i] == '/' && code[i + 1] == '*') {  //to remove multi line comments
            i = i + 2;
            while (code[i] != '*' && code[i + 1] != '/') {
                i++;
            }
            i = i + 3;
        } else {           //store the rest of the code in output array
            output[index++] = code[i++];
        }

    }
    output[index] = '\0';
    printf("%s", output);
}

输入:

#include<stdio.h>
void main()
{
printf("Hello");
/*--------------------------------------------
  ------------------Ignored by compiler-------
  --------------------------------------------
*/
printf("By");
}

输出:

#include<stdio.h>
void main()
{
printf("Hello");
printf("By");
}
于 2017-02-18T07:04:24.987 回答