3

以下面这个非常简单的 C 程序为例。我的理解是,首先会调用预处理器来展开宏、包含头文件等。

我的理解是,预处理器会在编译之前先把头文件 stdio.h 中的所有代码(声明)包含进 C 文件,这会使 C 文件变大、行号发生变化,因此 printf() 调用会出现在文件中更靠后的位置。

如果是这样,为什么在调试期间行号仍然正确?

#include <stdio.h>
int main()
{
    printf("Hello world!\n");
}
4

3 回答 3

4

是的,头文件的内容会以文本形式被包含进来。

行号被保留,因为预处理器具有指定行号的机制:

#line 97 "original.c"
/* This is line 97 of the original C file */

这些行号用于报告错误。通常,编译器使用一种它自己能理解的简写形式——经常省略 line 这个词并添加额外的信息——例如,参见 GCC 文档中的 Preprocessor Output 一节。

通过编译下面的例子可以看到这一点:

#line 1000
pod variable = { 0 };

把它放在一个文件中;尝试编译它;请注意,编译器在第 1000 行抱怨未知类型pod,即使它是源文件中的第 2 行。

$ gcc -O -c original.c
original.c:1000:1: error: unknown type name ‘pod’
$

您可以使用 GCC 和大多数基于 Unix 的 C 编译器都支持的 -E 选项来运行预处理器并查看(大量的)输出:

$ gcc -E original.c
# 1 "original.c"
# 1 "<built-in>"
# 1 "<command-line>"
# 1 "original.c"
# 1000 "original.c"
pod variable = { 0 };
$

请注意,如果我包含<stdio.h>,输出会大得多(我在一台机器上得到 577 行 - 这比我预期的要少)。

您还可以使用 C 编译器参数的一个子集来运行独立的 C 预处理器(通常称为 cpp)。这有时会给出与主编译器略有不同的结果,但通常仍是了解实际情况的好方法。


现在,我的一些代码有问题——我的测试用例是:

/* Minimized GCC bug */

#include <assert.h>
#include <string.h>

extern int chk_arg(const char *arg);

int chk_arg(const char *arg)
{
    assert(strncmp(arg, "-", 1) == 0 && strncmp(arg, "--", 2) != 0);
    return (arg[1] == 'a');
}

令我苦恼的编译错误是:

$   gcc -std=c11 -Wall -Wmissing-prototypes -Wstrict-prototypes -Wextra -pedantic -Werror -g -O3   -c gccbug.c
gccbug.c: In function ‘chk_arg’:
gccbug.c:10:5: error: string length ‘4587’ is greater than the length ‘4095’ ISO C99 compilers are required to support [-Werror=overlength-strings]
   10 |     assert(strncmp(arg, "-", 1) == 0 && strncmp(arg, "--", 2) != 0);
      |     ^~~~~~
cc1: all warnings being treated as errors
$

在使用 gcc -E 之前,并不容易看出哪里有一个 4587 个字符的字符串:

int chk_arg(const char *arg)
{

# 10 "gccbug.c" 3 4
   (((__extension__ (__builtin_constant_p (
# 10 "gccbug.c"
   1
# 10 "gccbug.c" 3 4
   ) && ((__builtin_constant_p (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) && strlen (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) < ((size_t) (
# 10 "gccbug.c"
   1
# 10 "gccbug.c" 3 4
   ))) || (__builtin_constant_p (
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ) && strlen (
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ) < ((size_t) (
# 10 "gccbug.c"
   1
# 10 "gccbug.c" 3 4
   )))) ? __extension__ ({ size_t __s1_len, __s2_len; (__builtin_constant_p (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) && __builtin_constant_p (
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ) && (__s1_len = strlen (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ), __s2_len = strlen (
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ), (!((size_t)(const void *)((
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) + 1) - (size_t)(const void *)(
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) == 1) || __s1_len >= 4) && (!((size_t)(const void *)((
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ) + 1) - (size_t)(const void *)(
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ) == 1) || __s2_len >= 4)) ? __builtin_strcmp (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   , 
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ) : (__builtin_constant_p (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) && ((size_t)(const void *)((
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) + 1) - (size_t)(const void *)(
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) == 1) && (__s1_len = strlen (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ), __s1_len < 4) ? (__builtin_constant_p (
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ) && ((size_t)(const void *)((
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ) + 1) - (size_t)(const void *)(
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ) == 1) ? __builtin_strcmp (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   , 
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ) : (__extension__ ({ __const unsigned char *__s2 = (__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ); register int __result = (((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ))[0] - __s2[0]); if (__s1_len > 0 && __result == 0) { __result = (((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ))[1] - __s2[1]); if (__s1_len > 1 && __result == 0) { __result = (((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ))[2] - __s2[2]); if (__s1_len > 2 && __result == 0) __result = (((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ))[3] - __s2[3]); } } __result; }))) : (__builtin_constant_p (
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ) && ((size_t)(const void *)((
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ) + 1) - (size_t)(const void *)(
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ) == 1) && (__s2_len = strlen (
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ), __s2_len < 4) ? (__builtin_constant_p (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) && ((size_t)(const void *)((
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) + 1) - (size_t)(const void *)(
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) == 1) ? __builtin_strcmp (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   , 
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ) : (__extension__ ({ __const unsigned char *__s1 = (__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ); register int __result = __s1[0] - ((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ))[0]; if (__s2_len > 0 && __result == 0) { __result = (__s1[1] - ((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ))[1]); if (__s2_len > 1 && __result == 0) { __result = (__s1[2] - ((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ))[2]); if (__s2_len > 2 && __result == 0) __result = (__s1[3] - ((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ))[3]); } } __result; }))) : __builtin_strcmp (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   , 
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   )))); }) : strncmp (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   , 
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   , 
# 10 "gccbug.c"
   1
# 10 "gccbug.c" 3 4
   ))) 
# 10 "gccbug.c"
   == 0 && 
# 10 "gccbug.c" 3 4
   (__extension__ (__builtin_constant_p (
# 10 "gccbug.c"
   2
# 10 "gccbug.c" 3 4
   ) && ((__builtin_constant_p (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) && strlen (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) < ((size_t) (
# 10 "gccbug.c"
   2
# 10 "gccbug.c" 3 4
   ))) || (__builtin_constant_p (
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ) && strlen (
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ) < ((size_t) (
# 10 "gccbug.c"
   2
# 10 "gccbug.c" 3 4
   )))) ? __extension__ ({ size_t __s1_len, __s2_len; (__builtin_constant_p (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) && __builtin_constant_p (
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ) && (__s1_len = strlen (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ), __s2_len = strlen (
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ), (!((size_t)(const void *)((
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) + 1) - (size_t)(const void *)(
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) == 1) || __s1_len >= 4) && (!((size_t)(const void *)((
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ) + 1) - (size_t)(const void *)(
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ) == 1) || __s2_len >= 4)) ? __builtin_strcmp (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   , 
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ) : (__builtin_constant_p (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) && ((size_t)(const void *)((
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) + 1) - (size_t)(const void *)(
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) == 1) && (__s1_len = strlen (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ), __s1_len < 4) ? (__builtin_constant_p (
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ) && ((size_t)(const void *)((
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ) + 1) - (size_t)(const void *)(
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ) == 1) ? __builtin_strcmp (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   , 
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ) : (__extension__ ({ __const unsigned char *__s2 = (__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ); register int __result = (((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ))[0] - __s2[0]); if (__s1_len > 0 && __result == 0) { __result = (((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ))[1] - __s2[1]); if (__s1_len > 1 && __result == 0) { __result = (((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ))[2] - __s2[2]); if (__s1_len > 2 && __result == 0) __result = (((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ))[3] - __s2[3]); } } __result; }))) : (__builtin_constant_p (
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ) && ((size_t)(const void *)((
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ) + 1) - (size_t)(const void *)(
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ) == 1) && (__s2_len = strlen (
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ), __s2_len < 4) ? (__builtin_constant_p (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) && ((size_t)(const void *)((
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) + 1) - (size_t)(const void *)(
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) == 1) ? __builtin_strcmp (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   , 
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ) : (__extension__ ({ __const unsigned char *__s1 = (__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ); register int __result = __s1[0] - ((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ))[0]; if (__s2_len > 0 && __result == 0) { __result = (__s1[1] - ((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ))[1]); if (__s2_len > 1 && __result == 0) { __result = (__s1[2] - ((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ))[2]); if (__s2_len > 2 && __result == 0) __result = (__s1[3] - ((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ))[3]); } } __result; }))) : __builtin_strcmp (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   , 
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   )))); }) : strncmp (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   , 
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   , 
# 10 "gccbug.c"
   2
# 10 "gccbug.c" 3 4
   ))) 
# 10 "gccbug.c"
   != 0
# 10 "gccbug.c" 3 4
   ) ? (void) (0) : (__assert_fail (
# 10 "gccbug.c"
   "(__extension__ (__builtin_constant_p (1) && ((__builtin_constant_p (arg) && strlen (arg) < ((size_t) (1))) || (__builtin_constant_p (\"-\") && strlen (\"-\") < ((size_t) (1)))) ? __extension__ ({ size_t __s1_len, __s2_len; (__builtin_constant_p (arg) && __builtin_constant_p (\"-\") && (__s1_len = strlen (arg), __s2_len = strlen (\"-\"), (!((size_t)(const void *)((arg) + 1) - (size_t)(const void *)(arg) == 1) || __s1_len >= 4) && (!((size_t)(const void *)((\"-\") + 1) - (size_t)(const void *)(\"-\") == 1) || __s2_len >= 4)) ? __builtin_strcmp (arg, \"-\") : (__builtin_constant_p (arg) && ((size_t)(const void *)((arg) + 1) - (size_t)(const void *)(arg) == 1) && (__s1_len = strlen (arg), __s1_len < 4) ? (__builtin_constant_p (\"-\") && ((size_t)(const void *)((\"-\") + 1) - (size_t)(const void *)(\"-\") == 1) ? __builtin_strcmp (arg, \"-\") : (__extension__ ({ __const unsigned char *__s2 = (__const unsigned char *) (__const char *) (\"-\"); register int __result = (((__const unsigned char *) (__const char *) (arg))[0] - __s2[0]); if (__s1_len > 0 && __result == 0) { __result = (((__const unsigned char *) (__const char *) (arg))[1] - __s2[1]); if (__s1_len > 1 && __result == 0) { __result = (((__const unsigned char *) (__const char *) (arg))[2] - __s2[2]); if (__s1_len > 2 && __result == 0) __result = (((__const unsigned char *) (__const char *) (arg))[3] - __s2[3]); } } __result; }))) : (__builtin_constant_p (\"-\") && ((size_t)(const void *)((\"-\") + 1) - (size_t)(const void *)(\"-\") == 1) && (__s2_len = strlen (\"-\"), __s2_len < 4) ? (__builtin_constant_p (arg) && ((size_t)(const void *)((arg) + 1) - (size_t)(const void *)(arg) == 1) ? 
__builtin_strcmp (arg, \"-\") : (__extension__ ({ __const unsigned char *__s1 = (__const unsigned char *) (__const char *) (arg); register int __result = __s1[0] - ((__const unsigned char *) (__const char *) (\"-\"))[0]; if (__s2_len > 0 && __result == 0) { __result = (__s1[1] - ((__const unsigned char *) (__const char *) (\"-\"))[1]); if (__s2_len > 1 && __result == 0) { __result = (__s1[2] - ((__const unsigned char *) (__const char *) (\"-\"))[2]); if (__s2_len > 2 && __result == 0) __result = (__s1[3] - ((__const unsigned char *) (__const char *) (\"-\"))[3]); } } __result; }))) : __builtin_strcmp (arg, \"-\")))); }) : strncmp (arg, \"-\", 1))) == 0 && (__extension__ (__builtin_constant_p (2) && ((__builtin_constant_p (arg) && strlen (arg) < ((size_t) (2))) || (__builtin_constant_p (\"--\") && strlen (\"--\") < ((size_t) (2)))) ? __extension__ ({ size_t __s1_len, __s2_len; (__builtin_constant_p (arg) && __builtin_constant_p (\"--\") && (__s1_len = strlen (arg), __s2_len = strlen (\"--\"), (!((size_t)(const void *)((arg) + 1) - (size_t)(const void *)(arg) == 1) || __s1_len >= 4) && (!((size_t)(const void *)((\"--\") + 1) - (size_t)(const void *)(\"--\") == 1) || __s2_len >= 4)) ? __builtin_strcmp (arg, \"--\") : (__builtin_constant_p (arg) && ((size_t)(const void *)((arg) + 1) - (size_t)(const void *)(arg) == 1) && (__s1_len = strlen (arg), __s1_len < 4) ? (__builtin_constant_p (\"--\") && ((size_t)(const void *)((\"--\") + 1) - (size_t)(const void *)(\"--\") == 1) ? 
__builtin_strcmp (arg, \"--\") : (__extension__ ({ __const unsigned char *__s2 = (__const unsigned char *) (__const char *) (\"--\"); register int __result = (((__const unsigned char *) (__const char *) (arg))[0] - __s2[0]); if (__s1_len > 0 && __result == 0) { __result = (((__const unsigned char *) (__const char *) (arg))[1] - __s2[1]); if (__s1_len > 1 && __result == 0) { __result = (((__const unsigned char *) (__const char *) (arg))[2] - __s2[2]); if (__s1_len > 2 && __result == 0) __result = (((__const unsigned char *) (__const char *) (arg))[3] - __s2[3]); } } __result; }))) : (__builtin_constant_p (\"--\") && ((size_t)(const void *)((\"--\") + 1) - (size_t)(const void *)(\"--\") == 1) && (__s2_len = strlen (\"--\"), __s2_len < 4) ? (__builtin_constant_p (arg) && ((size_t)(const void *)((arg) + 1) - (size_t)(const void *)(arg) == 1) ? __builtin_strcmp (arg, \"--\") : (__extension__ ({ __const unsigned char *__s1 = (__const unsigned char *) (__const char *) (arg); register int __result = __s1[0] - ((__const unsigned char *) (__const char *) (\"--\"))[0]; if (__s2_len > 0 && __result == 0) { __result = (__s1[1] - ((__const unsigned char *) (__const char *) (\"--\"))[1]); if (__s2_len > 1 && __result == 0) { __result = (__s1[2] - ((__const unsigned char *) (__const char *) (\"--\"))[2]); if (__s2_len > 2 && __result == 0) __result = (__s1[3] - ((__const unsigned char *) (__const char *) (\"--\"))[3]); } } __result; }))) : __builtin_strcmp (arg, \"--\")))); }) : strncmp (arg, \"--\", 2))) != 0"
# 10 "gccbug.c" 3 4
   , "gccbug.c", 10, __PRETTY_FUNCTION__), (void) (0)))
# 10 "gccbug.c"
                                                                  ;
    return (arg[1] == 'a');
}

哎哟!如果这个断言被触发,我不确定自己能否认出它——可以肯定的是,它看起来一点也不像源代码中的文本!(这来自在古老的 RHEL 5 Linux 上运行的 GCC 9.2.0。)

于 2019-10-03T19:18:12.717 回答
2

编译器跟踪每个输入行的文件名和行号。该信息与每个记号(token)一起保存,以便在需要时使用(通常用于生成错误或警告消息)。

于 2019-10-03T19:18:52.197 回答
0

预处理器输出一些特殊的指令,这些指令可以控制源文件的名称和编译器认为它正在处理的行号。

鉴于这些文件:

x1.h:

int x = 4;

int printf(const char *, ...);

x1.c:

#include "x1.h"

int main()
{
    printf("x=%d\n", x);
    return 0;
}

gcc 的预处理器输出以下内容

# 1 "x1.c"
# 1 "<built-in>"
# 1 "<command-line>"
# 31 "<command-line>"
# 1 "/usr/include/stdc-predef.h" 1 3 4
# 32 "<command-line>" 2
# 1 "x1.c"
# 1 "x1.h" 1
int x = 4;

int printf(const char *, ...);
# 2 "x1.c" 2

int main()
{
    printf("x=%d\n", x);
    return 0;
}

以 # 开头的每一行都包含当前行号、当前源文件的名称以及一些附加标志。

在主源文件的开头,您会看到 source:line 被设置为 x1.c:1。接着,在 x1.h 的内容被包含进来之前,source:line 被设置为 x1.h:1。在 include 之后,source:line 被设置为 x1.c:2。

这只是如何实现这一目标的一个例子。其他编译器执行类似的操作。

于 2019-10-03T19:34:35.793 回答