c - 调用 printf 时的堆栈视图？

Question

我刚刚了解了格式字符串漏洞，这让我想到了下面这个问题。

考虑以下简单程序：

#include<stdio.h>
/*
 * Minimal demonstration of a format string vulnerability.
 *
 * The first command-line argument is handed to printf() as the format
 * string itself, so any conversion specifiers it contains (e.g. %s, %u)
 * make printf() fetch arguments that were never passed.
 *
 * argc is not checked, so argv[1] is used even when no argument is given.
 *
 * NOTE(review): `void main` is non-standard C; it is left as-is because
 * the assembly listings discussed with this program were generated from
 * this exact source.
 */
void main(int argc, char **argv)
{
/* Local pointer to a string literal; it is never referenced again below. */
char *s="SomeString";
/* User-controlled text is used directly as the format string. */
printf(argv[1]);
}

现在很明显，这段代码容易受到格式字符串漏洞的影响。即，当命令行参数为 %s 时，会打印值 SomeString，因为 printf 会弹出堆栈一次。

我不明白的是调用 printf 时的堆栈结构

在我的脑海中，我想象堆栈如下：

从左到右增长----->

main()                                                                  ---> printf()-->
RET to libc_main | address of 's' | current registers| ret ptr to main | ptr to format string|

如果是这种情况，将 %s 输入到程序中如何导致 s 的值被弹出？

（或）如果我对堆栈结构完全错误，请纠正我

score 4 · Accepted Answer

堆栈内容很大程度上取决于以下内容：

中央处理器
编译器
调用约定（即参数如何在寄存器和堆栈中传递）
编译器执行的代码优化

这是我在 x86 上使用 MinGW，通过 gcc stk.c -S -o stk.s 编译你的小程序得到的结果：

        # Compiler output for stk.c built without optimization
        # (gcc stk.c -S -o stk.s): 32-bit x86, AT&T syntax.
        # The .cfi_* directives only describe the frame for unwinders and
        # debuggers; they emit no instructions.
        .file   "stk.c"
        .def    ___main;        .scl    2;      .type   32;     .endef
        .section .rdata,"dr"
# Read-only data: the string literal that the local `s` points to.
LC0:
        .ascii "SomeString\0"
        .text
        .globl  _main
        .def    _main;  .scl    2;      .type   32;     .endef
# main(argc, argv): argc is at 8(%ebp) and argv at 12(%ebp) once the
# frame pointer has been set up.
_main:
LFB6:
        .cfi_startproc
        pushl   %ebp                    # save the caller's frame pointer
        .cfi_def_cfa_offset 8
        .cfi_offset 5, -8
        movl    %esp, %ebp              # establish this function's frame
        .cfi_def_cfa_register 5
        andl    $-16, %esp              # round %esp down to a 16-byte boundary
        subl    $32, %esp               # reserve 32 bytes: locals + outgoing args
        call    ___main                 # defined elsewhere; presumably the MinGW runtime init hook - confirm
        movl    $LC0, 28(%esp)          # s = "SomeString": pointer stored at esp+28
        movl    12(%ebp), %eax          # eax = argv
        addl    $4, %eax                # eax = &argv[1] (4-byte pointers)
        movl    (%eax), %eax            # eax = argv[1]
        movl    %eax, (%esp)            # printf's only argument (the format) at esp+0
        call    _printf                 # pushes the return address below esp+0
        leave                           # restore %esp from %ebp and pop the saved %ebp
        .cfi_restore 5
        .cfi_def_cfa 4, 4
        ret
        .cfi_endproc
LFE6:
        .def    _printf;        .scl    2;      .type   32;     .endef

这是我使用 gcc stk.c -S -O2 -o stk.s（也就是启用了优化）得到的结果：

        # Compiler output for stk.c built with optimization
        # (gcc stk.c -S -O2 -o stk.s): 32-bit x86, AT&T syntax.
        # There is no "SomeString" data and no store of its address in
        # this listing: the unused local `s` does not appear at all.
        # The .cfi_* directives only describe the frame for unwinders and
        # debuggers; they emit no instructions.
        .file   "stk.c"
        .def    ___main;        .scl    2;      .type   32;     .endef
        .section        .text.startup,"x"
        .p2align 2,,3
        .globl  _main
        .def    _main;  .scl    2;      .type   32;     .endef
# main(argc, argv): argv is at 12(%ebp) once the frame pointer is set up.
_main:
LFB7:
        .cfi_startproc
        pushl   %ebp                    # save the caller's frame pointer
        .cfi_def_cfa_offset 8
        .cfi_offset 5, -8
        movl    %esp, %ebp              # establish this function's frame
        .cfi_def_cfa_register 5
        andl    $-16, %esp              # round %esp down to a 16-byte boundary
        subl    $16, %esp               # reserve 16 bytes; only esp+0 is written below
        call    ___main                 # defined elsewhere; presumably the MinGW runtime init hook - confirm
        movl    12(%ebp), %eax          # eax = argv
        movl    4(%eax), %eax           # eax = argv[1] (add and load folded into one instruction)
        movl    %eax, (%esp)            # printf's only argument (the format) at esp+0
        call    _printf                 # pushes the return address below esp+0
        leave                           # restore %esp from %ebp and pop the saved %ebp
        .cfi_restore 5
        .cfi_def_cfa 4, 4
        ret
        .cfi_endproc
LFE7:
        .def    _printf;        .scl    2;      .type   32;     .endef

如您所见，在后一种情况下，堆栈上没有指向“SomeString”的指针。事实上，该字符串甚至不存在于已编译的代码中。

在这段简单的代码中，没有寄存器被保存到堆栈上，因为没有任何变量被分配到需要在调用 printf() 前后保持不变的寄存器中。

所以，这里堆栈上只有以下内容：字符串指针（可选）；由于堆栈对齐而未使用的空间（andl $-16, %esp 加上 subl $32, %esp 用于对齐堆栈并为局部变量分配空间，这里没有）；printf() 的参数；以及从 printf() 返回到 main() 的返回地址。

在前一种情况下，指向“SomeString”的指针和 printf() 的参数（即 argv[1] 的值）在堆栈上彼此相距很远：

        movl    $LC0, 28(%esp) ; pointer to "SomeString" (local s) is stored at esp+28
        movl    12(%ebp), %eax ; eax = argv
        addl    $4, %eax ; eax = &argv[1]
        movl    (%eax), %eax ; eax = argv[1]
        movl    %eax, (%esp) ; argv[1] (printf's format argument) is stored at esp+0
        call    _printf ; the two pointers are 28 bytes apart at the time of the call

如果您希望这两个地址在堆栈上一个紧接一个地存放，就需要调整代码、编译/优化选项，或者换用不同的编译器。

或者，你可以在 argv[1] 中提供一个格式字符串，使 printf() 能够一直读到该指针所在的位置。例如，您可以在格式字符串中加入若干多余的（虚假的）格式说明符。

例如，如果我使用 gcc stk.c -o stk.exe 编译这段代码，并以 stk.exe %u%u%u%u%u%u%s 运行它，会得到以下输出：

4200532268676042006264200532880015253SomeString

所有这一切都非常 hacky，让它正常工作并非易事。

score 0 · Accepted Answer

在 x86 上，函数调用的堆栈可能类似于：

          :              :
          +--------------+
          :  alignment   :
          +--------------+
12(%ebp)  |     arg2     |
          +--------------+
 8(%ebp)  |     arg1     |
          +--------------+
 4(%ebp)  |     ret      | -----> return address
          +--------------+
  (%ebp)  |     ebp      | -----> previous ebp value
          +--------------+
-4(%ebp)  |    local1    | -----> local vars, sometimes they can overflow ;-)
          +--------------+        
          :  alignment   :
          +--------------+
          :              :

如果你使用了 -fomit-frame-pointer，ebp 就不会保存在堆栈上。在不同的优化级别下，一些变量可能会消失（被优化掉），等等。

其他 ABI 将函数参数存储在寄存器上，而不是将它们保存在堆栈中。稍后，在调用另一个函数之前，活动寄存器可能会溢出到堆栈中。

c - 调用 printf 时的堆栈视图？

2 回答 2

Related

Reference