4

我对 Mac OS X 平台上的 x86-64 汇编编程很感兴趣。我偶然看到了一个关于创建 248 字节(248B)Mach-O 程序的页面,并由此找到了 Apple 官方的 Mach-O 格式参考文档。之后我想在 Xcode 中编写同样简单的 C 程序,并查看生成的汇编代码。

这是代码:

/* Minimal program: argc and argv are never read; main simply returns 42. */
int main(int argc, const char * argv[])
{
    return 42;
}

但是生成的汇编代码有 334 行,其中包含很多(以 248B 程序为参照)多余的内容。

首先,为什么 C 可执行文件的发布(Release)版本中包含这么多 DWARF 调试信息?其次,我注意到 Mach-O 头部数据出现了 4 次(分别位于不同的 DWARF 相关节中)。为什么有必要这样做?最后,Xcode 生成的汇编代码中包含:

.private_extern _main
.globl  _main
_main:
    .cfi_startproc

但在 248B 程序中,这些都无处可见——程序反而是从 _start 开始的。如果按照定义所有程序都从 main 开始,这怎么可能呢?


完整的 Xcode 汇编输出:

# Assembly output for main.c
# Generated at 4:04:08 PM on Sunday, January 20, 2013
# Using Release configuration, x86_64 architecture for Tiny target of Tiny project

    ## Section preamble.
    ## Switches through the sections used by this file and places a local
    ## label at the start of several of them (Lsection_info, Lsection_abbrev,
    ## Lsection_line, Lsection_str, Ldebug_range, Lsection_debug_loc,
    ## Ltext_begin). The DWARF data further down uses these labels as bases
    ## for section-relative offsets, e.g. "Lstring0-Lsection_str".
    ## No instructions or data bytes are emitted here; the only non-section
    ## directive is .file, which registers main.c as file number 1.
    .section    __TEXT,__text,regular,pure_instructions
    .file   1 "/Users/####/Desktop/Tiny/Tiny/main.c"
    .section    __DWARF,__debug_info,regular,debug
Lsection_info:
    .section    __DWARF,__debug_abbrev,regular,debug
Lsection_abbrev:
    .section    __DWARF,__debug_aranges,regular,debug
    .section    __DWARF,__debug_macinfo,regular,debug
    .section    __DWARF,__debug_line,regular,debug
Lsection_line:
    .section    __DWARF,__debug_loc,regular,debug
    .section    __DWARF,__debug_pubtypes,regular,debug
    .section    __DWARF,__debug_str,regular,debug
Lsection_str:
    .section    __DWARF,__debug_ranges,regular,debug
Ldebug_range:
    .section    __DWARF,__debug_loc,regular,debug
Lsection_debug_loc:
    .section    __TEXT,__text,regular,pure_instructions
Ltext_begin:
    .section    __DATA,__data
    .section    __TEXT,__text,regular,pure_instructions
    ##---------------------------------------------------------------------
    ## int main(int argc, const char *argv[])
    ## Compiler output for the C function whose body is "return 42;".
    ## Syntax: AT&T (source operand first, destination second).
    ## In:     per the DEBUG_VALUE notes below, argc is in EDI and argv in
    ##         RSI; neither register is read by the body.
    ## Out:    eax = 42
    ## Stack:  rbp is pushed on entry and popped before ret; nothing else
    ##         is stored on the stack.
    ## The .cfi_* and .loc directives emit unwind/line-table metadata only.
    ## Lfunc_begin0, Lfunc_end0 and Ltmp6 are referenced later by the DWARF
    ## data (DW_AT_low_pc / DW_AT_high_pc and the __debug_loc ranges).
    ##---------------------------------------------------------------------
    .private_extern _main
    .globl  _main
_main:                                  ## @main
    .cfi_startproc
Lfunc_begin0:
    .loc    1 12 0                  ## /Users/####/Desktop/Tiny/Tiny/main.c:12:0
## BB#0:
    pushq   %rbp                    ## save the caller's frame pointer
Ltmp2:
    .cfi_def_cfa_offset 16
Ltmp3:
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp              ## rbp = this function's frame base
Ltmp4:
    .cfi_def_cfa_register %rbp
    ##DEBUG_VALUE: main:argc <- EDI+0
    ##DEBUG_VALUE: main:argv <- RSI+0
    movl    $42, %eax               ## return value = 42
    .loc    1 15 5 prologue_end     ## /Users/####/Desktop/Tiny/Tiny/main.c:15:5
Ltmp5:
    popq    %rbp                    ## restore the caller's frame pointer
    ret
Ltmp6:
Lfunc_end0:
    .cfi_endproc

## End-of-section marker labels (Ltext_end, Ldata_end, Lsection_end1),
## followed by the __debug_info contents for main.c.
##
## The data between Linfo_begin1 and Linfo_end1 is one DWARF version 2
## compilation unit with 8-byte addresses. Each entry starts with an
## abbreviation code (see the "Abbrev [n] offset:size TAG" comments) whose
## attribute layout is defined in __debug_abbrev.
##
## Strings are stored as offsets into __debug_str (LstringN-Lsection_str).
## DW_AT_type values are the decimal form of the entry offsets shown in
## the Abbrev comments:
##   101 = 0x65  base type named by Lstring4 ("int")
##   108 = 0x6c  base type named by Lstring7 ("char")
##   115 = 0x73  const type      -> 108
##   120 = 0x78  pointer type    -> 115
##   125 = 0x7d  pointer type    -> 120
## So the subprogram and its first parameter have type 101, and the second
## parameter has type 125 (pointer to pointer to const char).
Ltext_end:
    .section    __DATA,__data
Ldata_end:
    .section    __TEXT,__text,regular,pure_instructions
Lsection_end1:
    .section    __DWARF,__debug_info,regular,debug
Linfo_begin1:
    .long   127                     ## Length of Compilation Unit Info
    .short  2                       ## DWARF version number
Lset0 = Labbrev_begin-Lsection_abbrev   ## Offset Into Abbrev. Section
    .long   Lset0
    .byte   8                       ## Address Size (in bytes)
    .byte   1                       ## Abbrev [1] 0xb:0x78 DW_TAG_compile_unit
Lset1 = Lstring0-Lsection_str           ## DW_AT_producer
    .long   Lset1
    .short  12                      ## DW_AT_language
Lset2 = Lstring1-Lsection_str           ## DW_AT_name
    .long   Lset2
    .quad   0                       ## DW_AT_entry_pc
    .long   0                       ## DW_AT_stmt_list
Lset3 = Lstring2-Lsection_str           ## DW_AT_comp_dir
    .long   Lset3
    .byte   1                       ## DW_AT_APPLE_optimized
    .byte   2                       ## Abbrev [2] 0x27:0x3e DW_TAG_subprogram
Lset4 = Lstring3-Lsection_str           ## DW_AT_name
    .long   Lset4
    .byte   1                       ## DW_AT_decl_file
    .byte   11                      ## DW_AT_decl_line
    .byte   1                       ## DW_AT_prototyped
    .long   101                     ## DW_AT_type
    .byte   1                       ## DW_AT_external
    .quad   Lfunc_begin0            ## DW_AT_low_pc
    .quad   Lfunc_end0              ## DW_AT_high_pc
    .byte   1                       ## DW_AT_frame_base
    .byte   86
    .byte   3                       ## Abbrev [3] 0x46:0xf DW_TAG_formal_parameter
Lset5 = Lstring5-Lsection_str           ## DW_AT_name
    .long   Lset5
    .byte   1                       ## DW_AT_decl_file
    .byte   11                      ## DW_AT_decl_line
    .long   101                     ## DW_AT_type
Lset6 = Ldebug_loc0-Lsection_debug_loc  ## DW_AT_location
    .long   Lset6
    .byte   3                       ## Abbrev [3] 0x55:0xf DW_TAG_formal_parameter
Lset7 = Lstring6-Lsection_str           ## DW_AT_name
    .long   Lset7
    .byte   1                       ## DW_AT_decl_file
    .byte   11                      ## DW_AT_decl_line
    .long   125                     ## DW_AT_type
Lset8 = Ldebug_loc2-Lsection_debug_loc  ## DW_AT_location
    .long   Lset8
    .byte   0                       ## End Of Children Mark
    .byte   4                       ## Abbrev [4] 0x65:0x7 DW_TAG_base_type
Lset9 = Lstring4-Lsection_str           ## DW_AT_name
    .long   Lset9
    .byte   5                       ## DW_AT_encoding
    .byte   4                       ## DW_AT_byte_size
    .byte   4                       ## Abbrev [4] 0x6c:0x7 DW_TAG_base_type
Lset10 = Lstring7-Lsection_str          ## DW_AT_name
    .long   Lset10
    .byte   6                       ## DW_AT_encoding
    .byte   1                       ## DW_AT_byte_size
    .byte   5                       ## Abbrev [5] 0x73:0x5 DW_TAG_const_type
    .long   108                     ## DW_AT_type
    .byte   6                       ## Abbrev [6] 0x78:0x5 DW_TAG_pointer_type
    .long   115                     ## DW_AT_type
    .byte   6                       ## Abbrev [6] 0x7d:0x5 DW_TAG_pointer_type
    .long   120                     ## DW_AT_type
    .byte   0                       ## End Of Children Mark
Linfo_end1:
    ## __debug_abbrev: the abbreviation table referenced from __debug_info
    ## via "Labbrev_begin-Lsection_abbrev".
    ## Six abbreviations (codes 1..6) are declared. Each one is:
    ##   abbreviation code, DWARF tag, has-children flag,
    ##   then (attribute, form) byte pairs, ended by a 0,0 pair (EOM).
    ## A final single 0 byte (EOM(3)) ends the table.
    ## Codes: 1 compile_unit, 2 subprogram, 3 formal_parameter,
    ##        4 base_type, 5 const_type, 6 pointer_type.
    .section    __DWARF,__debug_abbrev,regular,debug
Labbrev_begin:
    .byte   1                       ## Abbreviation Code
    .byte   17                      ## DW_TAG_compile_unit
    .byte   1                       ## DW_CHILDREN_yes
    .byte   37                      ## DW_AT_producer
    .byte   14                      ## DW_FORM_strp
    .byte   19                      ## DW_AT_language
    .byte   5                       ## DW_FORM_data2
    .byte   3                       ## DW_AT_name
    .byte   14                      ## DW_FORM_strp
    .byte   82                      ## DW_AT_entry_pc
    .byte   1                       ## DW_FORM_addr
    .byte   16                      ## DW_AT_stmt_list
    .byte   6                       ## DW_FORM_data4
    .byte   27                      ## DW_AT_comp_dir
    .byte   14                      ## DW_FORM_strp
    .ascii   "\341\177"             ## DW_AT_APPLE_optimized
    .byte   12                      ## DW_FORM_flag
    .byte   0                       ## EOM(1)
    .byte   0                       ## EOM(2)
    .byte   2                       ## Abbreviation Code
    .byte   46                      ## DW_TAG_subprogram
    .byte   1                       ## DW_CHILDREN_yes
    .byte   3                       ## DW_AT_name
    .byte   14                      ## DW_FORM_strp
    .byte   58                      ## DW_AT_decl_file
    .byte   11                      ## DW_FORM_data1
    .byte   59                      ## DW_AT_decl_line
    .byte   11                      ## DW_FORM_data1
    .byte   39                      ## DW_AT_prototyped
    .byte   12                      ## DW_FORM_flag
    .byte   73                      ## DW_AT_type
    .byte   19                      ## DW_FORM_ref4
    .byte   63                      ## DW_AT_external
    .byte   12                      ## DW_FORM_flag
    .byte   17                      ## DW_AT_low_pc
    .byte   1                       ## DW_FORM_addr
    .byte   18                      ## DW_AT_high_pc
    .byte   1                       ## DW_FORM_addr
    .byte   64                      ## DW_AT_frame_base
    .byte   10                      ## DW_FORM_block1
    .byte   0                       ## EOM(1)
    .byte   0                       ## EOM(2)
    .byte   3                       ## Abbreviation Code
    .byte   5                       ## DW_TAG_formal_parameter
    .byte   0                       ## DW_CHILDREN_no
    .byte   3                       ## DW_AT_name
    .byte   14                      ## DW_FORM_strp
    .byte   58                      ## DW_AT_decl_file
    .byte   11                      ## DW_FORM_data1
    .byte   59                      ## DW_AT_decl_line
    .byte   11                      ## DW_FORM_data1
    .byte   73                      ## DW_AT_type
    .byte   19                      ## DW_FORM_ref4
    .byte   2                       ## DW_AT_location
    .byte   6                       ## DW_FORM_data4
    .byte   0                       ## EOM(1)
    .byte   0                       ## EOM(2)
    .byte   4                       ## Abbreviation Code
    .byte   36                      ## DW_TAG_base_type
    .byte   0                       ## DW_CHILDREN_no
    .byte   3                       ## DW_AT_name
    .byte   14                      ## DW_FORM_strp
    .byte   62                      ## DW_AT_encoding
    .byte   11                      ## DW_FORM_data1
    .byte   11                      ## DW_AT_byte_size
    .byte   11                      ## DW_FORM_data1
    .byte   0                       ## EOM(1)
    .byte   0                       ## EOM(2)
    .byte   5                       ## Abbreviation Code
    .byte   38                      ## DW_TAG_const_type
    .byte   0                       ## DW_CHILDREN_no
    .byte   73                      ## DW_AT_type
    .byte   19                      ## DW_FORM_ref4
    .byte   0                       ## EOM(1)
    .byte   0                       ## EOM(2)
    .byte   6                       ## Abbreviation Code
    .byte   15                      ## DW_TAG_pointer_type
    .byte   0                       ## DW_CHILDREN_no
    .byte   73                      ## DW_AT_type
    .byte   19                      ## DW_FORM_ref4
    .byte   0                       ## EOM(1)
    .byte   0                       ## EOM(2)
    .byte   0                       ## EOM(3)
Labbrev_end:
    ## Four hashed lookup tables: __apple_names, __apple_objc,
    ## __apple_namespac and __apple_types.
    ## Every table starts with the same header layout: magic, version, hash
    ## function, bucket count, hash count, header-data length, DIE offset
    ## base, atom count, then (atom type, form) pairs.
    ## The magic 1212240712 is 0x48415348, i.e. the ASCII bytes "HASH".
    ## These repeated headers are the "same data four times" that the
    ## surrounding question asks about.
    ##
    ## Contents:
    ##   __apple_names    one entry: Lstring3 ("main") -> DIE offset 39 (0x27),
    ##                    the DW_TAG_subprogram entry in __debug_info
    ##   __apple_objc     no hashes; its single bucket holds -1
    ##   __apple_namespac no hashes; its single bucket holds -1
    ##   __apple_types    two entries: Lstring4 ("int")  -> DIE offset 101,
    ##                                 Lstring7 ("char") -> DIE offset 108,
    ##                    each followed by tag 36 (DW_TAG_base_type) and a
    ##                    one-byte flags field of 0
    .section    __DWARF,__apple_names,regular,debug
Lnames_begin:
    .long   1212240712              ## Header Magic
    .short  1                       ## Header Version
    .short  0                       ## Header Hash Function
    .long   1                       ## Header Bucket Count
    .long   1                       ## Header Hash Count
    .long   12                      ## Header Data Length
    .long   0                       ## HeaderData Die Offset Base
    .long   1                       ## HeaderData Atom Count
    .short  1                       ## eAtomTypeDIEOffset
    .short  6                       ## DW_FORM_data4
    .long   0                       ## Bucket 0
    .long   2090499946              ## Hash in Bucket 0
    .long   LNames0-Lnames_begin    ## Offset in Bucket 0
LNames0:
Lset11 = Lstring3-Lsection_str          ## main
    .long   Lset11
    .long   1                       ## Num DIEs
    .long   39
    .long   0
    .section    __DWARF,__apple_objc,regular,debug
Lobjc_begin:
    .long   1212240712              ## Header Magic
    .short  1                       ## Header Version
    .short  0                       ## Header Hash Function
    .long   1                       ## Header Bucket Count
    .long   0                       ## Header Hash Count
    .long   12                      ## Header Data Length
    .long   0                       ## HeaderData Die Offset Base
    .long   1                       ## HeaderData Atom Count
    .short  1                       ## eAtomTypeDIEOffset
    .short  6                       ## DW_FORM_data4
    .long   -1                      ## Bucket 0
    .section    __DWARF,__apple_namespac,regular,debug
Lnamespac_begin:
    .long   1212240712              ## Header Magic
    .short  1                       ## Header Version
    .short  0                       ## Header Hash Function
    .long   1                       ## Header Bucket Count
    .long   0                       ## Header Hash Count
    .long   12                      ## Header Data Length
    .long   0                       ## HeaderData Die Offset Base
    .long   1                       ## HeaderData Atom Count
    .short  1                       ## eAtomTypeDIEOffset
    .short  6                       ## DW_FORM_data4
    .long   -1                      ## Bucket 0
    .section    __DWARF,__apple_types,regular,debug
Ltypes_begin:
    .long   1212240712              ## Header Magic
    .short  1                       ## Header Version
    .short  0                       ## Header Hash Function
    .long   2                       ## Header Bucket Count
    .long   2                       ## Header Hash Count
    .long   20                      ## Header Data Length
    .long   0                       ## HeaderData Die Offset Base
    .long   3                       ## HeaderData Atom Count
    .short  1                       ## eAtomTypeDIEOffset
    .short  6                       ## DW_FORM_data4
    .short  3                       ## eAtomTypeTag
    .short  5                       ## DW_FORM_data2
    .short  5                       ## eAtomTypeTypeFlags
    .short  11                      ## DW_FORM_data1
    .long   0                       ## Bucket 0
    .long   1                       ## Bucket 1
    .long   193495088               ## Hash in Bucket 0
    .long   2090147939              ## Hash in Bucket 1
    .long   Ltypes0-Ltypes_begin    ## Offset in Bucket 0
    .long   Ltypes1-Ltypes_begin    ## Offset in Bucket 1
Ltypes0:
Lset12 = Lstring4-Lsection_str          ## int
    .long   Lset12
    .long   1                       ## Num DIEs
    .long   101
    .short  36
    .byte   0
    .long   0
Ltypes1:
Lset13 = Lstring7-Lsection_str          ## char
    .long   Lset13
    .long   1                       ## Num DIEs
    .long   108
    .short  36
    .byte   0
    .long   0
    ## __debug_pubtypes, __debug_loc and __debug_inlined contents.
    ##
    ## __debug_pubtypes: a header only (length, version 2, offset and length
    ##   of the compilation unit) followed directly by the End Mark, so it
    ##   lists no types.
    ##
    ## __debug_loc: two location lists, referenced from the two
    ##   DW_TAG_formal_parameter entries in __debug_info:
    ##     Ldebug_loc0 (parameter named by Lstring5, "argc") -> DW_OP_reg5
    ##     Ldebug_loc2 (parameter named by Lstring6, "argv") -> DW_OP_reg4
    ##   Each list has one entry covering Lfunc_begin0..Ltmp6 with a
    ##   one-byte expression (size computed as a label difference), and is
    ##   terminated by a pair of zero quads.
    ##
    ## __debug_aranges, __debug_ranges, __debug_macinfo: selected but left
    ##   empty.
    ##
    ## __debug_inlined: a header only (length, version 2, address size 8).
    .section    __DWARF,__debug_pubtypes,regular,debug
Lset14 = Lpubtypes_end1-Lpubtypes_begin1 ## Length of Public Types Info
    .long   Lset14
Lpubtypes_begin1:
    .short  2                       ## DWARF Version
Lset15 = Linfo_begin1-Lsection_info     ## Offset of Compilation Unit Info
    .long   Lset15
Lset16 = Linfo_end1-Linfo_begin1        ## Compilation Unit Length
    .long   Lset16
    .long   0                       ## End Mark
Lpubtypes_end1:
    .section    __DWARF,__debug_loc,regular,debug
Ldebug_loc0:
    .quad   Lfunc_begin0
    .quad   Ltmp6
Lset17 = Ltmp8-Ltmp7                    ## Loc expr size
    .short  Lset17
Ltmp7:
    .byte   85                      ## DW_OP_reg5
Ltmp8:
    .quad   0
    .quad   0
Ldebug_loc2:
    .quad   Lfunc_begin0
    .quad   Ltmp6
Lset18 = Ltmp10-Ltmp9                   ## Loc expr size
    .short  Lset18
Ltmp9:
    .byte   84                      ## DW_OP_reg4
Ltmp10:
    .quad   0
    .quad   0
Ldebug_loc4:
    .section    __DWARF,__debug_aranges,regular,debug
    .section    __DWARF,__debug_ranges,regular,debug
    .section    __DWARF,__debug_macinfo,regular,debug
    .section    __DWARF,__debug_inlined,regular,debug
Lset19 = Ldebug_inlined_end1-Ldebug_inlined_begin1 ## Length of Debug Inlined Information Entry
    .long   Lset19
Ldebug_inlined_begin1:
    .short  2                       ## Dwarf Version
    .byte   8                       ## Address Size (in bytes)
Ldebug_inlined_end1:
    ## __debug_str: NUL-terminated strings referenced from the DWARF data
    ## above as offsets from Lsection_str (LstringN-Lsection_str).
    ##   Lstring0  producer (compiler version string)
    ##   Lstring1  compile-unit name (path of main.c)
    ##   Lstring2  compilation directory
    ##   Lstring3  "main"   subprogram name
    ##   Lstring4  "int"    base type name
    ##   Lstring5  "argc"   first parameter name
    ##   Lstring6  "argv"   second parameter name
    ##   Lstring7  "char"   base type name
    .section    __DWARF,__debug_str,regular,debug
Lstring0:
    .asciz   "Apple clang version 4.1 (tags/Apple/clang-421.11.66) (based on LLVM 3.1svn)"
Lstring1:
    .asciz   "/Users/####/Desktop/Tiny/Tiny/main.c"
Lstring2:
    .asciz   "/Users/####/Desktop/Tiny"
Lstring3:
    .asciz   "main"
Lstring4:
    .asciz   "int"
Lstring5:
    .asciz   "argc"
Lstring6:
    .asciz   "argv"
Lstring7:
    .asciz   "char"

.subsections_via_symbols
4

1 回答 1

15

首先,为什么 C 可执行文件的发布版本中包含这么多 DWARF 调试信息?

能够调试经过优化的代码非常有用,因为只在优化构建中才会出现的错误并不少见。如果您是在手写汇编,那么您多半不会关心 DWARF 信息,因此我建议您在不带 -g 参数的情况下构建用于比较的代码。


其次,我注意到 Mach-O 标头数据包含 4 次(在不同的 DWARF 相关部分中)。为什么这是必要的?

您看到的这些并不是 Mach-O 头部。它们是 DWARF 加速表(accelerator tables)的头部;这是 LLVM 对 DWARF 的扩展,用于加快“某个符号是否定义在给定编译单元中”这类查询。


但在 248B 程序中,这些都无处可见 - 程序反而从 _start 开始。如果根据定义,所有程序都从 main 开始,这怎么可能?

从历史上看,在 OS X 上所有程序都从 start 开始执行。但是,此符号通常来自系统库,而不是由程序本身定义。系统提供的 start 实现会先执行一些初始化,然后跳转到您程序“真正的”入口点。

Mach-O 二进制文件的入口点由 LC_UNIXTHREAD 或 LC_MAIN 加载命令定义。使用 LC_UNIXTHREAD 时(这是 10.8 之前的 OS X 版本的惯例),对于常规的 C 或 C++ 程序,链接器会把 start 用作入口点。该符号通常来自 /usr/lib/crt1.o,其地址被写入 LC_UNIXTHREAD 加载命令的指令指针字段。您链接到的 248B 二进制文件包含一条 LC_UNIXTHREAD 命令,其中 eip 被设置为 0x000010e8,也就是 _start 符号的地址。由于这个小程序是静态可执行文件,并且二进制文件是直接生成的,因此它可以把任何想要的地址写入加载命令的指令指针字段。

如果您正在构建面向 OS X 10.8+ 的可执行文件,链接器将生成 LC_MAIN 加载命令,而不是 LC_UNIXTHREAD。内核知道,使用 LC_MAIN 命令的二进制文件应当通过加载动态链接器并跳转到动态链接器的入口点来执行。动态链接器 dyld 先初始化自身,然后跳转到 LC_MAIN 命令中指定的地址。在这个美丽新世界里,根本不会用到名为 start 的符号。

于 2013-01-20T07:30:00.993 回答