2

这是一个可以正常运行的简单 C++ 程序。

#include <string>
#include <string_view>
#include <iostream>

// Pairs a value with a short textual label (for example a unit suffix).
//
// ValueType is the type of the stored value. The label is kept as an owning
// std::string, so the object does not depend on the lifetime of the text that
// was passed to the constructor.
template<typename ValueType>
struct BasicType {
    ValueType   value{};  // the stored value
    std::string type{};   // label text copied from the constructor argument

    // T    - value to store; copied into `value`.
    // desc - view of the label text; its characters are copied into `type`.
    BasicType(ValueType T, const std::string_view desc)
        : value{T}
        , type{desc}
    {}
};

// Stream inserter for BasicType<T>: writes the stored value immediately
// followed by the label, with no separator, and returns `out` for chaining.
//
// BT is taken by value, so each insertion copy-constructs a BasicType<T>
// (including its std::string member) before anything is written.
template<typename T>
std::ostream& operator<<(std::ostream& out, const BasicType<T> BT ) {
    auto&& afterValue = out << BT.value;  // the value first...
    afterValue << BT.type;                // ...then the label right behind it
    return out;
}

// A BasicType<int> under its own type name; the constructor only forwards
// its arguments to the base class.
struct IntT : BasicType<int> {
    // value - integer to store; desc - label text copied by the base.
    IntT(int value, const std::string_view desc)
        : BasicType<int>{value, desc}
    {}
};
// A BasicType<float> under its own type name; the constructor only forwards
// its arguments to the base class.
struct FloatT : BasicType<float> {
    // value - float to store; desc - label text copied by the base.
    FloatT(float value, const std::string_view desc)
        : BasicType<float>{value, desc}
    {}
};

// Builds one IntT and one FloatT and prints both to std::cout in a single
// chained insertion, separated by the literal " and ".
int main() {
    IntT   hours{3, "hrs"};
    FloatT seconds{2.5f, "s"};

    std::cout << hours
              << " and "
              << seconds;

    return 0;
}

它的输出显而易见:

3hrs and 2.5s

代码本身没有问题,它只是一个测试样本。不过,我很好奇几个主流编译器会如何以不同的方式处理这段代码,以便更好地了解它们的内部工作原理以及各自的优缺点。

我使用 Compiler Explorer 对此进行了测试,只是为了查看所生成汇编代码的差异。每个编译器我都按 C++17 标准编译,并使用 O2(针对运行速度优化)而不是 O1(针对代码大小优化)。


这是 Clang(trunk) 生成的汇编代码,编译器选项设置为:-std=c++17 -O2

main:                                   # @main
        push    rbp
        push    r15
        push    r14
        push    r13
        push    r12
        push    rbx
        sub     rsp, 168
        mov     dword ptr [rsp + 128], 3
        lea     r15, [rsp + 152]
        mov     qword ptr [rsp + 136], r15
        mov     dword ptr [rsp + 152], 7565928
        mov     qword ptr [rsp + 144], 3
        mov     dword ptr [rsp + 48], 1075838976
        lea     r12, [rsp + 72]
        mov     qword ptr [rsp + 56], r12
        mov     word ptr [rsp + 72], 115
        mov     qword ptr [rsp + 64], 1
        mov     dword ptr [rsp + 88], 3
        lea     r13, [rsp + 112]
        mov     qword ptr [rsp + 96], r13
        mov     dword ptr [rsp + 112], 7565928
        mov     qword ptr [rsp + 104], 3
        mov     edi, offset std::cout
        mov     esi, 3
        call    std::basic_ostream<char, std::char_traits<char> >::operator<<(int)
        mov     rsi, qword ptr [rsp + 96]
        mov     rdx, qword ptr [rsp + 104]
        mov     rdi, rax
        call    std::basic_ostream<char, std::char_traits<char> >& std::__ostream_insert<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*, long)
        mov     edi, offset std::cout
        mov     esi, offset .L.str.2
        mov     edx, 5
        call    std::basic_ostream<char, std::char_traits<char> >& std::__ostream_insert<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*, long)
        movss   xmm0, dword ptr [rsp + 48]      # xmm0 = mem[0],zero,zero,zero
        movss   dword ptr [rsp + 8], xmm0
        lea     rbp, [rsp + 32]
        mov     qword ptr [rsp + 16], rbp
        mov     r14, qword ptr [rsp + 56]
        mov     rbx, qword ptr [rsp + 64]
        mov     qword ptr [rsp], rbx
        cmp     rbx, 15
        jbe     .LBB0_4
        lea     rdi, [rsp + 16]
        mov     rsi, rsp
        xor     edx, edx
        call    std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_create(unsigned long&, unsigned long)
        mov     qword ptr [rsp + 16], rax
        mov     rcx, qword ptr [rsp]
        mov     qword ptr [rsp + 32], rcx
        test    rbx, rbx
        jne     .LBB0_8
        jmp     .LBB0_11
.LBB0_4:
        mov     rax, rbp
        test    rbx, rbx
        je      .LBB0_11
.LBB0_8:
        cmp     rbx, 1
        jne     .LBB0_10
        mov     cl, byte ptr [r14]
        mov     byte ptr [rax], cl
        jmp     .LBB0_11
.LBB0_10:
        mov     rdi, rax
        mov     rsi, r14
        mov     rdx, rbx
        call    memcpy@PLT
.LBB0_11:
        mov     rax, qword ptr [rsp]
        mov     qword ptr [rsp + 24], rax
        mov     rcx, qword ptr [rsp + 16]
        mov     byte ptr [rcx + rax], 0
        movss   xmm0, dword ptr [rsp + 8]       # xmm0 = mem[0],zero,zero,zero
        cvtss2sd        xmm0, xmm0
        mov     edi, offset std::cout
        call    std::basic_ostream<char, std::char_traits<char> >& std::basic_ostream<char, std::char_traits<char> >::_M_insert<double>(double)
        mov     rsi, qword ptr [rsp + 16]
        mov     rdx, qword ptr [rsp + 24]
        mov     rdi, rax
        call    std::basic_ostream<char, std::char_traits<char> >& std::__ostream_insert<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*, long)
        mov     rdi, qword ptr [rsp + 16]
        cmp     rdi, rbp
        je      .LBB0_15
        call    operator delete(void*)
.LBB0_15:
        mov     rdi, qword ptr [rsp + 96]
        cmp     rdi, r13
        je      .LBB0_17
        call    operator delete(void*)
.LBB0_17:
        mov     rdi, qword ptr [rsp + 56]
        cmp     rdi, r12
        je      .LBB0_19
        call    operator delete(void*)
.LBB0_19:
        mov     rdi, qword ptr [rsp + 136]
        cmp     rdi, r15
        je      .LBB0_21
        call    operator delete(void*)
.LBB0_21:
        xor     eax, eax
        add     rsp, 168
        pop     rbx
        pop     r12
        pop     r13
        pop     r14
        pop     r15
        pop     rbp
        ret
        mov     rbx, rax
        mov     rdi, qword ptr [rsp + 16]
        cmp     rdi, rbp
        je      .LBB0_25
        call    operator delete(void*)
        jmp     .LBB0_25
        mov     rbx, rax
.LBB0_25:
        mov     rdi, qword ptr [rsp + 96]
        cmp     rdi, r13
        je      .LBB0_27
        call    operator delete(void*)
.LBB0_27:
        mov     rdi, qword ptr [rsp + 56]
        cmp     rdi, r12
        je      .LBB0_29
        call    operator delete(void*)
.LBB0_29:
        mov     rdi, qword ptr [rsp + 136]
        cmp     rdi, r15
        je      .LBB0_31
        call    operator delete(void*)
.LBB0_31:
        mov     rdi, rbx
        call    _Unwind_Resume@PLT
_GLOBAL__sub_I_example.cpp:             # @_GLOBAL__sub_I_example.cpp
        push    rax
        mov     edi, offset std::__ioinit
        call    std::ios_base::Init::Init() [complete object constructor]
        mov     edi, offset std::ios_base::Init::~Init() [complete object destructor]
        mov     esi, offset std::__ioinit
        mov     edx, offset __dso_handle
        pop     rax
        jmp     __cxa_atexit                    # TAILCALL

.L.str.2:
        .asciz  " and "

Clang 生成了 147 行汇编代码。


这是 GCC(trunk) 生成的汇编代码,编译器选项设置为:-std=c++17 -O2

.LC0:
        .string "hrs"
.LC2:
        .string "s"
.LC3:
        .string " and "
main:
        push    rbx
        mov     edx, OFFSET FLAT:.LC0+3
        mov     esi, OFFSET FLAT:.LC0
        sub     rsp, 192
        lea     rax, [rsp+24]
        lea     rdi, [rsp+8]
        mov     DWORD PTR [rsp], 3
        mov     QWORD PTR [rsp+8], rax
        call    void std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_construct<char const*>(char const*, char const*, std::forward_iterator_tag) [clone .isra.0]
        lea     rax, [rsp+72]
        mov     edx, OFFSET FLAT:.LC2+1
        mov     esi, OFFSET FLAT:.LC2
        lea     rdi, [rsp+56]
        mov     DWORD PTR [rsp+48], 0x40200000
        mov     QWORD PTR [rsp+56], rax
        call    void std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_construct<char const*>(char const*, char const*, std::forward_iterator_tag) [clone .isra.0]
        mov     eax, DWORD PTR [rsp]
        lea     rsi, [rsp+8]
        lea     rdi, [rsp+104]
        mov     DWORD PTR [rsp+96], eax
        call    std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::basic_string(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) [complete object constructor]
        mov     esi, DWORD PTR [rsp+96]
        mov     edi, OFFSET FLAT:_ZSt4cout
        call    std::basic_ostream<char, std::char_traits<char> >::operator<<(int)
        mov     rdx, QWORD PTR [rsp+112]
        mov     rsi, QWORD PTR [rsp+104]
        mov     rdi, rax
        call    std::basic_ostream<char, std::char_traits<char> >& std::__ostream_insert<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*, long)
        mov     esi, OFFSET FLAT:.LC3
        mov     edi, OFFSET FLAT:_ZSt4cout
        call    std::basic_ostream<char, std::char_traits<char> >& std::operator<< <std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*)
        movss   xmm0, DWORD PTR [rsp+48]
        lea     rsi, [rsp+56]
        lea     rdi, [rsp+152]
        mov     rbx, rax
        movss   DWORD PTR [rsp+144], xmm0
        call    std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::basic_string(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) [complete object constructor]
        pxor    xmm0, xmm0
        mov     rdi, rbx
        cvtss2sd        xmm0, DWORD PTR [rsp+144]
        call    std::basic_ostream<char, std::char_traits<char> >& std::basic_ostream<char, std::char_traits<char> >::_M_insert<double>(double)
        mov     rdx, QWORD PTR [rsp+160]
        mov     rsi, QWORD PTR [rsp+152]
        mov     rdi, rax
        call    std::basic_ostream<char, std::char_traits<char> >& std::__ostream_insert<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*, long)
        lea     rdi, [rsp+152]
        call    std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_dispose()
        lea     rdi, [rsp+104]
        call    std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_dispose()
        lea     rdi, [rsp+56]
        call    std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_dispose()
        lea     rdi, [rsp+8]
        call    std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_dispose()
        add     rsp, 192
        xor     eax, eax
        pop     rbx
        ret
        mov     rbx, rax
        jmp     .L10
        mov     rbx, rax
        jmp     .L7
        mov     rbx, rax
        jmp     .L8
        mov     rbx, rax
        jmp     .L9
main.cold:
        lea     rdi, [rsp+152]
        call    std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_dispose()
.L8:
        lea     rdi, [rsp+104]
        call    std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_dispose()
.L9:
        lea     rdi, [rsp+56]
        call    std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_dispose()
.L10:
        lea     rdi, [rsp+8]
        call    std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_dispose()
        mov     rdi, rbx
        call    _Unwind_Resume
_GLOBAL__sub_I_main:
        sub     rsp, 8
        mov     edi, OFFSET FLAT:_ZStL8__ioinit
        call    std::ios_base::Init::Init() [complete object constructor]
        mov     edx, OFFSET FLAT:__dso_handle
        mov     esi, OFFSET FLAT:_ZStL8__ioinit
        mov     edi, OFFSET FLAT:_ZNSt8ios_base4InitD1Ev
        add     rsp, 8
        jmp     __cxa_atexit

这会产生 95 行汇编指令。


最后是 MSVC(x64 msvc v19.latest),编译器选项设置为:/std:c++17 /O2。不用说,它的输出太大,无法直接贴在这里,所以这里给出带有 MSVC 选项的 Compiler Explorer 链接……令人惊讶的是,它生成了 1600 多行代码。


从 GCC 到 Clang,指令行数从大约 95 行增加到 150 行是一回事,但一下子跳到 1500 行以上就是另一回事了。这是怎么回事?这种差异主要来自各编译器生成汇编指令的方式以及 MSVC 对 C++17 的实现方式,还是与 Compiler Explorer 集成各个编译器的方式有关?为什么指令数量会有如此巨大的跃升?

4

0 回答 0