5

我一直在尝试结合表达式模板(expression templates)和 rope 数据结构,想看看能获得怎样的性能收益。到目前为止,效果很好。但是,当把超过 5 个操作数连接在一起时,编译器无法正确优化,会生成不必要的临时对象。有人能告诉我为什么会这样吗?这是编译器的限制,还是我的优化选项需要调整?

我正在使用带有以下选项的 g++ 4.4.1 (mingw32):-O3 -Winline -Wextra -Wall -fno-exceptions -fno-rtti -fomit-frame-pointer -fexpensive-optimizations -fverbose-asm -S

代码如下。它只是一个实验,所以并没有严格遵循任何编码规范:

#include <stdio.h>

// CRTP base shared by every rope expression node. It carries no data; its
// only job is to let generic code (the operator+ overloads and the string
// constructor) accept "any rope" and get back to the concrete node type.
template<class Derived>
struct rope_base {
    // Returns *this viewed as the concrete expression type Derived.
    Derived const & ref() const;
};

// Heap-backed, NUL-terminated string that is materialised from a rope
// expression in a single allocation.
//
// The object owns m_value (allocated with new[] and released with delete[]),
// so it provides a deep-copying copy constructor and copy assignment; the
// implicitly generated shallow copies would make two objects delete the same
// buffer.
struct string {
    size_t m_length;  // number of characters, not counting the trailing NUL
    char * m_value;   // owned buffer; the constructors allocate m_length + 1 bytes

    // Evaluates the rope: queries its total length, allocates once, and lets
    // the rope write itself into the new buffer.
    template<typename Derived>
    string(const rope_base<Derived> & rope);
    // Deep copy: allocates a fresh buffer and duplicates other's characters.
    string(const string & other);
    ~string();

    // Deep copy assignment; safe for self-assignment.
    string & operator=(const string & other);

    // Pointer to the NUL-terminated characters (still owned by this object).
    const char * data() const;
    // Number of characters, not counting the trailing NUL.
    size_t length() const;
    // Copies the characters (without the NUL) to dst and returns the position
    // one past the last byte written, so that writes can be chained.
    char * write_to(char * dst) const;
};

inline string::string(const string & other)
: m_length(other.m_length)
, m_value(new char[m_length + 1]) {
    // + 1 so the terminating NUL is copied along with the characters.
    __builtin_memcpy(m_value, other.m_value, m_length + 1);
}

inline string & string::operator=(const string & other) {
    if (this != &other) {
        // Build the replacement first so *this is untouched until the copy exists.
        char * const copy = new char[other.m_length + 1];
        __builtin_memcpy(copy, other.m_value, other.m_length + 1);
        delete[] m_value;
        m_value = copy;
        m_length = other.m_length;
    }
    return *this;
}

// Was declared but never defined, so any rope holding a string operand
// failed at link time. Mirrors static_string::write_to.
inline char * string::write_to(char * dst) const {
    __builtin_memcpy(dst, m_value, m_length);
    return dst + m_length;
}

// Non-owning view of a C string: it remembers the pointer it was given and
// the string's length, and never frees anything.
struct static_string {
    // NOTE: m_value must stay declared before m_length; members are
    // initialised in declaration order.
    char const * m_value;     // characters being referenced (not owned)
    size_t const m_length;    // length of m_value, fixed at construction

    // Deliberately not explicit: string literals on the right-hand side of
    // operator+ are converted to static_string implicitly.
    static_string(char const * value);

    // Copies the characters to dst and returns the position one past the
    // last byte written, so that writes can be chained.
    char * write_to(char * dst) const;
    // Number of characters referenced.
    size_t length() const;
};

// Chooses how a rope node stores one of its operands. By default the operand
// is stored as a const copy; the specialisations for the leaf string types
// switch to storing a reference instead.
template<class T>
struct rope_traits {
    typedef T const type;
};

// A string operand is held by reference inside a rope node, so building the
// expression never copies the string; the string must outlive the rope.
template<>
struct rope_traits<string> {
    typedef string const & type;
};

// A static_string operand is held by reference inside a rope node; the
// static_string (often a temporary) must outlive the rope that refers to it.
template<>
struct rope_traits<static_string> {
    typedef static_string const & type;
};

// Expression-template node representing "left followed by right".
//
// How each operand is stored is decided by rope_traits: nested rope nodes are
// stored as const copies, while string / static_string operands are stored as
// references. A rope must therefore not outlive the strings it refers to.
template<class Left, class Right>
struct rope : rope_base<rope<Left, Right> > {
    typename rope_traits<Left>::type m_left;    // left operand
    typename rope_traits<Right>::type m_right;  // right operand

    // Captures both operands; no character data is touched here.
    rope(Left const & left, Right const & right);

    // Total number of characters of both operands.
    size_t length() const;
    // Writes the left operand and then the right operand to dst; returns the
    // position one past the last byte written.
    char * write_to(char * dst) const;
};

// Records the pointer and measures the string once, up front. With a string
// literal argument the compiler can fold the strlen into a constant.
inline static_string::static_string(const char * value)
    : m_value(value),
      m_length(__builtin_strlen(value)) {
}

inline size_t static_string::length() const {
    return m_length;
}

// Copies the referenced characters (no terminator) to dst.
// Returns the position just past the copied bytes so calls can be chained.
inline char * static_string::write_to(char * dst) const {
    char * const end = dst + m_length;
    __builtin_memcpy(dst, m_value, m_length);
    return end;
}

// Materialises a rope expression: asks the rope for its total length,
// allocates length + 1 bytes, has the rope write itself into the buffer and
// finally appends the terminating NUL.
// m_length is declared before m_value, so it is already initialised when the
// allocation size is computed.
template<typename Derived>
inline string::string(const rope_base<Derived> & rope)
    : m_length(rope.ref().length()),
      m_value(new char[m_length + 1]) {
    char * const end = rope.ref().write_to(m_value);
    *end = 0;
}

// Releases the character buffer owned by this string.
inline string::~string() {
    char * const owned = m_value;
    delete[] owned;
}

inline const char * string::data() const {
    return m_value;
}

inline size_t string::length() const {
    return m_length;
}

// CRTP downcast: every rope_base<Derived> in this file is the base subobject
// of a Derived, so the static_cast recovers the concrete expression node.
template<typename Derived>
inline const Derived & rope_base<Derived>::ref() const {
    const Derived * const self = static_cast<const Derived *>(this);
    return *self;
}

// Stores both operands in the form chosen by rope_traits (a const copy for
// nested ropes, a reference for string / static_string).
template<typename Left, typename Right>
inline rope<Left, Right>::rope(const Left & left, const Right & right)
    : m_left(left), m_right(right) {
}

// Length of the concatenation: the sum of both operands' lengths, computed
// recursively through nested rope nodes.
template<typename Left, typename Right>
inline size_t rope<Left, Right>::length() const {
    const size_t left_length = m_left.length();
    const size_t right_length = m_right.length();
    return left_length + right_length;
}

// Writes the left operand at dst, then the right operand directly after it.
// Returns the position one past the last byte written.
template<typename Left, typename Right>
inline char * rope<Left, Right>::write_to(char * dst) const {
    char * const after_left = m_left.write_to(dst);
    return m_right.write_to(after_left);
}

// leaf + leaf: starts a rope expression from two static strings.
// The node refers to both operands by reference.
inline rope<static_string, static_string> operator+(const static_string & left, const static_string & right) {
    typedef rope<static_string, static_string> node_type;
    return node_type(left, right);
}

// rope + leaf: appends a static string to an existing rope expression.
// The left rope is copied into the new node; the leaf is held by reference.
template<typename Left>
inline rope<Left, static_string> operator+(const rope_base<Left> & left, const static_string & right) {
    typedef rope<Left, static_string> node_type;
    const Left & left_node = left.ref();
    return node_type(left_node, right);
}

// leaf + rope: prepends a static string to an existing rope expression.
// The leaf is held by reference; the right rope is copied into the new node.
template<typename Right>
inline rope<static_string, Right> operator+(const static_string & left, const rope_base<Right> & right) {
    typedef rope<static_string, Right> node_type;
    const Right & right_node = right.ref();
    return node_type(left, right_node);
}

// rope + rope: joins two rope expressions; both are copied into the new node.
template<typename Left, typename Right>
inline rope<Left, Right> operator+(const rope_base<Left> & left, const rope_base<Right> & right) {
    typedef rope<Left, Right> node_type;
    const Left & left_node = left.ref();
    const Right & right_node = right.ref();
    return node_type(left_node, right_node);
}

// Short alias so the first operand of a concatenation can be written as
// ss("..."); the remaining literals convert to static_string implicitly.
typedef static_string ss;

// Test driver for the inlining experiment: concatenates string literals
// through the rope expression template and prints the result.
int main(int, char **)
{
    // works up to 5
    // The temporaries created by ss(...) and by the implicit literal
    // conversions live until the end of this full expression, which is long
    // enough for the string constructor to read them.
    string s(ss("111111111111") + "222222222222" + "333333333333" + "444444444444" + "555555555555");
    // length() returns size_t, which does not match %d. Cast to a type with
    // a matching conversion; %zu is avoided because the mingw32 C runtime
    // targeted here may not support it.
    printf("%lu %s\n", static_cast<unsigned long>(s.length()), s.data());
    return 0;
}

上面的代码生成了非常好的汇编输出:它被完全内联,并且所有参数都被折叠为常量:

.def    ___main;    .scl    2;    .type    32;    .endef
.section .rdata,"dr"
LC0:
.ascii "444444444444\0"
LC1:
.ascii "333333333333\0"
LC2:
.ascii "222222222222\0"
LC3:
.ascii "111111111111\0"
LC4:
.ascii "555555555555\0"
LC5:
.ascii "%d %s\12\0"
.text
.p2align 2,,3
.globl _main
.def    _main;    .scl    2;    .type    32;    .endef
_main:
pushl    %ebp     #
movl    %esp, %ebp     #,
andl    $-16, %esp     #,
pushl    %edi     #
pushl    %esi     #
pushl    %ebx     #
subl    $20, %esp     #,
call    ___main     #
movl    $LC3, %esi     #, D.2495
movl    $61, (%esp)     #,
call    __Znaj     #
movl    %eax, %ebx     #, D.3126
movl    $3, %ecx     #, tmp74
movl    %eax, %edi     # D.3126, D.3125
rep movsl
leal    12(%eax), %eax     #, D.3180
movb    $3, %cl     #,
movl    %eax, %edi     # D.3180, D.3180
movl    $LC2, %esi     #, D.2496
rep movsl
leal    24(%ebx), %eax     #, D.3186
movb    $3, %cl     #,
movl    %eax, %edi     # D.3186, D.3186
movl    $LC1, %esi     #, D.2502
rep movsl
leal    36(%ebx), %eax     #, D.3192
movb    $3, %cl     #,
movl    %eax, %edi     # D.3192, D.3192
movl    $LC0, %esi     #, D.2539
rep movsl
leal    48(%ebx), %eax     #, D.3198
movl    $LC4, %esi     #, tmp87
movb    $3, %cl     #,
movl    %eax, %edi     # D.3198, D.3198
rep movsl
movb    $0, 12(%eax)     #,
movl    %ebx, 8(%esp)     # D.3126,
movl    $60, 4(%esp)     #,
movl    $LC5, (%esp)     #,
call    _printf     #
testl    %ebx, %ebx     # D.3126
je    L2     #,
movl    %ebx, (%esp)     # D.3126,
call    __ZdaPv     #
L2:
xorl    %eax, %eax     #
addl    $20, %esp     #,
popl    %ebx     #
popl    %esi     #
popl    %edi     #
leave
ret
.def    __Znaj;    .scl    2;    .type    32;    .endef
.def    _printf;    .scl    2;    .type    32;    .endef
.def    __ZdaPv;    .scl    2;    .type    32;    .endef

再向串联表达式中添加一个或多个操作数时,内联就会失败,导致临时对象被复制,并且参数被当作变量处理:

    .def    ___main;    .scl    2;    .type    32;    .endef
    .section .rdata,"dr"
LC0:
    .ascii "777777777777\0"
LC1:
    .ascii "666666666666\0"
LC2:
    .ascii "555555555555\0"
LC3:
    .ascii "444444444444\0"
LC4:
    .ascii "333333333333\0"
LC5:
    .ascii "222222222222\0"
LC6:
    .ascii "111111111111\0"
LC7:
    .ascii "888888888888\0"
LC8:
    .ascii "%d %s\12\0"
    .text
    .p2align 2,,3
.globl _main
    .def    _main;    .scl    2;    .type    32;    .endef
_main:
    pushl    %ebp     #
    movl    %esp, %ebp     #,
    andl    $-16, %esp     #,
    pushl    %edi     #
    pushl    %esi     #
    pushl    %ebx     #
    subl    $228, %esp     #,
    call    ___main     #
    movl    $LC0, 168(%esp)     #, D.2650.m_value
    movl    $12, 172(%esp)     #, D.2650.m_length
    movl    $LC1, 176(%esp)     #, D.2613.m_value
    movl    $12, 180(%esp)     #, D.2613.m_length
    movl    $LC2, 184(%esp)     #, D.2576.m_value
    movl    $12, 188(%esp)     #, D.2576.m_length
    movl    $LC3, 192(%esp)     #, D.2539.m_value
    movl    $12, 196(%esp)     #, D.2539.m_length
    movl    $LC4, 200(%esp)     #, D.2502.m_value
    movl    $12, 204(%esp)     #, D.2502.m_length
    movl    $LC5, 208(%esp)     #, D.2496.m_value
    movl    $12, 212(%esp)     #, D.2496.m_length
    movl    $LC6, 216(%esp)     #, D.2495.m_value
    movl    $12, 220(%esp)     #, D.2495.m_length
    leal    216(%esp), %eax     #, tmp78
    movl    %eax, 152(%esp)     # tmp78, D.2571.m_left.m_left.m_left
    leal    208(%esp), %eax     #, tmp79
    movl    %eax, 156(%esp)     # tmp79, D.2571.m_left.m_left.m_right
    leal    200(%esp), %eax     #, tmp80
    movl    %eax, 160(%esp)     # tmp80, D.2571.m_left.m_right
    leal    192(%esp), %eax     #, tmp81
    movl    %eax, 164(%esp)     # tmp81, D.2571.m_right
    leal    132(%esp), %edi     #, tmp82
    leal    152(%esp), %esi     #, tmp83
    movl    $4, %ecx     #, tmp84
    rep movsl
    leal    184(%esp), %eax     #, tmp85
    movl    %eax, 148(%esp)     # tmp85, D.2608.m_right
    leal    108(%esp), %edi     #, tmp86
    leal    132(%esp), %esi     #, tmp87
    movb    $5, %cl     #,
    rep movsl
    leal    176(%esp), %eax     #, tmp89
    movl    %eax, 128(%esp)     # tmp89, D.2645.m_right
    leal    80(%esp), %edi     #, tmp90
    leal    108(%esp), %esi     #, tmp91
    movb    $6, %cl     #,
    rep movsl
    leal    168(%esp), %eax     #, tmp93
    movl    %eax, 104(%esp)     # tmp93, D.2682.m_right
    leal    48(%esp), %edi     #, tmp94
    leal    80(%esp), %esi     #, tmp95
    movb    $7, %cl     #,
    rep movsl
    movl    48(%esp), %ebx     # D.2719.m_left.m_left.m_left.m_left.m_left.m_left.m_left, SR.35
    movl    52(%esp), %edx     # D.2719.m_left.m_left.m_left.m_left.m_left.m_left.m_right, SR.34
    movl    56(%esp), %eax     # D.2719.m_left.m_left.m_left.m_left.m_left.m_right,
    movl    %eax, 36(%esp)     #, %sfp
    movl    60(%esp), %eax     # D.2719.m_left.m_left.m_left.m_left.m_right,
    movl    %eax, 32(%esp)     #, %sfp
    movl    64(%esp), %eax     # D.2719.m_left.m_left.m_left.m_right,
    movl    %eax, 28(%esp)     #, %sfp
    movl    68(%esp), %eax     # D.2719.m_left.m_left.m_right,
    movl    %eax, 24(%esp)     #, %sfp
    movl    72(%esp), %eax     # D.2719.m_left.m_right,
    movl    %eax, 20(%esp)     #, %sfp
    movl    4(%ebx), %eax     # <variable>.m_length, tmp97
    addl    4(%edx), %eax     # <variable>.m_length, tmp97
    addl    $12, %eax     #,
    movl    %eax, 44(%esp)     #, %sfp
    movl    36(%esp), %eax     # %sfp,
    movl    4(%eax), %eax     # <variable>.m_length,
    addl    %eax, 44(%esp)     #, %sfp
    movl    32(%esp), %eax     # %sfp,
    movl    4(%eax), %eax     # <variable>.m_length,
    addl    %eax, 44(%esp)     #, %sfp
    movl    28(%esp), %eax     # %sfp,
    movl    4(%eax), %eax     # <variable>.m_length,
    addl    %eax, 44(%esp)     #, %sfp
    movl    24(%esp), %eax     # %sfp,
    movl    4(%eax), %eax     # <variable>.m_length,
    addl    %eax, 44(%esp)     #, %sfp
    movl    20(%esp), %eax     # %sfp,
    movl    4(%eax), %eax     # <variable>.m_length,
    addl    %eax, 44(%esp)     #, %sfp
    movl    44(%esp), %eax     # %sfp, tmp105
    incl    %eax     # tmp105
    movl    %eax, (%esp)     # tmp105,
    movl    %edx, 16(%esp)     #,
    call    __Znaj     #
    movl    %eax, 40(%esp)     #, %sfp
    movl    (%ebx), %esi     # <variable>.m_value, <variable>.m_value
    movl    4(%ebx), %ecx     # <variable>.m_length, <variable>.m_length
    movl    %eax, %edi     #, D.3662
    rep movsb
    movl    40(%esp), %eax     # %sfp, D.3735
    addl    4(%ebx), %eax     # <variable>.m_length, D.3735
    movl    16(%esp), %edx     #,
    movl    (%edx), %esi     # <variable>.m_value, <variable>.m_value
    movl    4(%edx), %ecx     # <variable>.m_length, <variable>.m_length
    movl    %eax, %edi     # D.3735, D.3735
    rep movsb
    addl    4(%edx), %eax     # <variable>.m_length, D.3741
    movl    36(%esp), %edx     # %sfp,
    movl    (%edx), %esi     # <variable>.m_value, <variable>.m_value
    movl    4(%edx), %ecx     # <variable>.m_length, <variable>.m_length
    movl    %eax, %edi     # D.3741, D.3741
    rep movsb
    addl    4(%edx), %eax     # <variable>.m_length, D.3747
    movl    32(%esp), %edx     # %sfp,
    movl    (%edx), %esi     # <variable>.m_value, <variable>.m_value
    movl    4(%edx), %ecx     # <variable>.m_length, <variable>.m_length
    movl    %eax, %edi     # D.3747, D.3747
    rep movsb
    addl    4(%edx), %eax     # <variable>.m_length, D.3753
    movl    28(%esp), %edx     # %sfp,
    movl    (%edx), %esi     # <variable>.m_value, <variable>.m_value
    movl    4(%edx), %ecx     # <variable>.m_length, <variable>.m_length
    movl    %eax, %edi     # D.3753, D.3753
    rep movsb
    addl    4(%edx), %eax     # <variable>.m_length, D.3759
    movl    24(%esp), %edx     # %sfp,
    movl    (%edx), %esi     # <variable>.m_value, <variable>.m_value
    movl    4(%edx), %ecx     # <variable>.m_length, <variable>.m_length
    movl    %eax, %edi     # D.3759, D.3759
    rep movsb
    addl    4(%edx), %eax     # <variable>.m_length, D.3765
    movl    20(%esp), %edx     # %sfp,
    movl    (%edx), %esi     # <variable>.m_value, <variable>.m_value
    movl    4(%edx), %ecx     # <variable>.m_length, <variable>.m_length
    movl    %eax, %edi     # D.3765, D.3765
    rep movsb
    addl    4(%edx), %eax     # <variable>.m_length, D.3771
    movl    $LC7, %esi     #, tmp148
    movb    $3, %cl     #,
    movl    %eax, %edi     # D.3771, D.3771
    rep movsl
    movb    $0, 12(%eax)     #,
    movl    40(%esp), %eax     # %sfp,
    movl    %eax, 8(%esp)     #,
    movl    44(%esp), %edx     # %sfp,
    movl    %edx, 4(%esp)     #,
    movl    $LC8, (%esp)     #,
    call    _printf     #
    movl    40(%esp), %eax     # %sfp,
    testl    %eax, %eax     #
    je    L2     #,
    movl    40(%esp), %eax     # %sfp,
    movl    %eax, (%esp)     #,
    call    __ZdaPv     #
L2:
    xorl    %eax, %eax     #
    addl    $228, %esp     #,
    popl    %ebx     #
    popl    %esi     #
    popl    %edi     #
    leave
    ret
    .def    __Znaj;    .scl    2;    .type    32;    .endef
    .def    _printf;    .scl    2;    .type    32;    .endef
    .def    __ZdaPv;    .scl    2;    .type    32;    .endef
4

2 回答 2

1

一些事情(根据评论中的讨论):

升级你的 gcc。在 4.6 系列中进行了很多优化改进,包括对内联的一些改进。

使用较新的 gcc 时,-Winline 会警告你 string 的构造函数不会被内联:

warning: inlining failed in call to 'string::string(const rope_base<Derived>&) [with Derived = rope<rope<rope<rope<rope<static_string, static_string>, static_string>, static_string>, static_string>, static_string>]': call is unlikely and code size would grow [-Winline]

我不太清楚 gcc 为什么会给出这条特定的消息(这条消息通常出现在不太可能执行的分支中的内联调用上,那种情况下原因很明显),但问题的根源是该构造函数中对 new 的调用。改用静态的固定大小缓冲区可以生成紧凑漂亮的汇编。改用 malloc,或者把 new 包装在一个非内联函数中再调用,就可以让构造函数被内联(但 malloc/该包装函数本身不会被内联)。

采取哪种方法实际上取决于您的具体用例。如果在编译时可以知道字符串的长度,或者给定一个最大值,如您的示例所示,您显然可以完全内联。但在一般情况下,你总是不得不放弃一些东西。

于 2013-02-05T12:43:50.730 回答
0

正如 jmetcalfe 所澄清的那样,g++ 未能正确折叠所有临时对象的原因是 g++ 版本过旧。从 4.4.1 升级到 4.7.2 解决了这个问题。g++ 的变更日志中很可能包含相关信息:我触发了哪些 bug,或者新增了哪些优化才得以生成正确的结果。

于 2013-02-05T12:16:34.303 回答