我一直在尝试使用富有表现力的模板和绳索数据结构来尝试看看可以实现什么样的收益。到目前为止,它工作得很好。但是,当尝试将超过 5 个参数连接在一起时,编译器无法正确优化并生成不必要的临时变量。有人能告诉我为什么会这样吗?是编译器限制还是我的优化选项需要调整?
我正在使用带有以下选项的 g++ 4.4.1 (mingw32):-O3 -Winline -Wextra -Wall -fno-exceptions -fno-rtti -fomit-frame-pointer -fexpensive-optimizations -fverbose-asm -S
代码如下,它只是一个实验,所以它并不真正遵循任何标准:
#include <stdio.h>
template<typename Derived>
struct rope_base {
const Derived & ref() const;
};
struct string {
size_t m_length;
char * m_value;
template<typename Derived>
string(const rope_base<Derived> & rope);
~string();
const char * data() const;
size_t length() const;
char * write_to(char * dst) const;
};
struct static_string {
const char * m_value;
const size_t m_length;
static_string(const char * value);
size_t length() const;
char * write_to(char * dst) const;
};
template<typename T>
struct rope_traits {
typedef const T type;
};
template<>
struct rope_traits<string> {
typedef const string & type;
};
template<>
struct rope_traits<static_string> {
typedef const static_string & type;
};
template<typename Left, typename Right>
struct rope : public rope_base<rope<Left, Right> > {
typename rope_traits<Left>::type m_left;
typename rope_traits<Right>::type m_right;
rope(const Left & left, const Right & right);
size_t length() const;
char * write_to(char * dst) const;
};
inline static_string::static_string(const char * value)
: m_value(value)
, m_length(__builtin_strlen(m_value)) {}
inline size_t static_string::length() const {
return m_length;
}
inline char * static_string::write_to(char * dst) const {
__builtin_memcpy(dst, m_value, m_length);
return dst + m_length;
}
template<typename Derived>
inline string::string(const rope_base<Derived> & rope)
: m_length(rope.ref().length())
, m_value(new char[m_length + 1]) {
*rope.ref().write_to(m_value) = 0;
}
inline string::~string() {
delete[] m_value;
}
inline const char * string::data() const {
return m_value;
}
inline size_t string::length() const {
return m_length;
}
template<typename Derived>
inline const Derived & rope_base<Derived>::ref() const {
return static_cast<const Derived &>(*this);
}
template<typename Left, typename Right>
inline rope<Left, Right>::rope(const Left & left, const Right & right)
: m_left(left)
, m_right(right) {}
template<typename Left, typename Right>
inline size_t rope<Left, Right>::length() const {
return m_left.length() + m_right.length();
}
template<typename Left, typename Right>
inline char * rope<Left, Right>::write_to(char * dst) const {
return m_right.write_to(m_left.write_to(dst));
}
inline rope<static_string, static_string> operator+(const static_string & left, const static_string & right) {
return rope<static_string, static_string>(left, right);
}
template<typename Left>
inline rope<Left, static_string> operator+(const rope_base<Left> & left, const static_string & right) {
return rope<Left, static_string>(left.ref(), right);
}
template<typename Right>
inline rope<static_string, Right> operator+(const static_string & left, const rope_base<Right> & right) {
return rope<static_string, Right>(left, right.ref());
}
template<typename Left, typename Right>
inline rope<Left, Right> operator+(const rope_base<Left> & left, const rope_base<Right> & right) {
return rope<Left, Right>(left.ref(), right.ref());
}
typedef static_string ss;
int main(int, char **)
{
// works up to 5
string s(ss("111111111111") + "222222222222" + "333333333333" + "444444444444" + "555555555555");
printf("%d %s\n", s.length(), s.data());
return 0;
}
上面的代码生成了非常好的汇编程序输出,它是完全内联的,并且所有参数都被简化为常量:
.def ___main; .scl 2; .type 32; .endef
.section .rdata,"dr"
LC0:
.ascii "444444444444\0"
LC1:
.ascii "333333333333\0"
LC2:
.ascii "222222222222\0"
LC3:
.ascii "111111111111\0"
LC4:
.ascii "555555555555\0"
LC5:
.ascii "%d %s\12\0"
.text
.p2align 2,,3
.globl _main
.def _main; .scl 2; .type 32; .endef
_main:
pushl %ebp #
movl %esp, %ebp #,
andl $-16, %esp #,
pushl %edi #
pushl %esi #
pushl %ebx #
subl $20, %esp #,
call ___main #
movl $LC3, %esi #, D.2495
movl $61, (%esp) #,
call __Znaj #
movl %eax, %ebx #, D.3126
movl $3, %ecx #, tmp74
movl %eax, %edi # D.3126, D.3125
rep movsl
leal 12(%eax), %eax #, D.3180
movb $3, %cl #,
movl %eax, %edi # D.3180, D.3180
movl $LC2, %esi #, D.2496
rep movsl
leal 24(%ebx), %eax #, D.3186
movb $3, %cl #,
movl %eax, %edi # D.3186, D.3186
movl $LC1, %esi #, D.2502
rep movsl
leal 36(%ebx), %eax #, D.3192
movb $3, %cl #,
movl %eax, %edi # D.3192, D.3192
movl $LC0, %esi #, D.2539
rep movsl
leal 48(%ebx), %eax #, D.3198
movl $LC4, %esi #, tmp87
movb $3, %cl #,
movl %eax, %edi # D.3198, D.3198
rep movsl
movb $0, 12(%eax) #,
movl %ebx, 8(%esp) # D.3126,
movl $60, 4(%esp) #,
movl $LC5, (%esp) #,
call _printf #
testl %ebx, %ebx # D.3126
je L2 #,
movl %ebx, (%esp) # D.3126,
call __ZdaPv #
L2:
xorl %eax, %eax #
addl $20, %esp #,
popl %ebx #
popl %esi #
popl %edi #
leave
ret
.def __Znaj; .scl 2; .type 32; .endef
.def _printf; .scl 2; .type 32; .endef
.def __ZdaPv; .scl 2; .type 32; .endef
将一个或多个参数添加到串联时内联失败,导致临时对象被复制并且参数被视为变量:
.def ___main; .scl 2; .type 32; .endef
.section .rdata,"dr"
LC0:
.ascii "777777777777\0"
LC1:
.ascii "666666666666\0"
LC2:
.ascii "555555555555\0"
LC3:
.ascii "444444444444\0"
LC4:
.ascii "333333333333\0"
LC5:
.ascii "222222222222\0"
LC6:
.ascii "111111111111\0"
LC7:
.ascii "888888888888\0"
LC8:
.ascii "%d %s\12\0"
.text
.p2align 2,,3
.globl _main
.def _main; .scl 2; .type 32; .endef
_main:
pushl %ebp #
movl %esp, %ebp #,
andl $-16, %esp #,
pushl %edi #
pushl %esi #
pushl %ebx #
subl $228, %esp #,
call ___main #
movl $LC0, 168(%esp) #, D.2650.m_value
movl $12, 172(%esp) #, D.2650.m_length
movl $LC1, 176(%esp) #, D.2613.m_value
movl $12, 180(%esp) #, D.2613.m_length
movl $LC2, 184(%esp) #, D.2576.m_value
movl $12, 188(%esp) #, D.2576.m_length
movl $LC3, 192(%esp) #, D.2539.m_value
movl $12, 196(%esp) #, D.2539.m_length
movl $LC4, 200(%esp) #, D.2502.m_value
movl $12, 204(%esp) #, D.2502.m_length
movl $LC5, 208(%esp) #, D.2496.m_value
movl $12, 212(%esp) #, D.2496.m_length
movl $LC6, 216(%esp) #, D.2495.m_value
movl $12, 220(%esp) #, D.2495.m_length
leal 216(%esp), %eax #, tmp78
movl %eax, 152(%esp) # tmp78, D.2571.m_left.m_left.m_left
leal 208(%esp), %eax #, tmp79
movl %eax, 156(%esp) # tmp79, D.2571.m_left.m_left.m_right
leal 200(%esp), %eax #, tmp80
movl %eax, 160(%esp) # tmp80, D.2571.m_left.m_right
leal 192(%esp), %eax #, tmp81
movl %eax, 164(%esp) # tmp81, D.2571.m_right
leal 132(%esp), %edi #, tmp82
leal 152(%esp), %esi #, tmp83
movl $4, %ecx #, tmp84
rep movsl
leal 184(%esp), %eax #, tmp85
movl %eax, 148(%esp) # tmp85, D.2608.m_right
leal 108(%esp), %edi #, tmp86
leal 132(%esp), %esi #, tmp87
movb $5, %cl #,
rep movsl
leal 176(%esp), %eax #, tmp89
movl %eax, 128(%esp) # tmp89, D.2645.m_right
leal 80(%esp), %edi #, tmp90
leal 108(%esp), %esi #, tmp91
movb $6, %cl #,
rep movsl
leal 168(%esp), %eax #, tmp93
movl %eax, 104(%esp) # tmp93, D.2682.m_right
leal 48(%esp), %edi #, tmp94
leal 80(%esp), %esi #, tmp95
movb $7, %cl #,
rep movsl
movl 48(%esp), %ebx # D.2719.m_left.m_left.m_left.m_left.m_left.m_left.m_left, SR.35
movl 52(%esp), %edx # D.2719.m_left.m_left.m_left.m_left.m_left.m_left.m_right, SR.34
movl 56(%esp), %eax # D.2719.m_left.m_left.m_left.m_left.m_left.m_right,
movl %eax, 36(%esp) #, %sfp
movl 60(%esp), %eax # D.2719.m_left.m_left.m_left.m_left.m_right,
movl %eax, 32(%esp) #, %sfp
movl 64(%esp), %eax # D.2719.m_left.m_left.m_left.m_right,
movl %eax, 28(%esp) #, %sfp
movl 68(%esp), %eax # D.2719.m_left.m_left.m_right,
movl %eax, 24(%esp) #, %sfp
movl 72(%esp), %eax # D.2719.m_left.m_right,
movl %eax, 20(%esp) #, %sfp
movl 4(%ebx), %eax # <variable>.m_length, tmp97
addl 4(%edx), %eax # <variable>.m_length, tmp97
addl $12, %eax #,
movl %eax, 44(%esp) #, %sfp
movl 36(%esp), %eax # %sfp,
movl 4(%eax), %eax # <variable>.m_length,
addl %eax, 44(%esp) #, %sfp
movl 32(%esp), %eax # %sfp,
movl 4(%eax), %eax # <variable>.m_length,
addl %eax, 44(%esp) #, %sfp
movl 28(%esp), %eax # %sfp,
movl 4(%eax), %eax # <variable>.m_length,
addl %eax, 44(%esp) #, %sfp
movl 24(%esp), %eax # %sfp,
movl 4(%eax), %eax # <variable>.m_length,
addl %eax, 44(%esp) #, %sfp
movl 20(%esp), %eax # %sfp,
movl 4(%eax), %eax # <variable>.m_length,
addl %eax, 44(%esp) #, %sfp
movl 44(%esp), %eax # %sfp, tmp105
incl %eax # tmp105
movl %eax, (%esp) # tmp105,
movl %edx, 16(%esp) #,
call __Znaj #
movl %eax, 40(%esp) #, %sfp
movl (%ebx), %esi # <variable>.m_value, <variable>.m_value
movl 4(%ebx), %ecx # <variable>.m_length, <variable>.m_length
movl %eax, %edi #, D.3662
rep movsb
movl 40(%esp), %eax # %sfp, D.3735
addl 4(%ebx), %eax # <variable>.m_length, D.3735
movl 16(%esp), %edx #,
movl (%edx), %esi # <variable>.m_value, <variable>.m_value
movl 4(%edx), %ecx # <variable>.m_length, <variable>.m_length
movl %eax, %edi # D.3735, D.3735
rep movsb
addl 4(%edx), %eax # <variable>.m_length, D.3741
movl 36(%esp), %edx # %sfp,
movl (%edx), %esi # <variable>.m_value, <variable>.m_value
movl 4(%edx), %ecx # <variable>.m_length, <variable>.m_length
movl %eax, %edi # D.3741, D.3741
rep movsb
addl 4(%edx), %eax # <variable>.m_length, D.3747
movl 32(%esp), %edx # %sfp,
movl (%edx), %esi # <variable>.m_value, <variable>.m_value
movl 4(%edx), %ecx # <variable>.m_length, <variable>.m_length
movl %eax, %edi # D.3747, D.3747
rep movsb
addl 4(%edx), %eax # <variable>.m_length, D.3753
movl 28(%esp), %edx # %sfp,
movl (%edx), %esi # <variable>.m_value, <variable>.m_value
movl 4(%edx), %ecx # <variable>.m_length, <variable>.m_length
movl %eax, %edi # D.3753, D.3753
rep movsb
addl 4(%edx), %eax # <variable>.m_length, D.3759
movl 24(%esp), %edx # %sfp,
movl (%edx), %esi # <variable>.m_value, <variable>.m_value
movl 4(%edx), %ecx # <variable>.m_length, <variable>.m_length
movl %eax, %edi # D.3759, D.3759
rep movsb
addl 4(%edx), %eax # <variable>.m_length, D.3765
movl 20(%esp), %edx # %sfp,
movl (%edx), %esi # <variable>.m_value, <variable>.m_value
movl 4(%edx), %ecx # <variable>.m_length, <variable>.m_length
movl %eax, %edi # D.3765, D.3765
rep movsb
addl 4(%edx), %eax # <variable>.m_length, D.3771
movl $LC7, %esi #, tmp148
movb $3, %cl #,
movl %eax, %edi # D.3771, D.3771
rep movsl
movb $0, 12(%eax) #,
movl 40(%esp), %eax # %sfp,
movl %eax, 8(%esp) #,
movl 44(%esp), %edx # %sfp,
movl %edx, 4(%esp) #,
movl $LC8, (%esp) #,
call _printf #
movl 40(%esp), %eax # %sfp,
testl %eax, %eax #
je L2 #,
movl 40(%esp), %eax # %sfp,
movl %eax, (%esp) #,
call __ZdaPv #
L2:
xorl %eax, %eax #
addl $228, %esp #,
popl %ebx #
popl %esi #
popl %edi #
leave
ret
.def __Znaj; .scl 2; .type 32; .endef
.def _printf; .scl 2; .type 32; .endef
.def __ZdaPv; .scl 2; .type 32; .endef