1

要求如下:

/* length must be >= 18 */

int calcActualLength(int length) {
    int remainder = (length - 18) % 8;
    if (remainder == 0)
        return length;
    return length + 8 - remainder;
}

使用按位运算符,我可以重构第一行

int remainder = (length - 2) & 7;

能否进一步优化?

4

2 回答 2

2

((length+5)&~7)+2

int calcActualLength(int length) {
    int remainder = (length - 18) % 8;
    if (remainder == 0)
        return length;
    return length + 8 - remainder;
}
==>
int HELPER_calcActualLength(int length) {
    int remainder = length % 8;
    if (remainder == 0)
        return length;
    return length + 8 - remainder;
}
int calcActualLength(int length) {
    return 18 + HELPER_calcActualLength(length - 18);
}

并且当参数 >= 0 时在语义上HELPER_calcActualLength()等于ROUNDUP_8()

更简单的 ROUNDUP_8() 可以是:

#define ROUNDUP_8(x) (((x)+7)&~7)

int calcActualLength(int length) {
    return 18 + ROUNDUP_8(length - 18);
}
==>    2 + ROUNDUP_8(length - 18 + 16);
==>    2 + ROUNDUP_8(length - 2);
==>    2 + (((length - 2)+7)&~7)
==>    ((length+5)&~7)+2
于 2012-07-27T09:18:36.280 回答
2

使用以下代码编译时,原始代码会生成以下 64 位程序集gcc -O3

        movl    %edi, %eax
        leal    -18(%rax), %ecx
        movl    %ecx, %edx
        sarl    $31, %edx
        shrl    $29, %edx
        addl    %edx, %ecx
        andl    $7, %ecx
        subl    %edx, %ecx
        je      .L2
        addl    $8, %eax
        subl    %ecx, %eax
.L2:
        rep

正如对您问题的评论中所建议的,将参数更改为unsigned int允许更大的优化并导致以下程序集:

        leal    -18(%rdi), %edx
        movl    %edi, %eax
        andl    $7, %edx
        je      .L3
        leal    8(%rdi), %eax
        subl    %edx, %eax
.L3:
        rep

8可以通过添加7和屏蔽来执行向上舍入到的倍数~7。它的工作原理是这样的:如果最后三位不全为零,则将7进位添加到第 4 位,否则不发生进位。所以你的功能可以简化为:

return (((length - 18) + 7) & ~7) + 18;

或更简单:

return ((length - 11) & ~7) + 18;

GCC 将最后一行简单地编译为:

        leal    -11(%rdi), %eax
        andl    $-8, %eax
        addl    $18, %eax

请注意,lea(加载有效地址)指令经常被“滥用”,因为它能够计算简单的线性组合,如reg1 + size*reg2 + offset

于 2012-07-27T10:34:55.363 回答