c++ - 与零比较时的 int 运算符 != 和 ==

Question

我发现 != 和 == 并不是测试零或非零的最快方法。

bool nonZero1 = integer != 0;
xor eax, eax
test ecx, ecx
setne al

bool nonZero2 = integer < 0 || integer > 0;
test ecx, ecx
setne al

bool zero1 = integer == 0;
xor eax, eax
test ecx, ecx
sete al

bool zero2 = !(integer < 0 || integer > 0);
test ecx, ecx
sete al

编译器：VC++ 11 优化标志：/O2 /GL /LTCG

这是 x86-32 的汇编输出。两个比较的第二个版本在 x86-32 和 x86-64 上都快了约 12%。但是，在 x86-64 上，指令是相同的（第一个版本看起来与第二个版本完全相同），但第二个版本仍然更快。

为什么编译器不在 x86-32 上生成更快的版本？
当汇编输出相同时，为什么第二个版本在 x86-64 上仍然更快？

编辑：我添加了基准测试代码。零：1544 毫秒、1358 毫秒；非零：1544 毫秒、1358 毫秒。代码见 http://pastebin.com/m7ZSUrcP 或 http://anonymouse.org/cgi-bin/anon-www.cgi/http://pastebin.com/m7ZSUrcP

注意：在单个源文件中编译时定位这些函数可能不方便，因为 main.asm 很大。我在单独的源文件中有 zero1、zero2、nonZero1、nonZero2。

EDIT2：安装了 VC++11 和 VC++2010 的人可以运行基准测试代码并发布时间吗？它可能确实是 VC++11 中的一个错误。

score 122 · Accepted Answer

这是一个很好的问题，但我认为你已经成为编译器依赖分析的受害者。

编译器只需清除一次 eax 的高位，这些位在第二个版本中仍保持清零。第二个版本本来也必须付出 xor eax, eax 的代价，只是编译器分析证明了 eax 的高位已被第一个版本清零。

第二个版本能够利用编译器在第一个版本中所做的工作来“作弊”。

你如何衡量时间？是“循环中的（版本一，后跟版本二）”还是“（循环中的版本一）后跟（循环中的版本二）”？

不要在同一个程序中进行这两个测试（而是为每个版本重新编译），或者如果这样做，请同时测试“版本 A 先”和“版本 B 先”，看看哪个先到者会付出代价。

作弊说明：

timer1.start();
double x1 = 2 * sqrt(n + 37 * y + exp(z));
timer1.stop();
timer2.start();
double x2 = 31 * sqrt(n + 37 * y + exp(z));
timer2.stop();

如果 timer2 的持续时间小于 timer1 的持续时间，我们不会得出乘以 31 比乘以 2 快的结论。相反，我们意识到编译器执行了公共子表达式分析，代码变为：

timer1.start();
double common = sqrt(n + 37 * y + exp(z));
double x1 = 2 * common;
timer1.stop();
timer2.start();
double x2 = 31 * common;
timer2.stop();

这唯一能证明的是，乘以 31 比计算 common 快。这一点都不奇怪——乘法比 sqrt 和 exp 快得多。

score 19 · Accepted Answer

编辑：看到了 OP 针对我的代码给出的汇编列表。现在我甚至不认为这是 VS2011 的普遍性错误，它可能只是 OP 的代码触发的一个特例错误。我使用 clang 3.2、gcc 4.6.2 和 VS2010 按原样运行了 OP 的代码，在所有情况下，最大差异都在 1% 左右。

我刚刚对我的 ne.c 文件做了适当修改，并使用 /O2 和 /GL 标志编译了源代码。源代码如下：

/* Returns 1 when n is non-zero and 0 otherwise, using a direct != comparison. */
int ne1(int n) {
 return n != 0;
 }

 /* Returns 1 when n is negative or positive (i.e. non-zero), 0 when n is zero.
    Spelled as two relational checks joined by || instead of a single !=. */
 int ne2(int n) {
 return n < 0 || n > 0;
 }

 /* Returns 1 when n is non-zero, written as the negation of an equality test with zero. */
 int ne3(int n) {
 return !(n == 0);
 }

/* Calls each variant once on a rand() value; p, q and r are never read afterwards. */
int main() { int p = ne1(rand()), q = ne2(rand()), r = ne3(rand());}

以及相应的汇编：

    ; Listing generated by Microsoft (R) Optimizing Compiler Version 16.00.30319.01 

    TITLE   D:\llvm_workspace\tests\ne.c
    .686P
    .XMM
    include listing.inc
    .model  flat

INCLUDELIB OLDNAMES

EXTRN   @__security_check_cookie@4:PROC
EXTRN   _rand:PROC
; _ne3 -- MSVC output for `return !(n == 0);` (ne.c lines 11-12).
; In:  n at [esp+4] (_n$ = 8, so _n$[esp-4] resolves to esp+4)
; Out: eax = 1 if n != 0, else 0
PUBLIC  _ne3
; Function compile flags: /Ogtpy
;   COMDAT _ne3
_TEXT   SEGMENT
_n$ = 8                         ; size = 4
_ne3    PROC                        ; COMDAT
; File d:\llvm_workspace\tests\ne.c
; Line 11
; eax is zeroed first so that it serves both as the zero operand of cmp
; and as a clean upper 24 bits for the byte result written by setne.
    xor eax, eax
    cmp DWORD PTR _n$[esp-4], eax
    setne   al
; Line 12
    ret 0
_ne3    ENDP
_TEXT   ENDS
; _ne2 -- MSVC output for `return n < 0 || n > 0;` (ne.c lines 7-8).
; In:  n at [esp+4] (_n$ = 8, so _n$[esp-4] resolves to esp+4)
; Out: eax = 1 if n != 0, else 0
PUBLIC  _ne2
; Function compile flags: /Ogtpy
;   COMDAT _ne2
_TEXT   SEGMENT
_n$ = 8                         ; size = 4
_ne2    PROC                        ; COMDAT
; Line 7
; Branch-free sequence: `cmp 0, n` sets CF exactly when n != 0 (unsigned 0 < n);
; `sbb eax, eax` turns CF into 0 or -1; `neg eax` maps that to 0 or 1.
    xor eax, eax
    cmp eax, DWORD PTR _n$[esp-4]
    sbb eax, eax
    neg eax
; Line 8
    ret 0
_ne2    ENDP
_TEXT   ENDS
; _ne1 -- MSVC output for `return n != 0;` (ne.c lines 3-4).
; In:  n at [esp+4] (_n$ = 8, so _n$[esp-4] resolves to esp+4)
; Out: eax = 1 if n != 0, else 0
; The instruction sequence is the same as the one emitted for _ne3.
PUBLIC  _ne1
; Function compile flags: /Ogtpy
;   COMDAT _ne1
_TEXT   SEGMENT
_n$ = 8                         ; size = 4
_ne1    PROC                        ; COMDAT
; Line 3
; eax = 0 doubles as the comparison operand and the cleared result register.
    xor eax, eax
    cmp DWORD PTR _n$[esp-4], eax
    setne   al
; Line 4
    ret 0
_ne1    ENDP
_TEXT   ENDS
; _main -- MSVC output for main (ne.c line 14).
; Only the three rand() calls remain: no call to _ne1/_ne2/_ne3 appears here
; and the rand() results are not consumed. Returns 0 in eax.
PUBLIC  _main
; Function compile flags: /Ogtpy
;   COMDAT _main
_TEXT   SEGMENT
_main   PROC                        ; COMDAT
; Line 14
    call    _rand
    call    _rand
    call    _rand
; return value 0
    xor eax, eax
    ret 0
_main   ENDP
_TEXT   ENDS
END

使用 <、> 和 || 运算符的 ne2() 显然开销更大。ne1() 和 ne3() 分别使用 != 和 == 运算符，更简洁且彼此等效。

Visual Studio 2011 尚处于测试阶段，我认为这是一个错误。我用另外两个编译器（gcc 4.6.2 和 clang 3.2）配合 O2 优化开关，在我的 Windows 7 机器上进行了测试，（我手头的）三个测试都生成了完全相同的汇编。摘要如下：

$ cat ne.c

#include <stdbool.h>
/* Returns true when n is non-zero, using a direct != comparison. */
bool ne1(int n) {
    return n != 0;
}

/* Returns true when n is negative or positive (i.e. non-zero),
   spelled as two relational checks joined by ||. */
bool ne2(int n) {
    return n < 0 || n > 0;
}

/* Returns the negation of (n != 0), i.e. true exactly when n is zero.
   NOTE(review): this is the inverse of ne1/ne2 and differs from the earlier
   `!(n == 0)` variant; it matches the `sete` emitted for _ne3 in the compiler
   listings that follow. Confirm whether `!(n == 0)` was intended. */
bool ne3(int n) {
    return !(n != 0);
}

/* Empty entry point: none of ne1/ne2/ne3 is called from here. */
int main() {}

使用 gcc 产生：

# _ne1 -- gcc output for `return n != 0;` (AT&T syntax, 32-bit).
# Reads the 32-bit word at 4(%esp) -- presumably the stack slot holding n --
# and returns the result in %al only; the rest of %eax still holds the loaded value.
_ne1:
LFB0:
    .cfi_startproc
    movl    4(%esp), %eax
    # test reg,reg sets ZF when the value is zero; setne writes 1 when it is not
    testl   %eax, %eax
    setne   %al
    ret
    .cfi_endproc
LFE0:
    # _ne2 -- gcc output for `return n < 0 || n > 0;`.
    # Same test/setne sequence as _ne1; only the scratch register differs (%edx).
    # The result is written to %al; no instruction here writes the upper bits of %eax.
    .p2align 2,,3
    .globl  _ne2
    .def    _ne2;   .scl    2;  .type   32; .endef
_ne2:
LFB1:
    .cfi_startproc
    movl    4(%esp), %edx
    testl   %edx, %edx
    setne   %al
    ret
    .cfi_endproc
LFE1:
    # _ne3 -- gcc output for `return !(n != 0);`.
    # Uses sete, so %al = 1 when the loaded value is zero -- the opposite
    # condition from the setne used in _ne1 and _ne2.
    .p2align 2,,3
    .globl  _ne3
    .def    _ne3;   .scl    2;  .type   32; .endef
_ne3:
LFB2:
    .cfi_startproc
    movl    4(%esp), %ecx
    testl   %ecx, %ecx
    sete    %al
    ret
    .cfi_endproc
LFE2:
    # _main -- gcc output for the empty `int main() {}`.
    # Sets up a frame, aligns the stack, calls ___main, then returns 0.
    # NOTE(review): ___main is presumably the toolchain's runtime start-up hook -- confirm.
    .def    ___main;    .scl    2;  .type   32; .endef
    .section    .text.startup,"x"
    .p2align 2,,3
    .globl  _main
    .def    _main;  .scl    2;  .type   32; .endef
_main:
LFB3:
    .cfi_startproc
    pushl   %ebp
    .cfi_def_cfa_offset 8
    .cfi_offset 5, -8
    movl    %esp, %ebp
    .cfi_def_cfa_register 5
    # round %esp down to a multiple of 16 before the call
    andl    $-16, %esp
    call    ___main
    # return value 0
    xorl    %eax, %eax
    # leave restores %esp from %ebp and pops the saved %ebp
    leave
    .cfi_restore 5
    .cfi_def_cfa 4, 4
    ret
    .cfi_endproc
LFE3:

使用 clang 生成：

    # _ne1 -- clang output for `return n != 0;` (AT&T syntax, 32-bit).
    # Compares the word at 4(%esp) with 0 directly in memory, then
    # zero-extends the byte result so the whole of %eax is 0 or 1.
    .def     _ne1;
    .scl    2;
    .type   32;
    .endef
    .text
    .globl  _ne1
    .align  16, 0x90
_ne1:
    cmpl    $0, 4(%esp)
    setne   %al
    movzbl  %al, %eax
    ret

    # _ne2 -- clang output for `return n < 0 || n > 0;`.
    # Instruction-for-instruction the same as _ne1: cmpl / setne / movzbl.
    .def     _ne2;
    .scl    2;
    .type   32;
    .endef
    .globl  _ne2
    .align  16, 0x90
_ne2:
    cmpl    $0, 4(%esp)
    setne   %al
    movzbl  %al, %eax
    ret

    # _ne3 -- clang output for `return !(n != 0);`.
    # Uses sete, so %eax = 1 when the compared word is zero -- the opposite
    # condition from the setne used in _ne1 and _ne2.
    .def     _ne3;
    .scl    2;
    .type   32;
    .endef
    .globl  _ne3
    .align  16, 0x90
_ne3:
    cmpl    $0, 4(%esp)
    sete    %al
    movzbl  %al, %eax
    ret

    # _main -- clang output for the empty `int main() {}`.
    # Sets up and tears down a frame around a call to ___main, then returns 0.
    # NOTE(review): ___main is presumably the toolchain's runtime start-up hook -- confirm.
    .def     _main;
    .scl    2;
    .type   32;
    .endef
    .globl  _main
    .align  16, 0x90
_main:
    pushl   %ebp
    movl    %esp, %ebp
    calll   ___main
    # return value 0
    xorl    %eax, %eax
    popl    %ebp
    ret

我的建议是在 Microsoft Connect 上将此作为错误提交。

注意：我将它们作为 C 源代码编译，因为我认为改用相应的 C++ 编译器不会在这里带来任何显著差异。

c++ - 与零比较时的 int 运算符 != 和 ==

2 回答 2

Related

Reference