以下两个版本均在 32 位机器上使用 gcc -O3 编译。
启用 PENALTY_ON 时的反汇编：
00000000 <func>:
0: 31 c0 xor %eax,%eax
2: 8d b6 00 00 00 00 lea 0x0(%esi),%esi
8: 8d 50 0a lea 0xa(%eax),%edx
b: 66 89 94 00 02 00 00 mov %dx,0x2(%eax,%eax,1)
12: 00
13: 8b 8c 00 02 00 00 00 mov 0x2(%eax,%eax,1),%ecx
1a: 89 c2 mov %eax,%edx
1c: 66 89 84 00 00 00 00 mov %ax,0x0(%eax,%eax,1)
23: 00
24: 83 c0 02 add $0x2,%eax
27: d1 ea shr %edx
29: 3d e8 03 00 00 cmp $0x3e8,%eax
2e: 89 0c 95 00 00 00 00 mov %ecx,0x0(,%edx,4)
35: 75 d1 jne 8 <func+0x8>
37: f3 c3 repz ret
未启用 PENALTY_ON 时的反汇编：
00000000 <func>:
0: 31 c0 xor %eax,%eax
2: 8d b6 00 00 00 00 lea 0x0(%esi),%esi
8: 8d 50 0a lea 0xa(%eax),%edx
b: 66 89 84 00 00 00 00 mov %ax,0x0(%eax,%eax,1)
12: 00
13: 66 89 94 00 02 00 00 mov %dx,0x2(%eax,%eax,1)
1a: 00
1b: 83 c0 02 add $0x2,%eax
1e: 3d e8 03 00 00 cmp $0x3e8,%eax
23: 75 e3 jne 8 <func+0x8>
25: 66 31 c0 xor %ax,%ax
28: 8b 8c 00 02 00 00 00 mov 0x2(%eax,%eax,1),%ecx
2f: 89 c2 mov %eax,%edx
31: 83 c0 02 add $0x2,%eax
34: d1 ea shr %edx
36: 3d e8 03 00 00 cmp $0x3e8,%eax
3b: 89 0c 95 00 00 00 00 mov %ecx,0x0(,%edx,4)
42: 75 e4 jne 28 <func+0x28>
44: f3 c3 repz ret
我认为原因是：启用 PENALTY_ON 时发生了写后读（Read-after-Write）依赖，即下面先以 16 位写入 0x2(%eax,%eax,1)，紧接着又以 32 位从同一地址读取：
b: 66 89 94 00 02 00 00 mov %dx,0x2(%eax,%eax,1)
12: 00
13: 8b 8c 00 02 00 00 00 mov 0x2(%eax,%eax,1),%ecx