1

使用 ocount 对下面的代码进行分析,结果显示:定义 PENALTY_ON 时消耗的周期数更多,不定义时周期数更少。我想弄清楚:为什么打开 PENALTY_ON 之后开销会变大?

#include <stdint.h>
#include <string.h>

/*
 * Benchmark buffers.
 * arr holds 1000 written 16-bit values; it is declared with 1010 elements so
 * that the 32-bit load starting at arr[999] (which also covers arr[1000])
 * stays inside the array.
 * r receives one 32-bit result per pair of arr elements.
 */
uint16_t arr[1010];
uint32_t r[500];

/*
 * Read sizeof(uint32_t) bytes starting at a 16-bit element.
 *
 * The original code did this with *(uint32_t *)ptr, which accesses uint16_t
 * objects through a uint32_t lvalue (strict-aliasing violation) at an address
 * that is only guaranteed to be 2-byte aligned. memcpy expresses the same
 * byte-for-byte load with defined behaviour; compilers are free to lower it
 * to a single 32-bit load instruction.
 */
static inline uint32_t load_u32_from_u16(const uint16_t *src)
{
    uint32_t value;

    memcpy(&value, src, sizeof value);
    return value;
}

/*
 * Fill arr[0..999] pairwise (arr[i] = i, arr[i+1] = i + 10 for even i) and
 * store into r[i/2] the 32 bits read starting at arr[i+1].
 *
 * With PENALTY_ON defined, the 32-bit load is issued in the same iteration,
 * immediately after the 16-bit store to arr[i+1]; the part of the load that
 * covers arr[i+2] therefore sees whatever arr[i+2] held before this
 * iteration, since it is only written in the next one.
 *
 * Without PENALTY_ON, all stores to arr complete in a first loop and the
 * loads run in a second loop, so every load sees the freshly written values.
 * Note that the two variants can consequently leave different data in r.
 */
void func(void)
{
    uint32_t i;

    for (i = 0; i < 1000; i += 2) {
        /* Values stay below 65536, so the narrowing casts are lossless. */
        arr[i] = (uint16_t)i;
        arr[i + 1] = (uint16_t)(i + 10);
#ifdef PENALTY_ON
        /* Wide load right after the narrow store to the same address. */
        r[i / 2] = load_u32_from_u16(&arr[i + 1]);
#endif
    }

#ifndef PENALTY_ON
    /* Separate pass: loads are decoupled from the stores above. */
    for (i = 0; i < 1000; i += 2) {
        r[i / 2] = load_u32_from_u16(&arr[i + 1]);
    }
#endif
}
4

1 回答 1

3

在 32 位机器上用 gcc -O3 分别编译这两个版本,得到如下反汇编:

使用 PENALTY_ON

00000000 <func>:
0:  31 c0                   xor    %eax,%eax
2:  8d b6 00 00 00 00       lea    0x0(%esi),%esi
8:  8d 50 0a                lea    0xa(%eax),%edx
b:  66 89 94 00 02 00 00    mov    %dx,0x2(%eax,%eax,1)
12: 00 
13: 8b 8c 00 02 00 00 00    mov    0x2(%eax,%eax,1),%ecx
1a: 89 c2                   mov    %eax,%edx
1c: 66 89 84 00 00 00 00    mov    %ax,0x0(%eax,%eax,1)
23: 00 
24: 83 c0 02                add    $0x2,%eax
27: d1 ea                   shr    %edx
29: 3d e8 03 00 00          cmp    $0x3e8,%eax
2e: 89 0c 95 00 00 00 00    mov    %ecx,0x0(,%edx,4)
35: 75 d1                   jne    8 <func+0x8>
37: f3 c3                   repz ret  

没有 PENALTY_ON

00000000 <func>:
0:  31 c0                   xor    %eax,%eax
2:  8d b6 00 00 00 00       lea    0x0(%esi),%esi
8:  8d 50 0a                lea    0xa(%eax),%edx
b:  66 89 84 00 00 00 00    mov    %ax,0x0(%eax,%eax,1)
12: 00 
13: 66 89 94 00 02 00 00    mov    %dx,0x2(%eax,%eax,1)
1a: 00 
1b: 83 c0 02                add    $0x2,%eax
1e: 3d e8 03 00 00          cmp    $0x3e8,%eax
23: 75 e3                   jne    8 <func+0x8>
25: 66 31 c0                xor    %ax,%ax
28: 8b 8c 00 02 00 00 00    mov    0x2(%eax,%eax,1),%ecx
2f: 89 c2                   mov    %eax,%edx
31: 83 c0 02                add    $0x2,%eax
34: d1 ea                   shr    %edx
36: 3d e8 03 00 00          cmp    $0x3e8,%eax
3b: 89 0c 95 00 00 00 00    mov    %ecx,0x0(,%edx,4)
42: 75 e4                   jne    28 <func+0x28>
44: f3 c3                   repz ret 

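我认为原因是:定义 PENALTY_ON 时出现了写后读(Read-after-Write)。如下所示,先用一条 16 位的 mov 把 %dx 写入该地址,紧接着又用一条 32 位的 mov 从同一地址读取;读取的范围比刚写入的数据更宽,很可能因此无法直接使用刚写入的值而产生额外的等待: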

b:  66 89 94 00 02 00 00    mov    %dx,0x2(%eax,%eax,1)
12: 00 
13: 8b 8c 00 02 00 00 00    mov    0x2(%eax,%eax,1),%ecx
于 2014-10-07T11:11:13.113 回答