这是我的演示程序:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
int cmp(const void *d1, const void *d2)
{
int a, b;
a = *(int const *) d1;
b = *(int const *) d2;
if (a > b)
return 1;
else if (a == b)
return 0;
return -1;
}
int main()
{
int seed = time(NULL);
srandom(seed);
int i, n, max = 32768, a[max];
for (n=0; n < max; n++) {
int r = random() % 256;
a[n] = r;
}
qsort(a, max, sizeof(int), cmp);
clock_t beg = clock();
long long int sum = 0;
for (i=0; i < 20000; i++)
{
for (n=0; n < max; n++) {
if (a[n] >= 128)
sum += a[n];
}
}
clock_t end = clock();
double sec = (end - beg) / CLOCKS_PER_SEC;
printf("sec: %f\n", sec);
printf("sum: %lld\n", sum);
return 0;
}
unsorted
sec: 5.000000
sum: 63043880000
sorted
sec: 1.000000
sum: 62925420000
这是该程序两个版本的程序集差异,一个有qsort
一个没有:
--- unsorted.s
+++ sorted.s
@@ -58,7 +58,7 @@
shrl $4, %eax
sall $4, %eax
subl %eax, %esp
- leal 4(%esp), %eax
+ leal 16(%esp), %eax
addl $15, %eax
shrl $4, %eax
sall $4, %eax
@@ -83,6 +83,13 @@
movl -16(%ebp), %eax
cmpl -24(%ebp), %eax
jl .L7
+ movl -24(%ebp), %eax
+ movl $cmp, 12(%esp)
+ movl $4, 8(%esp)
+ movl %eax, 4(%esp)
+ movl -32(%ebp), %eax
+ movl %eax, (%esp)
+ call qsort
movl $0, -48(%ebp)
movl $0, -44(%ebp)
movl $0, -12(%ebp)
据我了解汇编输出,排序后的版本由于将值传递给了更多的代码qsort
,但我没有看到任何分支优化/预测/任何东西。也许我看错了方向?