我试图重构二进制搜索算法的优化步骤。我从这个迭代版本开始:
int binsearch_1( arr_t array[], size_t size, arr_t key, size_t *index ){
if( !array || !size ) return 0;
arr_t *p=array;
int found=0;
while( size > 0 ){
size_t w=size/2;
if( p[w] < key ){ p+=w+1; size-=w+1; }
else if( p[w] > key ){ size =w; }
else /* p[w] == key */{ p+=w; found=1; break; }
}
*index=p-array; return found;
}
从循环中消除比较:
int binsearch_2( arr_t array[], size_t size, arr_t key, size_t *index ){
if( !array || !size ) return 0;
arr_t *p=array;
while( size > 0 ){
size_t w=size/2;
if( p[w+1] <= key ){ p+=w+1; size-=w+1; } else size =w;
}
*index=p-array; return p[0]==key;
}
从循环中展开小尺寸:
int binsearch_3( arr_t array[], size_t size, arr_t key, size_t *index ){
if( !array || !size ) return 0;
arr_t *p=array;
while( size > 8 ){
size_t w=size/2;
if( p[w+1] <= key ){ p+=w+1; size-=w+1; } else size =w;
}
if( size==8 ){ if( p[5] <= key ){ p+=5; size=3; } else size=4; }
if( size==7 ){ if( p[4] <= key ){ p+=4; size=3; } else size=3; }
if( size==6 ){ if( p[4] <= key ){ p+=4; size=2; } else size=3; }
if( size==5 ){ if( p[3] <= key ){ p+=3; size=2; } else size=2; }
if( size==4 ){ if( p[3] <= key ){ p+=3; size=1; } else size=2; }
if( size==3 ){ if( p[2] <= key ){ p+=2; size=1; } else size=1; }
if( size==2 ){ if( p[2] <= key ){ p+=2; size=0; } else size=1; }
if( size==1 ){ if( p[1] <= key ){ p+=1; } }
*index=p-array; return p[0]==key;
}
重新排序 if 语句,将特殊情况 [size==pow(2,N)-1] 移到末尾:
int binsearch_4( arr_t array[], size_t size, arr_t key, size_t *index ){
if( !array || !size ) return 0;
arr_t *p=array;
while( size > 8 ){
size_t w=size/2;
if( p[w+1] <= key ){ p+=w+1; size-=w+1; } else size =w;
}
if( size==8 ){ if( p[5] <= key ){ p+=5; size=3; } else size=4; }
if( size==6 ){ if( p[4] <= key ){ p+=4; size=2; } else size=3; }
if( size==5 ){ if( p[3] <= key ){ p+=3; size=2; } else size=2; }
if( size==4 ){ if( p[3] <= key ){ p+=3; size=1; } else size=2; }
if( size==2 ){ if( p[2] <= key ){ p+=2; size=0; } else size=1; }
if( size==7 ){ if( p[4] <= key ){ p+=4; size=3; } else size=3; }
if( size==3 ){ if( p[2] <= key ){ p+=2; size=1; } else size=1; }
if( size==1 ){ if( p[1] <= key ){ p+=1; } }
*index=p-array; return p[0]==key;
}
将 if 语句更改为 switch 语句:
int binsearch_5( arr_t array[], size_t size, arr_t key, size_t *index ){
if( !array || !size ) return 0;
arr_t *p=array;
while( size > 8 ){
size_t w=size/2;
if( p[w+1] <= key ){ p+=w+1; size-=w+1; } else size =w;
}
if( size==8 ){ if( p[5] <= key ){ p+=5; size=3; } else size=4; }
if( size==6 ){ if( p[4] <= key ){ p+=4; size=2; } else size=3; }
if( size==5 ){ if( p[3] <= key ){ p+=3; size=2; } else size=2; }
if( size==4 ){ if( p[3] <= key ){ p+=3; size=1; } else size=2; }
if( size==2 ){ if( p[2] <= key ){ p+=2; size=0; } else size=1; }
switch(size){
case 7: if( p[4] <= key ) p+=4;
case 3: if( p[2] <= key ) p+=2;
case 1: if( p[1] <= key ) p+=1;
}
*index=p-array; return p[0]==key;
}
扩展 switch 语句以处理所有特殊情况:
int binsearch_6( arr_t array[], size_t size, arr_t key, size_t *index ){
if( !array || !size ) return 0;
arr_t *p=array;
switch( size ){
#define C(n) case ((size_t)1<<n)-1: if( p[1<<(n-1)]<=key ) p+=1<<(n-1);
#if SIZE_MAX == UINT64_MAX
C(63) C(62) C(61)
C(60) C(59) C(58) C(57) C(56) C(55) C(54) C(53) C(52) C(51)
C(50) C(49) C(48) C(47) C(46) C(45) C(44) C(43) C(42) C(41)
C(40) C(39) C(38) C(37) C(36) C(35) C(34) C(33) C(32)
#endif
C(31)
C(30) C(29) C(28) C(27) C(26) C(25) C(24) C(23) C(22) C(21)
C(20) C(19) C(18) C(17) C(16) C(15) C(14) C(13) C(12) C(11)
C(10) C( 9) C( 8) C( 7) C( 6) C( 5) C( 4) C( 3) C( 2) C( 1)
#undef C
break;
default:
while( size > 0 ){
size_t w=size/2;
if( p[w] < key ){ p+=w+1; size-=w+1; } else size=w;
}
}
*index=p-array; return p[0]==key;
}
从代码中消除一般情况处理: [ w 是最小的数字: w==pow(2,N)-1; 尺寸<=2*(w+1) ]
int binsearch_7( arr_t array[], size_t size, arr_t key, size_t *index ){
if( !array || !size ) return 0;
arr_t *p=array;
size_t w=(size-1)>>1; w|=w>>1; w|=w>>2; w|=w>>4; w|=w>>8; w|=w>>16;
#if SIZE_MAX == UINT64_MAX
w|=w>>32;
#endif
if( p[w]<key ) p+=size-w-1;
switch( w ){
#define C(n) case ((size_t)1<<n)-1: if( p[1<<(n-1)]<=key ) p+=1<<(n-1);
#if SIZE_MAX == UINT64_MAX
C(63) C(62) C(61)
C(60) C(59) C(58) C(57) C(56) C(55) C(54) C(53) C(52) C(51)
C(50) C(49) C(48) C(47) C(46) C(45) C(44) C(43) C(42) C(41)
C(40) C(39) C(38) C(37) C(36) C(35) C(34) C(33) C(32)
#endif
C(31)
C(30) C(29) C(28) C(27) C(26) C(25) C(24) C(23) C(22) C(21)
C(20) C(19) C(18) C(17) C(16) C(15) C(14) C(13) C(12) C(11)
C(10) C( 9) C( 8) C( 7) C( 6) C( 5) C( 4) C( 3) C( 2) C( 1)
#undef C
}
*index=p-array; return p[0]==key;
}
我所做的最后一步是简化案例标签 [从:'((size_t)1<<n)-1' 到:'n'] 但我发现修改后的代码在我的旧 PC 上比以前的版本。