我正在寻找实现前移(move-to-front,MTF)变换的最快算法,例如与 Burrows–Wheeler 变换配合使用的那种。
到目前为止,我在 2.1GHz 的 Core i3 上达到的最好速度约为 15MB/s,但我确信这还不是最优的。下面是我目前最好的实现。还有更快的方法吗?
/*
 * Move-to-front coder over the 256 byte values.
 *
 * Encoder state (enc): the current rank of every symbol, packed as 9-bit
 * fields, seven per 64-bit word (7 * 9 = 63 bits).  A rank lives in the low
 * 8 bits of its field; the ninth bit is scratch space that lets encode()
 * compare and bump seven ranks with one word-wide operation.  37 words give
 * 259 fields, of which the first 256 are used.
 *
 * Decoder state (dec): the move-to-front list itself, dec[rank] == symbol.
 *
 * encode() only touches enc and decode() only touches dec.  Both start from
 * the identity ordering, so feeding every encode() result to decode() in
 * order reproduces the original symbols.
 */
class mtf256_x {
    typedef unsigned char u8;
    typedef unsigned long long L;

    enum {
        FIELD_BITS = 9,      /* bits per packed rank (8 data + 1 scratch) */
        FIELDS_PER_WORD = 7, /* 7 * 9 = 63 bits used per 64-bit word */
        WORD_COUNT = 37      /* ceil(256 / 7) */
    };

public:
    L enc[WORD_COUNT];
    u8 dec[256];

    /* Start from the identity ordering: symbol i has rank i. */
    mtf256_x() {
        for (unsigned i = 0; i < WORD_COUNT; i++) {
            enc[i] = 0;
        }
        for (unsigned i = 0; i < 256; i++) {
            dec[i] = (u8)i;
            set(i, (u8)i);
        }
    }

    /*
     * Map a rank back to its symbol and move that symbol to the front of
     * the decoder list.  Returns the decoded symbol.
     */
    u8 decode(u8 in) {
        u8 r = dec[in];
        if (in) {
            /* Slide the first `in` entries down one slot; ranges overlap. */
            memmove(dec + 1, dec, in);
            dec[0] = r;
        }
        return r;
    }

    /*
     * Store rank y for symbol x in the packed table and return y.
     * x must be below 256; larger values index outside the used fields.
     *
     * The original fell off the end of this non-void function, which is
     * undefined behaviour; returning the stored value keeps the signature.
     */
    u8 set(unsigned x, u8 y) {
        unsigned shift = (x % FIELDS_PER_WORD) * FIELD_BITS;
        unsigned word = x / FIELDS_PER_WORD;
        enc[word] &= ~(0x1FFULL << shift);
        enc[word] |= ((L)y) << shift;
        return y;
    }

    /* Return the low 8 bits of symbol x's packed field (its rank). */
    u8 get(unsigned x) {
        return (u8)(enc[x / FIELDS_PER_WORD] >> ((x % FIELDS_PER_WORD) * FIELD_BITS));
    }

    /*
     * Return the current rank of symbol `in`, then move it to the front:
     * every rank that was less than or equal to it is incremented, and
     * `in` itself is reset to rank 0.
     */
    u8 encode(u8 in) {
        const L ones = 0x0040201008040201ULL; /* 0x001 in each 9-bit field */
        const L low8 = 0x3FDFEFF7FBFDFEFFULL; /* 0x0ff in each 9-bit field */
        const u8 r = get(in);
        /* 0x100 + r in every field, so the subtraction below never borrows
         * across fields while each stored rank is at most 0xff. */
        const L Q = (L)(0x100 + r) * ones;

        for (unsigned i = 0; i < WORD_COUNT; i++) {
            L a = enc[i];
            /* Bit 8 of (0x100 + r - rank) is set exactly when rank <= r;
             * shift it down to bit 0 and add it as a conditional +1. */
            a += ((Q - a) >> 8) & ones;
            a &= low8; /* clear the scratch bit again */
            enc[i] = a;
        }
        set(in, 0);
        return r;
    }
};