问题是
基本 2log 是否还有其他(和/或更快)实现?
应用
log2(int) 和 log2(float) 操作在许多不同的上下文中都非常有用。仅举几例:压缩算法、3D 引擎和机器学习。在几乎所有这些上下文中,它们都用于被调用数十亿次的低级代码中……尤其是 log2(int) 操作非常有用。
因为我发现自己一直在使用 log2,所以我不想在这里给出我正在开发的特定应用程序。相同的是,这是一个真正的性能消耗者(如各种应用程序的性能测试所示)。对我来说,尽可能快地做到这一点是关键。
底部添加了测试所有实现的完整源代码,您可以自己查看。
当然……至少运行 3 次测试,并确保计数器足够大,可以击中几秒钟。我还执行“添加”操作以确保 JIT'ter 不会神奇地删除整个循环。那么让我们开始真正的工作吧。
简单的实现
C# 中 2log 的简单实现是:
(int)(Math.Log(x) / Math.Log(2))
这个实现很简单,但也很慢。它需要 2 个日志操作,这些操作本身已经很慢了。当然,我们可以通过1.0/Math.Log(2)
设置一个常数来优化它。
请注意,我们需要稍微修改这个常数以获得正确的结果(作为浮点错误的结果)或添加一个小数字以获得正确的结果。我选择了后者,但这并不重要——最终结果在所有情况下都很慢。
查表
一个更快的解决方案是使用查找表。虽然您可以使用 2 的任意幂的查找表,但我通常使用 256 或 64K 条目的表大小。
首先我们创建查找表:
lookup = new int[256];
for (int i = 1; i < 256; ++i)
{
lookup[i] = (int)(Math.Log(i) / Math.Log(2));
}
接下来,我们实现2log如下:
private static int LogLookup(int i)
{
if (i >= 0x1000000) { return lookup[i >> 24] + 24; }
else if (i >= 0x10000) { return lookup[i >> 16] + 16; }
else if (i >= 0x100) { return lookup[i >> 8] + 8; }
else { return lookup[i]; }
}
如您所见,表查找是一种非常快得多的实现——但作为一个缺点,它不能用于计算log2(float)
.
分支移除
众所周知,处理器不太擅长分支,所以我认为可以通过删除分支来改进表查找。我引入了第二个表,其中包含值和移位位以在表中找到条目,而不是一堆 if :
nobranch = new int[16] { 0, 0, 8, 8, 16, 16, 16, 16, 24, 24, 24, 24, 24, 24, 24, 24 };
private static int LogDoubleLookup(int i)
{
int n = (i | (i >> 4));
n = (n | (n >> 2));
n = (n | (n >> 1));
n = ((n & 0x1000000) >> 21) | ((n & 0x10000) >> 14) | ((n & 0x100) >> 7) | (n & 1);
int br = nobranch[n];
return lookup[i >> br] + br;
}
如果你运行这个测试,你会发现它实际上比 if-then-else 解决方案要慢。
然后是英特尔 80386
英特尔多年前就知道这是一项重要的操作,因此他们在其处理器中实施了位扫描转发 (BSF)。其他处理器也有类似的指令。这是迄今为止我所知道的最快的 2log 方法 - 但不幸的是,我现在知道如何使用 C# 中的这些好功能......我不喜欢有一个不再运行的实现的想法当新的平板电脑或手机上市时——我不知道有任何跨平台解决方案可以让我直接使用这个功能。
其他实现
正如 l4V 指出的(谢谢!)还有其他几个实现,特别是:
- 微不足道的循环。我省略了这一点,因为这很简单,这并不是真的很快。实施于
TestTrivial
. - 可以使用的 64 位 IEEE / int union's。实施于
TestFloat
- DeBruijn 查找表。实施于
TestDeBruijn
- 二进制搜索。实施于
TestBinary
除了我喜欢这个名字之外,DeBruijn 查找表与普通查找表一样快,使其成为这里最快的算法之一……我尝试过的所有其他算法都慢得多。
完整的测试代码
public class Log2Test
{
public static void TestNaive()
{
Stopwatch sw = new Stopwatch();
sw.Start();
int n = 0;
for (int i = 1; i < 100000000; ++i)
{
n += (int)(Math.Log(i) / Math.Log(2.0));
}
sw.Stop();
Console.WriteLine("Result: {0} - naive implementation took {1:0.000}s", n, sw.Elapsed.TotalSeconds);
}
public static int LogTrivialLoop(int v)
{
int r = 0;
while ((v >>= 1) > 0) // unroll for more speed...
{
r++;
}
return r;
}
public static void TestTrivialLoop()
{
Stopwatch sw = new Stopwatch();
sw.Start();
int n = 0;
for (int i = 1; i < 100000000; ++i)
{
n += LogTrivialLoop(i);
}
sw.Stop();
Console.WriteLine("Result: {0} - loop implementation took {1:0.000}s", n, sw.Elapsed.TotalSeconds);
}
public static int LogFloat(int v)
{
Helper h = new Helper() { U1 = v, U2 = 0x43300000 };
h.D -= 4503599627370496.0;
return (h.U2 >> 20) - 0x3FF;
}
public static void TestFloat()
{
Stopwatch sw = new Stopwatch();
sw.Start();
int n = 0;
for (int i = 1; i < 100000000; ++i)
{
n += LogFloat(i);
}
sw.Stop();
Console.WriteLine("Result: {0} - IEEE float implementation took {1:0.000}s", n, sw.Elapsed.TotalSeconds);
}
[StructLayout(LayoutKind.Explicit)]
private struct Helper
{
[FieldOffset(0)]
public int U1;
[FieldOffset(4)]
public int U2;
[FieldOffset(0)]
public double D;
}
public static void TestConstant()
{
double c = 1.0 / Math.Log(2.0);
Stopwatch sw = new Stopwatch();
sw.Start();
int n = 0;
for (int i = 1; i < 100000000; ++i)
{
n += (int)(0.00000000001 + Math.Log(i) * c);
}
sw.Stop();
Console.WriteLine("Result: {0} - naive 2 implementation took {1:0.000}s", n, sw.Elapsed.TotalSeconds);
}
private static int LogLookup(int i)
{
if (i >= 0x1000000) { return lookup[i >> 24] + 24; }
else if (i >= 0x10000) { return lookup[i >> 16] + 16; }
else if (i >= 0x100) { return lookup[i >> 8] + 8; }
else { return lookup[i]; }
}
public static void TestLookup()
{
lookup = new int[256];
for (int i = 1; i < 256; ++i)
{
lookup[i] = (int)(Math.Log(i) / Math.Log(2));
}
Stopwatch sw = new Stopwatch();
sw.Start();
int n = 0;
for (int i = 1; i < 100000000; ++i)
{
n += LogLookup(i);
}
sw.Stop();
Console.WriteLine("Result: {0} - table lookup implementation took {1:0.000}s", n, sw.Elapsed.TotalSeconds);
}
private static int LogDoubleLookup(int i)
{
int n = (i | (i >> 4));
n = (n | (n >> 2));
n = (n | (n >> 1));
n = ((n & 0x1000000) >> 21) | ((n & 0x10000) >> 14) | ((n & 0x100) >> 7) | (n & 1);
int br = nobranch[n];
return lookup[i >> br] + br;
}
public static void TestDoubleLookup()
{
// Lookup table was already constructed earlier
Stopwatch sw = new Stopwatch();
sw.Start();
int n = 0;
for (int i = 1; i < 100000000; ++i)
{
n += LogDoubleLookup(i);
}
sw.Stop();
Console.WriteLine("Result: {0} - double table lookup implementation took {1:0.000}s", n, sw.Elapsed.TotalSeconds);
}
private static int LogBinary(int v)
{
/* This is the worst implementation ever... - apparently C# is a slow-branching language
int[] b = { 0x2, 0xC, 0xF0, 0xFF00, 0x7FFF0000 };
int[] S = { 1, 2, 4, 8, 16 };
int r = 0; // result of log2(v) will go here
for (int i = 4; i >= 0; i--) // unroll for speed...
{
if ((v & b[i]) != 0)
{
v >>= S[i];
r |= S[i];
}
}
return r;
*/
int r = (((v > 0xFFFF)) ? 0x10 : 0);
v >>= r;
int shift = ((v > 0xFF) ? 0x8 : 0);
v >>= shift;
r |= shift;
shift = ((v > 0xF) ? 0x4 : 0);
v >>= shift;
r |= shift;
shift = ((v > 0x3) ? 0x2 : 0);
v >>= shift;
r |= shift;
r |= (v >> 1);
return r;
}
public static void TestBinary()
{
// Lookup table was already constructed earlier
Stopwatch sw = new Stopwatch();
sw.Start();
int n = 0;
for (int i = 1; i < 100000000; ++i)
{
n += LogBinary(i);
}
sw.Stop();
Console.WriteLine("Result: {0} - binary search implementation took {1:0.000}s", n, sw.Elapsed.TotalSeconds);
}
private static readonly int[] MultiplyDeBruijnBitPosition = new int[32]
{
0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31
};
private static int LogDeBruijn(int v)
{
v |= v >> 1; // first round down to one less than a power of 2
v |= v >> 2;
v |= v >> 4;
v |= v >> 8;
v |= v >> 16;
return MultiplyDeBruijnBitPosition[(uint)(v * 0x07C4ACDDU) >> 27];
}
public static void TestDeBruijn()
{
// Lookup table was already constructed earlier
Stopwatch sw = new Stopwatch();
sw.Start();
int n = 0;
for (int i = 1; i < 100000000; ++i)
{
n += LogDeBruijn(i);
}
sw.Stop();
Console.WriteLine("Result: {0} - de Bruijn implementation took {1:0.000}s", n, sw.Elapsed.TotalSeconds);
}
private static int[] lookup;
private static readonly int[] nobranch = new int[16] { 0, 0, 8, 8, 16, 16, 16, 16, 24, 24, 24, 24, 24, 24, 24, 24 };
static void Main(string[] args)
{
TestConstant();
TestNaive();
TestDeBruijn();
TestBinary();
TestFloat();
TestTrivialLoop();
TestLookup();
TestDoubleLookup();
Console.ReadLine();
}
}