optimization - 确定字节中最高有效置位的位置

Question

我有一个用来存储位标志的字节（byte）。我需要计算该字节中最高有效置位（即值为 1 的最高位）的位置。

示例字节：00101101 => 6 是最高有效置位的位置

紧凑的十六进制映射：

[0x00]      => 0x00
[0x01]      => 0x01
[0x02,0x03] => 0x02
[0x04,0x07] => 0x03
[0x08,0x0F] => 0x04
[0x10,0x1F] => 0x05
[0x20,0x3F] => 0x06
[0x40,0x7F] => 0x07
[0x80,0xFF] => 0x08

C中的测试用例：

#include <stdio.h>

/*
 * Returns the 1-based position of the most significant set bit of b:
 * 8 for 0x80..0xFF down to 1 for 0x01, and 0 when no bit is set.
 */
unsigned char check(unsigned char b) {
  unsigned char pos;
  /* Probe bit 7 first and walk down; the first hit is the answer. */
  for (pos = 8; pos > 0; --pos) {
    if (b & (1u << (pos - 1))) {
      return pos;
    }
  }
  return 0; /* only reached for b == 0 */
}
/*
 * Exhaustive self-test: runs check() on every byte value 0x00..0xFF and
 * compares the result with a hand-written table of expected answers.
 * Prints one line per mismatch, then a summary line.
 */
int main() {
  /* Expected check() result, indexed by the input byte value. */
  static const unsigned char truth[256] = {
    0x00,0x01,0x02,0x02,0x03,0x03,0x03,0x03,0x04,0x04,0x04,0x04,0x04,0x04,0x04,0x04,
    0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05,
    0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,
    0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,
    0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,
    0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,
    0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,
    0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,
    0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,
    0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,
    0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,
    0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,
    0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,
    0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,
    0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,
    0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08
  };

  int failures = 0;
  int value;

  /* The inputs are simply every byte value in ascending order, so the
     loop counter doubles as both the input and the table index. */
  for (value = 0; value < 256; ++value) {
    const unsigned char input = (unsigned char)value;
    const int got = check(input);
    if (got != truth[value]) {
      printf("failed %d : 0x%x : %d\n", value, 0x000000FF & ((int)input), got);
      ++failures;
    }
  }

  if (failures == 0) {
    printf("passed all\n");
  } else {
    printf("failed %d\n", failures);
  }
  return 0;
}

我想简化我的 check() 函数，使其不包含循环（最好也不包含分支）。是否有某种位运算技巧或哈希查找表方案，可以计算字节中最高有效置位的位置？

score 3 · Accepted Answer

您的问题其实是在寻找一种高效计算 log2 值的方法。而且因为您似乎想要一个不限于 C 语言的解决方案，所以我偷了点懒，改写了一些 C# 代码。

您想要计算 log2(x) + 1，并且在 x = 0 时（此时 log2 无定义）把结果定义为 0（也就是说，您引入了一个特殊情况：log2(0) = -1）。

// Maps the top three bits of (smeared * 0xE3) mod 256 to floor(log2(value)).
// Each of the eight possible smeared values (1, 3, 7, ..., 255) lands in a
// distinct slot of this table.
static readonly Byte[] multiplyDeBruijnBitPosition = {
  7, 2, 3, 4, 6, 1, 5, 0
};

/// <summary>
/// Returns log2(value) + 1, i.e. the 1-based position of the most
/// significant set bit, or 0 when <paramref name="value"/> is 0.
/// </summary>
public static Byte Log2Plus1(Byte value) {
  if (value == 0) {
    return 0;
  }

  // Copy the highest set bit into every lower position, so the value
  // becomes 2^(k+1) - 1 where k is the index of the highest set bit.
  int smeared = value;
  smeared |= smeared >> 1;
  smeared |= smeared >> 2;
  smeared |= smeared >> 4;

  // Only the low byte of the product matters; its top three bits select
  // the table slot.
  int slot = ((smeared * 0xE3) & 0xFF) >> 5;
  return (Byte) (multiplyDeBruijnBitPosition[slot] + 1);
}

这个位运算技巧取自“Find the log base 2 of an N-bit integer in O(lg(N)) operations with multiply and lookup”，您可以在那里看到针对 32 位值的等效 C 源代码。此处的代码已改写为处理 8 位值。

不过，您也许可以使用一种借助非常高效的内置函数来给出结果的操作（在许多 CPU 上只需一条指令，例如位扫描反向 Bit Scan Reverse）。问题“Bit twiddling：设置了哪个位？”的一个回答中有一些相关信息。该回答中的一段引文给出了硬件在底层支持解决此问题的一个可能原因：

像这样的事情是许多 O(1) 算法的核心，例如内核调度程序，它们需要找到由位数组表示的第一个非空队列。

score 1 · Accepted Answer

我敢肯定，其他人早就转移到其他主题了，但我的脑海里有一些东西表明，必须有一个更有效的无分支解决方案，而不仅仅是在我发布的其他解决方案中展开循环。快速浏览我的Warren副本让我走上了正确的轨道：二分搜索。

这是我基于这个想法的解决方案：

  Pseudo-code:

  // see if there's a bit set in the upper half   
  if ((b >> 4) != 0)  
  {
      offset = 4;
      b >>= 4;   
  }   
  else
      offset = 0;

  // see if there's a bit set in the upper half of what's left   
  if ((b & 0x0C) != 0)   
  {
    offset += 2;
    b >>= 2;   
  }

  // see if there's a bit set in the upper half of what's left   
  if ((b & 0x02) != 0)   
  {
    offset++;
    b >>= 1;   
  }

  return b + offset;

无分支 C++ 实现：

// Branch-free binary search for the most significant set bit of b.
// Returns its 1-based position (8 for 0x80..0xFF, 1 for 0x01), or 0 for b == 0.
//
// Each step probes the upper half of the bits still under consideration.
// The probe value is never larger than 15, so negating it and truncating to a
// byte sets bit 7 exactly when the probe is non-zero; that single bit is then
// scaled into the shift amount for the step.
static unsigned char check(unsigned char b)
{
  // Step 1: anything in the upper nibble? If so, skip 4 positions.
  const unsigned char upper_nibble = b >> 4;
  const unsigned char has_upper = static_cast<unsigned char>(-upper_nibble) >> 7;
  unsigned char step = has_upper << 2;
  unsigned char offset = step;
  b >>= step;

  // Step 2: anything in bits 2..3 of what is left? If so, skip 2 positions.
  const unsigned char upper_pair = b & 0x0C;
  const unsigned char has_pair = static_cast<unsigned char>(-upper_pair) >> 7;
  step = has_pair << 1;
  offset += step;
  b >>= step;

  // Step 3: is bit 1 set? If so, skip 1 position.
  const unsigned char upper_bit = b & 0x02;
  step = static_cast<unsigned char>(-upper_bit) >> 7;

  // What remains after the final shift is 1 if any bit was set, else 0.
  return (b >> step) + offset + step;
}

是的，我知道这都是学术性的:)

score 1 · Accepted Answer

这是一个有趣的小挑战。我不知道这个是否完全可移植，因为我只有 VC++ 可以测试，我当然不能确定它是否比其他方法更有效。这个版本是用循环编码的，但可以毫不费力地展开。

// Returns the 1-based position of the most significant set bit of b
// (8 for 0x80..0xFF, 1 for 0x01), or 0 when b is zero.
//
// Works by counting leading zero bits: `sub` stays 1 while only clear bits
// have been seen, and is subtracted from r once per bit. The loop body itself
// contains no data-dependent branch and can be unrolled by hand.
static unsigned char check(unsigned char b)
{
  unsigned char r = 8;
  unsigned char sub = 1;
  // The bit index is stepped by the loop header rather than by a `s--` buried
  // in the shift expression: reading and modifying `s` in the same expression
  // is unsequenced (undefined behavior) in C and in C++ before C++17.
  for (int s = 7; s >= 0; --s)
  {
      // ((b >> s) & 1) - 1 is 0 for a set bit and all ones for a clear bit,
      // so the first set bit clears `sub` for the rest of the loop.
      sub &= static_cast<unsigned char>(((b >> s) & 1) - 1);
      r -= sub;
  }
  return r;
}

score 0 · Accepted Answer

在纯 C 语言中这是不可能的。我能建议的最好办法是下面这个 check 的实现。尽管相当“丑陋”，但我认为它比问题中的 check 版本运行得更快。

/*
 * Returns the 1-based position of the most significant set bit of b
 * (8 for 0x80..0xFF, 1 for 0x01), or 0 when b is zero.
 *
 * Loop-free: the value is compared against power-of-two thresholds,
 * arranged as a small decision tree.
 */
int check(unsigned char b)
{
    if (b >= 0x10) {
        if (b >= 0x40) {
            return (b >= 0x80) ? 8 : 7;
        }
        return (b >= 0x20) ? 6 : 5;
    }
    if (b >= 0x04) {
        return (b >= 0x08) ? 4 : 3;
    }
    if (b >= 0x02) {
        return 2;
    }
    return (b != 0) ? 1 : 0;
}

score 0 · Accepted Answer

编辑：我找到了实际代码的链接：http://www.hackersdelight.org/hdcodetxt/nlz.c.txt 。下面的算法在该文件中名为 nlz8。你可以选择你最喜欢的技巧。

/*
From last comment of: http://stackoverflow.com/a/671826/315052
> Hacker's Delight explains how to correct for the error in 32-bit floats
> in 5-3 Counting Leading 0's. Here's their code, which uses an anonymous
> union to overlap asFloat and asInt: k = k & ~(k >> 1); asFloat =
> (float)k + 0.5f; n = 158 - (asInt >> 23); (and yes, this relies on
> implementation-defined behavior) - Derrick Coetzee Jan 3 '12 at 8:35
*/

/*
 * Returns the 1-based position of the most significant set bit of b
 * (8 for 0x80..0xFF, 1 for 0x01), or 0 when b is zero, by converting the
 * value to float and reading back the exponent field.
 *
 * NOTE(review): reads the union member that was not last written and shifts
 * the raw bits by 23, so this assumes a 32-bit int overlaying an IEEE-754
 * single-precision float -- confirm for the target platform.
 */
unsigned char check (unsigned char b) {
    /* Overlay a float and an int so the float's raw bits can be read back. */
    union {
        float    asFloat;
        int      asInt;
    } bits;

    /* Drop every set bit that has another set bit directly above it; the
       highest set bit always survives. */
    unsigned isolated = b;
    isolated &= ~(isolated >> 1);

    /* Adding 0.5 makes an input of zero yield the exponent of 0.5, which
       maps to a result of 0 below. */
    bits.asFloat = 0.5f + (float)isolated;

    /* 32 - (158 - e) simplifies to e - 126. */
    const int biased_exponent = bits.asInt >> 23;
    return biased_exponent - 126;
}

编辑——不完全确定提问者所说的“与语言无关”是什么意思，但下面是 Python 中的等效代码。

import ctypes

class Anon(ctypes.Union):
    """Union that overlays a C float and a C int on the same storage.

    Writing ``asFloat`` and then reading ``asInt`` exposes the float's raw
    bit pattern as an integer.
    """

    _fields_ = [("asFloat", ctypes.c_float), ("asInt", ctypes.c_int)]

def check(b):
    """Return the 1-based position of the most significant set bit of ``b``.

    Yields 8 for 0x80..0xFF, 1 for 0x01, and 0 when ``b`` is 0. The value is
    stored as a C float in an ``Anon`` union and the exponent field is read
    back through the integer view.

    NOTE(review): assumes ``c_float`` is an IEEE-754 single and ``c_int`` is
    32 bits wide -- confirm for the target platform.
    """
    value = int(b)
    # Drop every set bit that has another set bit directly above it; the
    # highest set bit always survives.
    isolated = value & ~(value >> 1)

    pun = Anon()
    # Adding 0.5 makes an input of zero produce the exponent of 0.5, which
    # maps to a result of 0.
    pun.asFloat = float(isolated) + 0.5

    biased_exponent = pun.asInt >> 23
    # 32 - (158 - e) simplifies to e - 126.
    return biased_exponent - 126

optimization - 确定字节中最高有效置位的位置

5 回答 5

Related

Reference