10

这个问题中,以下代码:

/// <summary>
/// Swaps every adjacent pair of bytes in place, i.e. reverses the byte
/// order of each 16-bit word stored in <paramref name="data"/>.
/// </summary>
/// <param name="data">
/// Buffer to modify. When the length is odd, the final unpaired byte is
/// left untouched.
/// </param>
public static void Swap(byte[] data)
{
    // Iterate only while a complete pair remains; bounding on "i" alone
    // would index one past the end of an odd-length buffer.
    for (int i = 0; i + 1 < data.Length; i += 2)
    {
        byte b = data[i];
        data[i] = data[i + 1];
        data[i + 1] = b;
    }
}

已用不安全(unsafe)代码重写,以提高其性能:

/// <summary>
/// Swaps every adjacent pair of bytes in place using pointer access.
/// </summary>
/// <param name="Source">
/// Buffer to modify. An empty array is accepted and left as is; when the
/// length is odd, the final unpaired byte is left untouched.
/// </param>
/// <remarks>
/// Each pair is rebuilt as a 16-bit value and stored with a single write,
/// so the bytes only change places when the host stores the low byte first
/// (little-endian). On a big-endian host the store writes the bytes back in
/// their original order.
/// </remarks>
public static unsafe void SwapX2(Byte[] Source)
{
    // Pinning the array expression itself (rather than &Source[0]) avoids
    // an index-out-of-range failure on an empty array.
    fixed (Byte* pSource = Source)
    {
        Byte* bp = pSource;
        // Round the byte count down to a whole number of pairs so the last
        // iteration never touches memory beyond the array.
        Byte* bp_stop = bp + (Source.Length & ~1);

        while (bp < bp_stop)
        {
            *(UInt16*)bp = (UInt16)(*bp << 8 | *(bp + 1));
            bp += 2;
        }
    }
}

假设一个人想用 32 位字做同样的事情:

/// <summary>
/// Reverses the byte order of every 4-byte group in place.
/// </summary>
/// <param name="data">
/// Buffer to modify. Up to three trailing bytes that do not form a
/// complete 4-byte group are left untouched.
/// </param>
public static void SwapX4(byte[] data)
{
    byte temp;
    // Iterate only while a complete 4-byte group remains; bounding on "i"
    // alone would index past the end when the length is not a multiple of 4.
    for (int i = 0; i <= data.Length - 4; i += 4)
    {
        // Exchange the outer bytes of the group...
        temp = data[i];
        data[i] = data[i + 3];
        data[i + 3] = temp;
        // ...then the inner bytes.
        temp = data[i + 1];
        data[i + 1] = data[i + 2];
        data[i + 2] = temp;
    }
}

这将如何以类似的方式重写?

4

2 回答 2

11
/// <summary>
/// Reverses the byte order of every 4-byte group in place using pointer
/// access.
/// </summary>
/// <param name="Source">
/// Buffer to modify. An empty array is accepted and left as is; up to
/// three trailing bytes that do not form a complete group are untouched.
/// </param>
/// <remarks>
/// Each group is rebuilt as a 32-bit value and stored with a single write,
/// so the bytes only change places when the host stores the low byte first
/// (little-endian). On a big-endian host the store writes the bytes back in
/// their original order.
/// </remarks>
public static unsafe void SwapX4(Byte[] Source)
{
    // Pinning the array expression itself (rather than &Source[0]) avoids
    // an index-out-of-range failure on an empty array.
    fixed (Byte* pSource = Source)
    {
        Byte* bp = pSource;
        // Round the byte count down to a whole number of 4-byte groups so
        // the last iteration never touches memory beyond the array.
        Byte* bp_stop = bp + (Source.Length & ~3);

        while (bp < bp_stop)
        {
            *(UInt32*)bp = (UInt32)(
                (*bp       << 24) |
                (*(bp + 1) << 16) |
                (*(bp + 2) <<  8) |
                (*(bp + 3)      ));
            bp += 4;
        }
    }
}

请注意,这两个函数(我的 SwapX4 和您的 SwapX2)只有在小端(little-endian)主机上才会真正交换字节;在大端(big-endian)主机上运行时,它们只是开销不小的空操作。

于 2012-07-25T23:23:00.677 回答
3

此版本不会越过缓冲区的边界,同时适用于小端(Little Endian)和大端(Big Endian)架构,并且在处理较大的数据时速度更快。(更新:在构建配置中为 32 位(x86)目标预定义符号 X86、为 64 位(x64)目标预定义符号 X64 之后,速度还会再快一点。)

/// <summary>
/// Reverses the byte order of every 4-byte group in place, processing two
/// groups per step through 64-bit reads and writes where possible.
/// </summary>
/// <param name="source">
/// Buffer to modify. Arrays shorter than four bytes are left as is; up to
/// three trailing bytes that do not form a complete group are untouched.
/// </param>
/// <remarks>
/// The swap is expressed with shifts and masks on the loaded value, so the
/// result does not depend on the byte order of the host.
/// </remarks>
public static unsafe void Swap4(byte[] source)
{
    // No complete group to reverse. Returning here also keeps the X86/X64
    // paths below from computing an address off the null pointer that
    // pinning an empty array yields.
    if (source.Length < 4)
    {
        return;
    }

    fixed (byte* psource = source)
    {
        // Round the byte count down to a multiple of 4. Clearing only the
        // lowest bit is not enough: a count such as 2 or 10 would make the
        // code below write past the buffer or reverse some bytes twice.
#if X86
        // NOTE(review): reads the element count from the 4 bytes directly
        // before the first element; this presumes a runtime object layout
        // that is not visible here - confirm for the target runtime.
        uint length = *((uint*)(psource - 4)) & 0xFFFFFFFCU;
#elif X64
        // NOTE(review): same assumption as above, with the count presumed
        // to sit 8 bytes before the first element - confirm.
        uint length = *((uint*)(psource - 8)) & 0xFFFFFFFCU;
#else
        uint length = (uint)source.Length & 0xFFFFFFFCU;
#endif
        // Walk backwards from the end, reversing two 4-byte groups at once.
        while (length > 7)
        {
            length -= 8;
            ulong* pulong = (ulong*)(psource + length);
            *pulong = ( ((*pulong >> 24) & 0x000000FF000000FFUL)
                      | ((*pulong >> 8)  & 0x0000FF000000FF00UL)
                      | ((*pulong << 8)  & 0x00FF000000FF0000UL)
                      | ((*pulong << 24) & 0xFF000000FF000000UL));
        }
        // At most one group (the first four bytes) can remain.
        if (length != 0)
        {
            uint* puint = (uint*)psource;
            *puint = ( ((*puint >> 24))
                     | ((*puint >> 8) & 0x0000FF00U)
                     | ((*puint << 8) & 0x00FF0000U)
                     | ((*puint << 24)));
        }
    }
}
于 2018-03-13T05:27:37.447 回答