1

我有一个以小端序排列的、交错存放的有符号 24 位整数(复数)数组,我想将其转换为 float 或 double 类型的复数数组。所谓交错,我的意思是:

R1 R2 R3 I1 I2 I3 R4 R5 R6 I4 I5 I6 . . .

其中每个项目是一个 8 位字节,每三个一起是一个 24 位整数,其中 R = 实数,I = 虚数。

在 C# 中执行此操作的最有效方法是什么?这段代码要运行很多次,所以我想尽可能榨干每一个时钟周期。我希望有比"移位、按位或、再强制转换"这种蛮力做法更高效的方法。

如果有帮助的话,我不介意在这种情况下使用不安全的代码。

这是作为基线的蛮力方法;为了简化 IL,代码中注释掉了每对数中的第二个数(虚部),并暂时忽略了符号处理:

// Baseline micro-benchmark: times the brute-force conversion of interleaved
// little-endian 24-bit samples to float and prints the cost per complex number.
class Program
{
    // Number of complex (real, imaginary) pairs processed by the timed loop.
    const int Size = 10000000;
    static void Main(string[] args)
    {
        //
        // Array of little-endian 24-bit complex ints
        // (least significant byte first)
        //
        // 3 bytes per component, 2 components per pair. The array is freshly
        // allocated (all zero), so only the conversion cost is being measured.
        byte[] buf = new byte[3 * 2 * Size];
        float[] real = new float[Size];
        //float[] imag = new float[Size];

        //
        // The brute-force way
        //
        // j is the byte offset of the component currently being read from buf.
        int j = 0;
        Stopwatch timer = new Stopwatch();
        timer.Start();
        for (int i = 0; i < Size; i++)
        {
            // Assemble the three bytes, least significant first, into an int
            // and convert to float. The bytes are zero-extended, so the value
            // is treated as unsigned; no sign extension is performed here.
            real[i] = (float)(buf[j] | (buf[j + 1] << 8) | (buf[j + 2] << 16));
            j += 3;
            // imag[i] = (float)(buf[j] | (buf[j + 1] << 8) | (buf[j + 2] << 16));
            // Still step over the three imaginary bytes even though their
            // conversion is disabled above.
            j += 3;
        }
        timer.Stop();
        // Elapsed milliseconds * 1000 / Size = microseconds per complex number.
        Console.WriteLine("result = " + 
            (float)(timer.ElapsedMilliseconds * 1000.0f / Size) + 
            " microseconds per complex number");
        // Wait for Enter before returning.
        Console.ReadLine();
    }
}

以及对应的 IL:

  IL_0024:  ldc.i4.0
  IL_0025:  stloc.s    i
  IL_0027:  br.s       IL_0050
  IL_0029:  ldloc.1
  IL_002a:  ldloc.s    i
  IL_002c:  ldloc.0
  IL_002d:  ldloc.2
  IL_002e:  ldelem.u1
  IL_002f:  ldloc.0
  IL_0030:  ldloc.2
  IL_0031:  ldc.i4.1
  IL_0032:  add
  IL_0033:  ldelem.u1
  IL_0034:  ldc.i4.8
  IL_0035:  shl
  IL_0036:  or
  IL_0037:  ldloc.0
  IL_0038:  ldloc.2
  IL_0039:  ldc.i4.2
  IL_003a:  add
  IL_003b:  ldelem.u1
  IL_003c:  ldc.i4.s   16
  IL_003e:  shl
  IL_003f:  or
  IL_0040:  conv.r4
  IL_0041:  stelem.r4
  IL_0042:  ldloc.2
  IL_0043:  ldc.i4.3
  IL_0044:  add
  IL_0045:  stloc.2
  IL_0046:  ldloc.2
  IL_0047:  ldc.i4.3
  IL_0048:  add
  IL_0049:  stloc.2
  IL_004a:  ldloc.s    i
  IL_004c:  ldc.i4.1
  IL_004d:  add
  IL_004e:  stloc.s    i
  IL_0050:  ldloc.s    i
  IL_0052:  ldc.i4     0x989680
  IL_0057:  blt.s      IL_0029
4

2 回答 2

1

派对迟到了,但这看起来很有趣;-)

下面的几个实验(使用不安全)。Method1() 是你的。在我的笔记本电脑上,使用 AnyCPU Release 版本,Method2() 有 20% 的持续改进,而 Method3() 没有显着的额外好处。(计时超过 100_000_000 次迭代。)

我的思路是使用指针,避免(显式的)移位运算(掩码运算不可避免)。

一些典型的结果...

result = 0.0075 microseconds per complex number
result = 0.00542 microseconds per complex number
result = 0.00516 microseconds per complex number

result = 0.00753 microseconds per complex number
result = 0.0052 microseconds per complex number
result = 0.00528 microseconds per complex number

代码...

using System;
using System.Diagnostics;
using System.Runtime.InteropServices;

namespace SO_20210326
{
  //  Enable unsafe code (e.g. <AllowUnsafeBlocks>true</AllowUnsafeBlocks>):
  //  the struct-overlay and raw-pointer variants read the byte buffer through pointers.

  /// <summary>
  /// Overlay for one interleaved sample as it sits in the byte stream:
  /// bytes 0..2 are the real part and bytes 3..5 the imaginary part, both
  /// little-endian. Each field is a 4-byte read that starts on its component,
  /// so only the low three bytes of <see cref="r"/> and <see cref="i"/> belong
  /// to that component; callers must mask with 0x00ffffff.
  /// </summary>
  /// <remarks>
  /// The fields span bytes 0..6, so the overlay is 7 bytes wide, not 6: the
  /// read of <see cref="i"/> touches the first byte of the following sample.
  /// It therefore cannot be used as the element type of an array with a 6-byte
  /// stride; it is only laid over a byte buffer at explicit 6-byte offsets.
  /// </remarks>
  [StructLayout(LayoutKind.Explicit, Pack = 1, Size = 7)]
  struct NumPair
  {
    [FieldOffset(0)] public int r;
    [FieldOffset(3)] public int i;
  }

  /// <summary>
  /// Micro-benchmark comparing three ways of converting interleaved
  /// little-endian 24-bit complex samples to float arrays.
  /// </summary>
  /// <remarks>
  /// All variants produce the unsigned value 0..0xFFFFFF of each component, as
  /// the brute-force baseline does. For signed data, sign-extend the assembled
  /// int before the float conversion, e.g. <c>(v &lt;&lt; 8) &gt;&gt; 8</c>.
  /// </remarks>
  class Program
  {
    const int Size = 100000000;

    // Bytes occupied by one complex sample (3 real + 3 imaginary).
    const int PairBytes = 6;

    // Selects the three low-order bytes of a 4-byte little-endian read,
    // i.e. the three bytes that come first in memory.
    const int LowThreeBytes = 0x00ffffff;

    /// <summary>Validates the buffers and returns the number of complex samples to convert.</summary>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    /// <exception cref="ArgumentException">
    /// The output arrays differ in length, or <paramref name="buf"/> holds
    /// fewer than 6 bytes per output element.
    /// </exception>
    static int PairCount(byte[] buf, float[] real, float[] imag)
    {
      if (buf == null)
      {
        throw new ArgumentNullException("buf");
      }
      if (real == null)
      {
        throw new ArgumentNullException("real");
      }
      if (imag == null)
      {
        throw new ArgumentNullException("imag");
      }
      if (imag.Length != real.Length)
      {
        throw new ArgumentException("real and imag must have the same length.");
      }
      // long arithmetic so the size check itself cannot overflow.
      if (buf.Length < (long)PairBytes * real.Length)
      {
        throw new ArgumentException("buf is too short for the requested number of samples.", "buf");
      }
      return real.Length;
    }

    /// <summary>Assembles one 24-bit little-endian value byte by byte.</summary>
    static unsafe int ReadInt24(byte* p)
    {
      return p[0] | (p[1] << 8) | (p[2] << 16);
    }

    /// <summary>
    /// The brute-force way: three byte loads, two shifts and two ORs per component.
    /// </summary>
    internal static void ConvertBytewise(byte[] buf, float[] real, float[] imag)
    {
      int count = PairCount(buf, real, imag);
      int j = 0;
      for (int i = 0; i < count; i++)
      {
        real[i] = (float)(buf[j] | (buf[j + 1] << 8) | (buf[j + 2] << 16));
        j += 3;
        imag[i] = (float)(buf[j] | (buf[j + 1] << 8) | (buf[j + 2] << 16));
        j += 3;
      }
    }

    /// <summary>
    /// Lays a <see cref="NumPair"/> over each 6-byte sample and masks the two
    /// overlapping 4-byte fields; no explicit shifts in the main loop.
    /// </summary>
    internal static unsafe void ConvertStructOverlay(byte[] buf, float[] real, float[] imag)
    {
      int count = PairCount(buf, real, imag);
      if (count == 0)
      {
        return;
      }
      if (!BitConverter.IsLittleEndian)
      {
        // The 4-byte reads below only line up with the data on a little-endian host.
        ConvertBytewise(buf, real, imag);
        return;
      }

      fixed (byte* start = buf)
      {
        byte* p = start;
        int last = count - 1;
        for (int i = 0; i < last; i++)
        {
          NumPair* pair = (NumPair*)p;
          real[i] = pair->r & LowThreeBytes;
          imag[i] = pair->i & LowThreeBytes;
          p += PairBytes;
        }

        // The 4-byte read of the final imaginary part would run one byte past
        // a buffer of exactly 6 * count bytes, so the last sample is read bytewise.
        real[last] = ReadInt24(p);
        imag[last] = ReadInt24(p + 3);
      }
    }

    /// <summary>
    /// Walks the buffer with a raw pointer, reading 4 bytes at each 3-byte step
    /// and masking off the byte that belongs to the next component.
    /// </summary>
    internal static unsafe void ConvertPointer(byte[] buf, float[] real, float[] imag)
    {
      int count = PairCount(buf, real, imag);
      if (count == 0)
      {
        return;
      }
      if (!BitConverter.IsLittleEndian)
      {
        // The 4-byte reads below only line up with the data on a little-endian host.
        ConvertBytewise(buf, real, imag);
        return;
      }

      fixed (byte* start = buf)
      {
        byte* p = start;
        int last = count - 1;
        for (int i = 0; i < last; i++)
        {
          real[i] = *(int*)p & LowThreeBytes;
          p += 3;
          imag[i] = *(int*)p & LowThreeBytes;
          p += 3;
        }

        // Same end-of-buffer consideration as in ConvertStructOverlay.
        real[last] = ReadInt24(p);
        imag[last] = ReadInt24(p + 3);
      }
    }

    /// <summary>Prints the elapsed time of one run as microseconds per complex number.</summary>
    static void Report(Stopwatch timer)
    {
      Console.WriteLine("result = " +
                        (float)(timer.ElapsedMilliseconds * 1000.0f / Size) +
                        " microseconds per complex number");
    }

    /// <summary>Times the brute-force conversion.</summary>
    static void Method1()
    {
      //
      // Array of little-endian 24-bit complex ints
      // (least significant byte first)
      //
      byte[] buf = new byte[PairBytes * Size];
      float[] real = new float[Size];
      float[] imag = new float[Size];

      Stopwatch timer = new Stopwatch();
      timer.Start();
      ConvertBytewise(buf, real, imag);
      timer.Stop();
      Report(timer);
    }

    /// <summary>Times the struct-overlay conversion on the same buffer layout as Method1.</summary>
    static void Method2()
    {
      byte[] buf = new byte[PairBytes * Size];
      float[] real = new float[Size];
      float[] imag = new float[Size];

      Stopwatch timer = new Stopwatch();
      timer.Start();
      ConvertStructOverlay(buf, real, imag);
      timer.Stop();
      Report(timer);
    }

    /// <summary>Times the raw-pointer conversion on the same buffer layout as Method1.</summary>
    static void Method3()
    {
      byte[] buf = new byte[PairBytes * Size];
      float[] real = new float[Size];
      float[] imag = new float[Size];

      Stopwatch timer = new Stopwatch();
      timer.Start();
      ConvertPointer(buf, real, imag);
      timer.Stop();
      Report(timer);
    }

    static void Main(string[] args)
    {
      Method1();
      Method2();
      Method3();
      Console.ReadLine();
    }
  }
}
于 2021-03-26T13:20:12.347 回答
0

位移运算应该非常快,不过您需要以更大的单位(即 int 或 long,而不是字节)进行操作。这样可以避免为了组合三个字节而多次移位。但这也意味着您必须进行不安全的强制转换,把 byte* buf 转成 uint* buf 或 ulong* buf。.NET 中的对象默认按 4 或 8 字节对齐,但在少数情况下,您可能需要自己处理对齐问题。

// Must run in an unsafe context; buf is the byte[] holding the interleaved samples.
// Assumes a little-endian host (BitConverter.IsLittleEndian), so the byte at the
// lowest address is the least significant byte of the uint that contains it.
// Values are assembled unsigned; sign-extend afterwards if the samples are signed.
float[] real = new float[buf.Length / 6];
float[] imag = new float[buf.Length / 6];
uint i = 0;

fixed (byte* bytes = buf)
{
    uint* dataIn = (uint*)bytes;
    // Three uints (12 bytes) hold two complex numbers; stop after the last whole group.
    // The bound is counted in uints, not bytes, because pointer arithmetic scales by 4.
    uint* dataEnd = dataIn + buf.Length / 12 * 3;

    for (uint* data = dataIn; data < dataEnd; data += 3)
    {
        // Extract (R123, I123) and (R456, I456) at once
        // D1--------- D2--------- D3---------
        // R1 R2 R3 I1 I2 I3 R4 R5 R6 I4 I5 I6
        // Little-endian: R1 is bits 0..7 of D1 and I1 is bits 24..31 of D1.
        real[i] = data[0] & 0xffffffu;
        imag[i] = (data[0] >> 24) | ((data[1] & 0xffffu) << 8);
        real[i + 1] = (data[1] >> 16) | ((data[2] & 0xffu) << 16);
        imag[i + 1] = data[2] >> 8;
        i += 2; // two complex numbers are produced per iteration
    }
}

// If the size is not a multiple of 12, one 6-byte pair is left over;
// convert it byte by byte outside the loop.
if (i < real.Length)
{
    uint j = 6 * i;
    real[i] = buf[j] | (buf[j + 1] << 8) | (buf[j + 2] << 16);
    imag[i] = buf[j + 3] | (buf[j + 4] << 8) | (buf[j + 5] << 16);
}

使用 ulong 的做法也类似,只是一次迭代会提取 4 个复数,而不是像上面那样提取 2 个。如果大小不是 12 的倍数,那么剩余的字节要在循环之外、最后单独提取。

您还可以轻松地在多个线程中运行转换。在较新的 .NET 框架中,SIMD 可能是另一种解决方案,因为.NET Core 添加了许多 SIMD 内在函数

于 2013-07-31T09:45:38.873 回答