cuda - CUDA 逆 FFT 错误

Question

我有以下代码在执行逆 FFT 时有错误。当我打印输出并验证它时，前向 FFT 起作用。但反过来似乎没有。有任何想法吗？看起来我错过了一个概念吗？

编辑 - 我基本上重写了 CUDA 工具包示例附带的代码。我正在尝试使用 FFT 但使用修改后的算法（实际上是 DIF）执行卷积。

EDIT2 - 为问题添加代码。

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#include <cuda_runtime.h>
#include <cufft.h>

typedef enum signaltype {REAL, COMPLEX} signal;

typedef float2 Complex;

void
printData(Complex *a, int size, char *msg) {

  if (msg == "") printf("\n");
  else printf("%s\n", msg);

  for (int i = 0; i < size; i++)
    printf("%f %f\n", a[i].x, a[i].y);
}

void
normData(Complex *a, int size, float norm) {

  for (int i = 0; i < size; i++) {
    a[i].x /= norm;
    a[i].y /= norm;
  }
}

void
randomFill(Complex *h_signal, int size, int flag) {

  // Real signal.
  if (flag == REAL) {
    for (int i = 0; i < size; i++) {
      h_signal[i].x = rand() / (float) RAND_MAX;
      h_signal[i].y = 0;
    }
  }
}

// FFT a signal that's on the _DEVICE_.
void
signalFFT(Complex *d_signal, int signal_size) {

  cufftHandle plan;
  if (cufftPlan1d(&plan, signal_size, CUFFT_C2C, 1) != CUFFT_SUCCESS) {
    printf("Failed to plan FFT\n");
    exit(0);
  }

  // Execute the plan.
  if (cufftExecC2C(plan, (cufftComplex *) d_signal, (cufftComplex *) d_signal, CUFFT_FORWARD) != CUFFT_SUCCESS) {
    printf ("Failed Executing FFT\n");
    exit(0);
  }

}

void
signalIFFT(Complex *d_signal, int signal_size) {

  cufftHandle plan;
  if (cufftPlan1d(&plan, signal_size, CUFFT_C2C, 1) != CUFFT_SUCCESS) {
    printf("Failed to plan IFFT\n");
    exit(0);
  }

  // Execute the plan.
  if (cufftExecC2C(plan, (cufftComplex *) d_signal, (cufftComplex *) d_signal, CUFFT_INVERSE) != CUFFT_SUCCESS) {
    printf ("Failed Executing IFFT\n");
    exit(0);
  }

}

int main()
{

  Complex *h_signal, *d_signal1;

  int alloc_size, i;

  alloc_size = 16;

  // Kernel Block and Grid Size.
  const dim3 blockSize(16, 16, 1);
  const dim3 gridSize(alloc_size / 16 + 1, alloc_size / 16 + 1, 1);

  h_signal = (Complex *) malloc(sizeof(Complex) * alloc_size);

  cudaMalloc(&d_signal1, sizeof(Complex) * alloc_size);
  if (cudaGetLastError() != cudaSuccess){
    printf("Cuda error: Failed to allocate\n");
    exit(0);
  }
  //cudaMalloc(&d_signal2, sizeof(Complex) * alloc_size);

  // Add random data to signal.
  randomFill(h_signal, alloc_size, REAL);
  printData(h_signal, alloc_size, "Random H1");

  cudaMemcpy(d_signal1, h_signal, sizeof(Complex) * alloc_size, cudaMemcpyHostToDevice);

  signalFFT(d_signal1, alloc_size);

  signalIFFT(d_signal1, alloc_size);

  cudaDeviceSynchronize();

  cudaMemcpy(h_signal, d_signal1, sizeof(Complex) * alloc_size, cudaMemcpyDeviceToHost);

  printData(h_signal, alloc_size, "IFFT");

  return 0;
}

score 5 · Accepted Answer

写好问题的建议：

在问题中发布您的代码，而不是在外部链接中。有一天，该链接将失效，您对未来读者的问题也将失效。
在问题中发布您的实际数据（在这种情况下没有那么多）。
发布或确定您期望的数据是什么，以及为什么。

另一个注意事项：

cufft 文档指示您使用 cufftComplex，而不是您自己的数据类型，尽管您的工作正常。如果 cufft 的开发人员出于某种奇怪的原因更改了他们的数据布局，那么您的代码将在重新编译时中断。如果您使用推荐的数据类型，则不应使用。

现在关于您的问题，我运行了您的代码并得到了如下结果：

Random H1
0.840188 0.000000
0.394383 0.000000
0.783099 0.000000
0.798440 0.000000
0.911647 0.000000
0.197551 0.000000
0.335223 0.000000
0.768230 0.000000
0.277775 0.000000
0.553970 0.000000
0.477397 0.000000
0.628871 0.000000
0.364784 0.000000
0.513401 0.000000
0.952230 0.000000
0.916195 0.000000
IFFT
13.443005 0.000000
6.310127 -0.000000
12.529589 0.000000
12.775041 0.000000
14.586359 -0.000000
3.160823 0.000000
5.363565 0.000000
12.291674 -0.000000
4.444397 -0.000000
8.863521 0.000000
7.638353 0.000000
10.061934 -0.000000
5.836554 0.000000
8.214415 -0.000000
15.235678 -0.000000
14.659121 -0.000000

您的代码中唯一似乎缺少的是，您没有将结果除以转换的长度（在本例中为 16）来取回原始数据（如此处示例代码中所建议的那样）。当我这样做时，我会得到我认为是预期的结果：

Random H1
0.840188 0.000000
0.394383 0.000000
0.783099 0.000000
0.798440 0.000000
0.911647 0.000000
0.197551 0.000000
0.335223 0.000000
0.768230 0.000000
0.277775 0.000000
0.553970 0.000000
0.477397 0.000000
0.628871 0.000000
0.364784 0.000000
0.513401 0.000000
0.952230 0.000000
0.916195 0.000000
IFFT
0.840188 0.000000
0.394383 -0.000000
0.783099 0.000000
0.798440 0.000000
0.911647 -0.000000
0.197551 0.000000
0.335223 0.000000
0.768230 -0.000000
0.277775 -0.000000
0.553970 0.000000
0.477397 0.000000
0.628871 -0.000000
0.364785 0.000000
0.513401 -0.000000
0.952230 -0.000000
0.916195 -0.000000

顺便说一句，感谢提供完整的、可编译的代码示例。

cuda - CUDA 逆 FFT 错误

1 回答 1

Related

Reference