我正在尝试使用 cufft 库为 fft 1d 转换编写一个简单的代码。
我以 cufft 库教程(链接)上的代码为例,但转换前和逆变换后的数据不一样。为什么 ?
这是输出:
[1DCUFFT] starting...
[1DCUFFT] is starting...
Transforming signal cufftExecC2C
Transforming signal back cufftExecC2C
first : 0.840188 0.000000 after 2.020520 -2.465333
first : 0.394383 0.000000 after 2.690347 -2.105700
first : 0.783099 0.000000 after 3.155561 -1.491952
first : 0.798440 0.000000 after 3.309971 -0.761629
first : 0.911647 0.000000 after 3.139909 -0.092953
first : 0.197551 0.000000 after 2.734147 0.355526
first : 0.335223 0.000000 after 2.256154 0.501509
first : 0.768230 0.000000 after 1.887422 0.369626
first : 0.277775 0.000000 after 1.762471 0.083391
first : 0.553970 0.000000 after 1.920150 -0.179526
first : 0.477397 0.000000 after 2.289955 -0.252255
first : 0.628871 0.000000 after 2.718623 -0.039409
first : 0.364784 0.000000 after 3.026649 0.449287
first : 0.513401 0.000000 after 3.072947 1.104457
first : 0.952230 0.000000 after 2.803322 1.758350
first : 0.916195 0.000000 after 2.265463 2.245056
这是我的源代码,教程示例建议将数据集大小的输出划分为具有相同的值。
// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
// includes, project
#include <cufft.h>
#include <cutil_inline.h>
#include <shrQATest.h>
void runTest(int argc, char** argv);
// The filter size is assumed to be a number smaller than the signal size
#define SIGNAL_SIZE 16
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char** argv)
{
runTest(argc, argv);
}
////////////////////////////////////////////////////////////////////////////////
//! Run a simple test for CUDA
////////////////////////////////////////////////////////////////////////////////
void runTest(int argc, char** argv)
{
printf("[1DCUFFT] is starting...\n");
if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") )
cutilDeviceInit(argc, argv);
else
cudaSetDevice( cutGetMaxGflopsDeviceId() );
cufftComplex* h_signal=(cufftComplex*)malloc(sizeof(cufftComplex) * SIGNAL_SIZE);
// Allocate host memory for the signal
//Complex* h_signal = (Complex*)malloc(sizeof(Complex) * SIGNAL_SIZE);
// Initalize the memory for the signal
for (unsigned int i = 0; i < SIGNAL_SIZE; ++i) {
h_signal[i].x = rand() / (float)RAND_MAX;
h_signal[i].y = 0;
}
int mem_size = sizeof(cufftComplex) * SIGNAL_SIZE;
// Allocate device memory for signal
cufftComplex* d_signal;
cutilSafeCall(cudaMalloc((void**)&d_signal, mem_size));
// Copy host memory to device
cutilSafeCall(cudaMemcpy(d_signal, h_signal, mem_size,
cudaMemcpyHostToDevice));
// CUFFT plan
cufftHandle plan;
cufftSafeCall(cufftPlan1d(&plan, mem_size, CUFFT_C2C, 1));
// Transform signal
printf("Transforming signal cufftExecC2C\n");
cufftSafeCall(cufftExecC2C(plan, (cufftComplex *)d_signal, (cufftComplex *)d_signal, CUFFT_FORWARD));
// Transform signal back
printf("Transforming signal back cufftExecC2C\n");
cufftSafeCall(cufftExecC2C(plan, (cufftComplex *)d_signal, (cufftComplex *)d_signal, CUFFT_INVERSE));
// Copy device memory to host
cufftComplex* h_inverse_signal = (cufftComplex*)malloc(sizeof(cufftComplex) * SIGNAL_SIZE);;
cutilSafeCall(cudaMemcpy(h_inverse_signal, d_signal, mem_size,
cudaMemcpyDeviceToHost));
for(int i=0;i< SIGNAL_SIZE;i++){
h_inverse_signal[i].x= h_inverse_signal[i].x/(float)SIGNAL_SIZE;
h_inverse_signal[i].y= h_inverse_signal[i].y/(float)SIGNAL_SIZE;
printf("first : %f %f after %f %f \n",h_signal[i].x,h_signal[i].y,h_inverse_signal[i].x,h_inverse_signal[i].y);
}
//Destroy CUFFT context
cufftSafeCall(cufftDestroy(plan));
// cleanup memory
free(h_signal);
free(h_inverse_signal);
cutilSafeCall(cudaFree(d_signal));
cutilDeviceReset();
}