在 Matlab 中,当我输入一个复数的一维数组时,我有一个具有相同大小和相同维度的实数的数组的输出。尝试在 CUDA C 中重复此操作,但输出不同。你能帮忙吗?在 Matlab 中,当我输入 ifft(array)
我的arrayOfComplexNmbers:
[4.6500 + 0.0000i 0.5964 - 1.4325i 0.4905 - 0.5637i 0.4286 - 0.2976i 0.4345 - 0.1512i 0.4500 + 0.0000i 0.4345 + 0.1512i 0.4286 + 0.2976i 0.4905 + 0.5637i 0.5964 + 1.4325i]
我的arrayOfRealNumbers:
[ 0.9000 0.8000 0.7000 0.6000 0.5000 0.4000 0.3000 0.2000 0.1500 0.1000]
当我进入ifft(arrayOfComplexNmbers)
Matlab 时,我的输出是arrayOfRealNumbers
. 谢谢!这是我的 CUDA 代码:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cuda_runtime.h>
#include <cufft.h>
#include "device_launch_parameters.h"
#include "device_functions.h"
#define NX 256
#define NY 128
#define NRANK 2
#define BATCH 1
#define SIGNAL_SIZE 10
typedef float2 Complex;
__global__ void printCUDAVariables_1(cufftComplex *cudaSignal){
int index = threadIdx.x + blockIdx.x*blockDim.x;
printf("COMPLEX CUDA %d %f %f \n", index, cudaSignal[index].x, cudaSignal[index].y);
}
__global__ void printCUDAVariables_2(cufftReal *cudaSignal){
int index = threadIdx.x + blockIdx.x*blockDim.x;
printf("REAL CUDA %d %f \n", index, cudaSignal);
}
int main() {
cufftHandle plan;
//int n[NRANK] = { NX, NY };
Complex *h_signal = (Complex *)malloc(sizeof(Complex)* SIGNAL_SIZE);
float *r_signal = 0;
if (r_signal != 0){
r_signal = (float*)realloc(r_signal, SIGNAL_SIZE * sizeof(float));
}
else{
r_signal = (float*)malloc(SIGNAL_SIZE * sizeof(float));
}
int mem_size = sizeof(Complex)* SIGNAL_SIZE * 2;
h_signal[0].x = (float)4.65;
h_signal[0].y = (float)0;
h_signal[1].x = (float)0.5964;
h_signal[1].y = (float)0;
h_signal[2].x = (float)4.65;
h_signal[2].y = (float)-1.4325;
h_signal[3].x = (float)0.4905;
h_signal[3].y = (float)0.5637;
h_signal[4].x = (float)0.4286;
h_signal[4].y = (float)-0.2976;
h_signal[5].x = (float)0.4345;
h_signal[5].y = (float)-0.1512;
h_signal[6].x = (float)0.45;
h_signal[6].y = (float)0;
h_signal[7].x = (float)0.4345;
h_signal[7].y = (float)-0.1512;
h_signal[8].x = (float)0.4286;
h_signal[8].y = (float)0.2976;
h_signal[9].x = (float)0.4905;
h_signal[9].y = (float)-0.5637;
h_signal[10].x = (float)0.5964;
h_signal[10].y = (float)1.4325;
//for (int i = 0; i < SIGNAL_SIZE; i++){
// printf("RAW %f %f\n", h_signal[i].x, h_signal[i].y);
//}
//allocate device memory for signal
cufftComplex *d_signal, *d_signal_out;
cudaMalloc(&d_signal, mem_size);
cudaMalloc(&d_signal_out, mem_size);
cudaMemcpy(d_signal, h_signal, mem_size, cudaMemcpyHostToDevice);
printCUDAVariables_1 << <10, 1 >> >(d_signal);
//cufftReal *odata;
//cudaMalloc((void **)&odata, sizeof(cufftReal)*NX*(NY / 2 + 1));
//cufftPlan1d(&plan, SIGNAL_SIZE, CUFFT_C2R, BATCH);
cufftPlan1d(&plan, NX, CUFFT_C2C, BATCH);
cufftExecC2C(plan, d_signal, d_signal_out, CUFFT_INVERSE);
//cufftExecC2R(plan, d_signal, odata);
cudaDeviceSynchronize();
printCUDAVariables_1 << <10, 1 >> >(d_signal_out);
//printCUDAVariables_2 << <10, 1 >> >(odata);
//cudaMemcpy(h_signal, d_signal_out, SIGNAL_SIZE*2*sizeof(float), cudaMemcpyDeviceToHost);
cufftDestroy(plan);
cudaFree(d_signal);
cudaFree(d_signal_out);
return 0;
}