1

我正在编写一个用于使用CUDA处理GPS信号的程序。所以我需要使用fft,但是发生了错误。

 CUDA error at F:/clouddrive kingsoft/acc/accfinal/accfinal/acc.cu:341 code=2(CUF
FT_ALLOC_FAILED) "cufftPlan1d(&plan, new_size, CUFFT_C2C, 1)"

代码在这里。

 double fft_Ifft_Sum(Complex *h_signal,Complex *h_filter_kernel,double* list,bool firstEnter)
{
   double max=0;
    int new_size=samplesPerCode;
    int mem_size = sizeof(Complex) * new_size;

    // Allocate device memory for signal
    Complex *d_signal;
    checkCudaErrors(cudaMalloc((void **)&d_signal, mem_size));
    // Copy host memory to device
    checkCudaErrors(cudaMemcpy(d_signal, h_signal, mem_size,cudaMemcpyHostToDevice));

    // Allocate device memory for filter kernel
   if(firstEnter)
   { 
     checkCudaErrors(cudaFree(d_filter_kernel));
     checkCudaErrors(cudaMalloc((void **)&d_filter_kernel, mem_size));

     checkCudaErrors(cudaMemcpy(d_filter_kernel, h_filter_kernel, mem_size,
                               cudaMemcpyHostToDevice));
   }

    // CUFFT plan
    cufftHandle plan;
    checkCudaErrors(cufftPlan1d(&plan, new_size, CUFFT_C2C, 1));

    // Transform signal and kernel

    checkCudaErrors(cufftExecC2C(plan, (cufftComplex *)d_signal, (cufftComplex *)d_signal, CUFFT_FORWARD));

     ComplexPointwiseMulAndScale_p<<<1024, 1024>>>(d_signal, d_filter_kernel, new_size, 1.0f / new_size);   
// Check if kernel execution generated and error
    getLastCudaError("Kernel execution failed [ ComplexPointwiseMulAndScale ]");

    // Transform signal back
    //printf("Transforming signal back cufftExecC2C\n");
    checkCudaErrors(cufftExecC2C(plan, (cufftComplex *)d_signal, (cufftComplex *)d_signal, CUFFT_INVERSE));
    cudaThreadSynchronize();

    // Copy device memory to host
     Complex *h_convolved_signal = h_signal;
    checkCudaErrors(cudaMemcpy(h_convolved_signal, d_signal, mem_size,

                      cudaMemcpyDeviceToHost));
    // list=new double[new_size];
    for(int i=0;i<new_size;i++)
    {
        list[i]=h_convolved_signal[i].x*h_convolved_signal[i].x+h_convolved_signal[i].y*h_convolved_signal[i].y;
        if(list[i]>max)max=list[i];
    }


   return max;



    //Destroy CUFFT context
    checkCudaErrors(cufftDestroy(plan));

    // cleanup memory
    free(h_signal);
    free(h_filter_kernel);


    checkCudaErrors(cudaFree(d_signal));
    checkCudaErrors(cudaFree(d_filter_kernel));
    cudaDeviceReset();
}

并且这个函数会在main中调用大约1900次,它可以顺利运行到大约1440次,这里会出现错误。我不知道为什么。谢谢。

4

1 回答 1

1

d_signal每次进入该函数时,您都在分配设备内存,但从不释放它。在任何 or destroy 操作之前,您的函数中有一条return语句free,因此如果您重复调用此函数,这对我来说似乎是个问题。

我认为编译器也会根据您所显示的内容发出有关无法访问代码的警告。

于 2013-09-25T07:23:30.693 回答