-1

我有一个包含 300,000 个点的数组,想对其中每 600 个点做一次 FFT。我尝试用 cufftPlanMany 来实现,但在下面这一行得到了一个未知错误:

cufftSafeCall(cufftPlanMany(&plan, rank, n, NULL, istride, idist, NULL, 1,1, CUFFT_C2C, 500));

retrevialfft.cu(82) : cufftSafeCall() CUFFT error: <unknown>

这是上下文中的代码

  // Fragment quoted from the question: allocates host and device buffers of
  // SIGNAL_SIZE complex samples, creates a batched 1D C2C plan, runs a forward
  // in-place transform on the device buffer and copies the result back.
  // SIGNAL_SIZE is defined outside this fragment.
  // NOTE(review): none of the CUDA runtime calls below (cudaSetDevice,
  // cudaMalloc, cudaMemcpy, cudaFree, cudaDeviceReset) have their return
  // values checked; only the cuFFT plan/exec calls are wrapped.
  cudaSetDevice(0);

  // Allocate host memory for the signal
  cufftComplex* h_signal=(cufftComplex*)malloc(sizeof(cufftComplex) * SIGNAL_SIZE);


  // Initialize the host signal: random real part in [0, 1], zero imaginary part
  for (unsigned int i = 0; i < SIGNAL_SIZE; ++i) {
    h_signal[i].x = rand() / (float)RAND_MAX;
    h_signal[i].y = 0;

    //    printf("Orignal: %f %f \n", h_signal[i].x, h_signal[i].y);
  }




  // Size in bytes of the whole signal (used for the device allocation and the copy back)
  int mem_size = sizeof(cufftComplex) * SIGNAL_SIZE;

  // Allocate device memory for signal
  // NOTE(review): no host-to-device cudaMemcpy of h_signal appears anywhere in
  // this fragment, so d_signal is transformed without ever being filled.
  cufftComplex* d_signal;
  cudaMalloc((void**)&d_signal, mem_size);

   int rank = 1; // 1D plan
   // Length of EACH transform as passed to cufftPlanMany via n[].
   // NOTE(review): the question text asks for one FFT per 600 points of a
   // 300,000-point array; 300000 here looks like the total array length rather
   // than the per-transform length (presumably 600 was intended) -- confirm.
   int numCols = 300000;
   int n[] = {numCols};

   // NOTE(review): batch and ostride are declared but never used below; the
   // plan call passes the literals 500 and 1,1 instead.
   int batch = 500;
   int istride = 1;
   int ostride = 1;
   int idist = numCols;

  // CUFFT plan
  // As written this requests 500 transforms of 300000 points each, i.e.
  // 150,000,000 complex elements in total.
  // NOTE(review): if SIGNAL_SIZE is 300000, as the question text suggests, that
  // is far more than the mem_size bytes allocated for d_signal -- confirm.
  // NOTE(review): inembed/onembed are NULL; the cuFFT documentation should be
  // checked for how istride/idist/ostride/odist are treated in that case.
   cufftHandle plan;
   cufftSafeCall(cufftPlanMany(&plan, rank, n, NULL, istride, idist, NULL, 1,1, CUFFT_C2C, 500));

  // Transform signal (forward, in place: input and output are both d_signal)
  printf("Transforming signal cufftExecC2C\n");
  cufftSafeCall(cufftExecC2C(plan, (cufftComplex *)d_signal, (cufftComplex *)d_signal, CUFFT_FORWARD));



     // Copy device memory to host
  cufftComplex* h_transformed = (cufftComplex*)malloc(sizeof(cufftComplex) * SIGNAL_SIZE);;
  cudaMemcpy(h_transformed, d_signal, mem_size,
                           cudaMemcpyDeviceToHost);



// Destroy CUFFT context
  cufftDestroy(plan);

  // cleanup memory
  free(h_signal);

  free(h_transformed);
  cudaFree(d_signal);
  cudaDeviceReset();

知道错误实际上是什么吗?

4

1 回答 1

1

由于您没有进一步说明问题的细节,下面我提供一段完整的可运行代码,它使用 cufftPlanMany() 执行批量 1D FFT,希望对您有所帮助。

#include <stdio.h>
#include <stdlib.h>
#include <cufft.h>
#include <assert.h>

/********************/
/* CUDA ERROR CHECK */
/********************/
// gpuErrchk(call): evaluates a CUDA runtime call and hands its cudaError_t,
// together with the call site's file and line, to gpuAssert().
// Wrapped in do { } while (0) so the macro behaves as a single statement and
// can be used safely inside unbraced if/else branches.
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

// Reports a failed CUDA runtime call on stderr.
//   code  - status returned by the CUDA runtime call
//   file  - source file of the call site (const: it receives __FILE__, a string literal)
//   line  - source line of the call site
//   abort - when true (the default), waits for a key press and then terminates
//           the process with the error code as exit status
// Does nothing when code == cudaSuccess.
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    if (code != cudaSuccess)
    {
        fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        // getchar() keeps the console open until the user acknowledges the error.
        if (abort) { getchar(); exit(code); }
    }
}

/*********************/
/* CUFFT ERROR CHECK */
/*********************/
// Maps a cufftResult status code to the spelling of its enumerator.
// Returns "<unknown>" for any value that is not listed in the table below.
static const char *_cudaGetErrorEnum(cufftResult error)
{
    // One entry per status code this helper knows how to name.
    static const struct { cufftResult code; const char *name; } knownCodes[] = {
        { CUFFT_SUCCESS,        "CUFFT_SUCCESS"        },
        { CUFFT_INVALID_PLAN,   "CUFFT_INVALID_PLAN"   },
        { CUFFT_ALLOC_FAILED,   "CUFFT_ALLOC_FAILED"   },
        { CUFFT_INVALID_TYPE,   "CUFFT_INVALID_TYPE"   },
        { CUFFT_INVALID_VALUE,  "CUFFT_INVALID_VALUE"  },
        { CUFFT_INTERNAL_ERROR, "CUFFT_INTERNAL_ERROR" },
        { CUFFT_EXEC_FAILED,    "CUFFT_EXEC_FAILED"    },
        { CUFFT_SETUP_FAILED,   "CUFFT_SETUP_FAILED"   },
        { CUFFT_INVALID_SIZE,   "CUFFT_INVALID_SIZE"   },
        { CUFFT_UNALIGNED_DATA, "CUFFT_UNALIGNED_DATA" }
    };
    const size_t knownCount = sizeof(knownCodes) / sizeof(knownCodes[0]);

    for (size_t idx = 0; idx < knownCount; ++idx) {
        if (knownCodes[idx].code == error) {
            return knownCodes[idx].name;
        }
    }

    return "<unknown>";
}

// cufftSafeCall(call): evaluates a cuFFT call and passes its cufftResult,
// together with the call site's file and line, to __cufftSafeCall().
#define cufftSafeCall(err)      __cufftSafeCall(err, __FILE__, __LINE__)

// Reports a failed cuFFT call on stderr and terminates the program.
//   err  - status returned by the cuFFT call
//   file - source file of the call site
//   line - source line of the call site
// Does nothing when err == CUFFT_SUCCESS.
inline void __cufftSafeCall(cufftResult err, const char *file, const int line)
{
    if (CUFFT_SUCCESS != err) {
        // Use the file/line passed in by the macro so the message points at the
        // failing call rather than at this helper. The format string has exactly
        // one conversion per argument; err is cast to int for %d.
        fprintf(stderr, "CUFFT error in file '%s', line %d\nerror %d: %s\nterminating!\n",
                file, line, (int)err, _cudaGetErrorEnum(err));
        cudaDeviceReset();
        assert(0);
        // assert() compiles to nothing under NDEBUG; make sure we still stop.
        exit(EXIT_FAILURE);
    }
}

/********/
/* MAIN */
/********/
// Demo: performs `batch` in-place 1D complex-to-complex forward FFTs of
// `numCols` points each with a single cufftPlanMany() plan, then prints every
// output sample. The input is a constant signal (1 + 0i), so each transform is
// expected to yield numCols in bin 0 and (approximately) zero elsewhere.
// Returns 0 on success; any CUDA/cuFFT failure terminates via the check macros.
int main() {

    const int batch = 3;                          // --- How many transforms to be performed
    const int numCols = 16;                       // --- Size of each transform

    const int SIGNAL_SIZE = batch * numCols;      // --- Overall size for all the signals
    const size_t mem_size = sizeof(cufftComplex) * SIGNAL_SIZE;   // --- Same, in bytes

    // --- Allocate host memory for all the signals
    cufftComplex* h_signal = (cufftComplex*)malloc(mem_size);
    if (h_signal == NULL) {
        fprintf(stderr, "Host allocation of %lu bytes failed\n", (unsigned long)mem_size);
        return EXIT_FAILURE;
    }

    // --- Initialize host memory for all the signals
    for (int i = 0; i < SIGNAL_SIZE; ++i) {
        h_signal[i].x = 1.f;
        h_signal[i].y = 0.f;
    }

    // --- Allocate device memory for all the signals
    cufftComplex* d_signal; gpuErrchk(cudaMalloc((void**)&d_signal, mem_size));

    // --- Host to Device memcopy
    gpuErrchk(cudaMemcpy(d_signal, h_signal, mem_size, cudaMemcpyHostToDevice));

    int rank = 1; // --- 1d plan
    int n[] = {numCols};

    // --- Signals are stored back to back: unit stride inside a signal,
    //     numCols elements between the starts of consecutive signals.
    int istride = 1;
    int ostride = 1;
    int idist = numCols;
    int odist = numCols;

    // --- CUFFT plan
    // The batch count must match the number of signals actually held in
    // d_signal; a larger value makes the transform run past the allocation.
    cufftHandle plan;
    cufftSafeCall(cufftPlanMany(&plan, rank, n, NULL, istride, idist, NULL, ostride, odist, CUFFT_C2C, batch));

    // --- Signals transformations (in place: input and output are both d_signal)
    cufftSafeCall(cufftExecC2C(plan, (cufftComplex*)d_signal, (cufftComplex*)d_signal, CUFFT_FORWARD));

    // --- Device to Host memcopy
    gpuErrchk(cudaMemcpy(h_signal, d_signal, mem_size, cudaMemcpyDeviceToHost));

    for (int i = 0; i < SIGNAL_SIZE; ++i) printf("Real part = %f; Imaginar part = %f\n", h_signal[i].x, h_signal[i].y);

    // --- Destroy CUFFT context
    cufftSafeCall(cufftDestroy(plan));

    // --- Memory cleanup
    free(h_signal);
    gpuErrchk(cudaFree(d_signal));

    gpuErrchk(cudaDeviceReset());

    return 0;
}
于 2014-08-29T07:08:41.793 回答