1

我无法在这个简短的 cuda 代码中找到分段错误的来源。我正在使用它来测试 Thrust 库与 STL 库对整数进行排序的排序速度。我正在传递要作为命令行参数排序的双精度数组的大小。

这是代码

inline void check_cuda_error(char *message)

    {
      cudaThreadSynchronize();
      cudaError_t error = cudaGetLastError();
      if(error != cudaSuccess)
      {
        printf("CUDA error after %s: %s\n", message, cudaGetErrorString(error));
      }
    }



            int main(int argc, char *argv[])
        {
          int  N = atoi(argv[1]);
          double* h = new double[N];
          for (int i = 0; i < N; ++i)
            {
              h[i] = (double)rand()/RAND_MAX; //std::cout << h[i] << " " ;
            }

          clock_t start , stop;

          std::cout << std::endl;

          // Start timing
          start = clock();
          std::sort(h, h+N);
          stop  = clock();  
          std::cout << "Host sorting took " << (stop - start) /(double)CLOCKS_PER_SEC << std::endl ; 


          // Start the GPU work. Initialize to random numbers again.
          for (int i = 0; i < N; ++i)
            {
              h[i] = (double)rand()/RAND_MAX; //std::cout << h[i] << " " ;
            }
          double* d = 0; 
          const size_t num_bytes = N * sizeof( double ); 
          cudaMalloc((void**)&d, num_bytes);
          check_cuda_error("Memory Allocation"); 

          cudaMemcpy(d ,h , N * sizeof(double), cudaMemcpyHostToDevice); // Transfer data
          thrust::sort( d, d+ N ) ;
            return 0;
        }

我收到以下错误

[BeamerLatex/Farber]$ nvcc -arch=sm_20 sortcompare.cu  ; ./a.out 16777216

Host sorting took 3.77
[1]    4661 segmentation fault  ./a.out 16777216
[BeamerLatex/Farber]$ 
4

1 回答 1

2

似乎您不能在原始指针上运行推力::排序,您需要device_ptr先将其转换为,即:

thrust::device_ptr< double > dv = thrust::device_pointer_cast(d);
thrust::sort( dv, dv+ NN ) ;

这对我来说很好。

于 2012-08-23T14:10:54.230 回答