操作系统:CentOS 7;CUDA Toolkit 版本:11.0
Nvidia 驱动程序和 GPU 信息:
NVIDIA-SMI 450.51.05
驱动程序版本:450.51.05
CUDA 版本:11.0
GPU:Quadro M2000M
我刚接触 CUDA 编程,因此非常感谢任何指导。我有一个非常简单的 CUDA C++ 程序,它在 GPU 上对统一内存中的两个数组逐元素求和(y[i] = x[i] + y[i])。但是,内核似乎无法启动,报错 cudaErrorNoKernelImageForDevice。代码如下:
using namespace std;
#include <iostream>
#include <math.h>
#include <cuda_runtime_api.h>
/**
 * Element-wise, in-place vector add: y[i] = x[i] + y[i] for every i in [0, n).
 *
 * Launch layout: a 1D grid of 1D blocks (only the .x dimensions are used).
 * The loop starts at the thread's global index and advances by the total
 * number of launched threads, so each element is updated exactly once for
 * any 1D launch configuration. With <<<1, 1>>> the single thread visits
 * 0, 1, ..., n-1 in order, exactly like a plain serial loop.
 *
 * No shared memory is used. The index is 64-bit so that neither
 * blockIdx.x * blockDim.x nor the stride increment can overflow.
 *
 * @param n number of elements to process; n <= 0 does nothing
 * @param x input array holding at least n floats, readable from the device
 * @param y input/output array holding at least n floats, readable and
 *          writable from the device
 */
__global__
void add(int n, float *x, float*y){
const long long stride = (long long)gridDim.x * blockDim.x;
const long long first = (long long)blockIdx.x * blockDim.x + threadIdx.x;
for (long long i = first; i < n; i += stride)
y[i] = x[i] + y[i];
}
/**
 * Reports a failed CUDA runtime call on stderr.
 *
 * @param status result code returned by a CUDA runtime call
 * @param what   short description of the call, used in the message
 * @return true when status is cudaSuccess, false otherwise
 */
static bool cudaOk(cudaError_t status, const char *what){
if (status == cudaSuccess)
return true;
std::cerr << what << " failed: " << cudaGetErrorName(status)
<< " (" << cudaGetErrorString(status) << ")" << std::endl;
return false;
}

/**
 * Adds two 1M-element arrays held in unified (managed) memory on the GPU and
 * verifies the result on the host.
 *
 * Every CUDA runtime call is checked; a failure is reported on stderr and the
 * process exits with status 1. On success the maximum absolute deviation from
 * the expected value 3.0f is printed and the process exits with status 0.
 */
int main() {
std::cout << "!!!Hello World!!!" << std::endl; // prints !!!Hello World!!!
const int N = 1<<20;
float *x = 0, *y = 0;

if (!cudaOk(cudaMallocManaged((void**)&x, N*sizeof(float)), "cudaMallocManaged(x)"))
return 1;
if (!cudaOk(cudaMallocManaged((void**)&y, N*sizeof(float)), "cudaMallocManaged(y)")) {
cudaFree(x);
return 1;
}

// Managed memory is host-accessible, so the inputs are initialized directly.
for(int i = 0; i < N; i++){
x[i] = 1.0f;
y[i] = 2.0f;
}

// Launched with one block of one thread: the single thread processes all N elements.
add<<<1, 1>>>(N, x, y);

/*
 * A kernel launch returns nothing, so launch failures are only visible
 * through cudaGetLastError(). cudaErrorNoKernelImageForDevice (209) means the
 * binary holds no kernel image suitable for the device, which happens when
 * the code generation options used at compile time do not include the
 * device's configuration.
 * NOTE(review): the Quadro M2000M is, as far as I know, a compute capability
 * 5.0 part, while nvcc 11.0 targets sm_52 by default; rebuilding with e.g.
 * `nvcc -arch=sm_50` should resolve it -- confirm against the device's
 * reported compute capability.
 */
bool ok = cudaOk(cudaGetLastError(), "add kernel launch");

// Errors raised while the kernel runs surface at the next synchronizing call.
// The sync is also required before the host may read y again.
ok = ok && cudaOk(cudaDeviceSynchronize(), "add kernel execution");

if (ok) {
float maxError = 0.0f;
for (int i = 0; i < N; i++){
maxError = fmaxf(maxError, fabsf(y[i]-3.0f));
}
std::cout << "Max error: " << maxError << std::endl;
}

cudaFree(x);
cudaFree(y);
return ok ? 0 : 1;
}