我在 CUDA 上做一个简单的 n 体模拟,然后我试图用 OpenGL 进行可视化。
在我在 CPU 上初始化我的粒子数据、分配相应的内存并在 GPU 上传输该数据之后,程序必须进入以下循环:
1)计算每个粒子上的力(CUDA部分)
2)更新粒子位置(CUDA部分)
3)显示这个时间步的粒子(OpenGL部分)
4) 回到 1)
CUDA和OpenGL之间的接口我用下面的代码实现:
GLuint dataBufferID;
particle_t* Particles_d;
particle_t* Particles_h;
cudaGraphicsResource *resources[1];
我在 OpenGLs Array_Buffer 上分配空间,并使用以下代码将后者注册为 cudaGraphicsResource:
void createVBO()
{
// create buffer object
glGenBuffers(1, &dataBufferID);
glBindBuffer(GL_ARRAY_BUFFER, dataBufferID);
glBufferData(GL_ARRAY_BUFFER, bufferStride*N*sizeof(float), 0, GL_DYNAMIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
checkCudaErrors(cudaGraphicsGLRegisterBuffer(resources, dataBufferID, cudaGraphicsMapFlagsNone));
}
最后,我描述的程序周期(步骤 1 到 4)由以下函数 update(int) 实现
void update(int value)
{
// map OpenGL buffer object for writing from CUDA
float* dataPtr;
checkCudaErrors(cudaGraphicsMapResources(1, resources, 0));
size_t num_bytes;
//get a pointer to that buffer object for manipulation with cuda!
checkCudaErrors(cudaGraphicsResourceGetMappedPointer((void **)&dataPtr, &num_bytes,resources[0]));
//fill the Graphics Resource with particle position Data!
launch_kernel<<<NUM_BLOCKS,NUM_THREADS>>>(Particles_d,dataPtr,1);
// unmap buffer object
checkCudaErrors(cudaGraphicsUnmapResources(1, resources, 0));
glutPostRedisplay();
glutTimerFunc(milisec,update,0);
}
我编译结束我得到以下错误:
src/main.cu:390 处的 CUDA 错误代码 = 4(cudaErrorLaunchFailure)“cudaGraphicsMapResources(1,资源,0)”
src/main.cu:392 处的 CUDA 错误 code=4(cudaErrorLaunchFailure) “cudaGraphicsResourceGetMappedPointer((void **)&dataPtr, &num_bytes,resources[0])”
src/main.cu:397 处的 CUDA 错误代码 = 4(cudaErrorLaunchFailure)“cudaGraphicsUnmapResources(1,资源,0)”
有谁知道该异常的原因可能是什么?我是否应该在每次执行 update(int) 之前使用 createVBO() 创建 dataBuffer ...?
ps 为了更清楚,我的内核函数如下:
__global__ void launch_kernel(particle_t* Particles,float* data, int KernelMode){
int i = blockIdx.x*THREADS_PER_BLOCK + threadIdx.x;
if(KernelMode == 1){
//N_d is allocated on device memory
if(i > N_d)
return;
//and update dataBuffer!
updateX(Particles+i);
for(int d=0;d<DIM_d;d++){
data[i*bufferStride_d+d] = Particles[i].p[d]; // update the new coordinate positions in the data buffer!
}
// fill in also the RGB data and the radius. In general THIS IS NOT NECESSARY!! NEED TO PERFORM ONCE! REFACTOR!!!
data[i*bufferStride_d+DIM_d] =Particles[i].r;
data[i*bufferStride_d+DIM_d+1] =Particles[i].g;
data[i*bufferStride_d+DIM_d+2] =Particles[i].b;
data[i*bufferStride_d+DIM_d+3] =Particles[i].radius;
}else{
// if KernelMode = 2 then Update Y
float* Fold = new float[DIM_d];
for(int d=0;d<DIM_d;d++)
Fold[d]=Particles[i].force[d];
//of course in parallel :)
computeForces(Particles,i);
updateV(Particles+i,Fold);
delete [] Fold;
}
// in either case wait for all threads to finish!
__syncthreads();
}