我是 CUDA 的初学者。我使用的是 NVIDIA GeForce GTX 1070、CUDA 工具包 11.3 和 Ubuntu 18.04。如下面的代码所示,我用两个 CPU 线程,通过两个流把两个内核提交到同一个 GPU。我希望这两个内核能够同时提交到 GPU(并尽可能并发执行)。有没有办法做到这一点?
或者,至少有没有比我目前的做法更好的方法?
先谢谢大家。
我的代码:
//Headers
#include <cstdio>
// Condition variable the launcher threads wait on inside threadsSynchronize().
// Statically initialized so it is valid before any thread is started.
pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
// Mutex that guards `waiting` and is paired with `cond`.
// Statically initialized for the same reason as `cond`.
pthread_mutex_t cond_mutex = PTHREAD_MUTEX_INITIALIZER;
// Count of threads that have arrived at threadsSynchronize(); only touched
// while cond_mutex is held.
unsigned int waiting = 0;
// One CUDA stream per launcher thread: threadZero() uses streamZero and
// threadOne() uses streamOne. Both are created and destroyed in main().
cudaStream_t streamZero, streamOne;
// Kernel zero: takes no arguments; threadZero() launches it on streamZero.
// The body is elided ("...") in this excerpt.
__global__ void kernelZero(){...}
// Kernel one: takes no arguments; threadOne() launches it on streamOne.
// The body is elided ("...") in this excerpt.
__global__ void kernelOne(){...}
// Host-side rendezvous for the two launcher threads: each caller blocks until
// both threads have arrived, then both proceed to issue their kernel launch.
//
// The barrier is reusable. The arrival counter is reset and a generation
// number is bumped each time the last participant arrives, so the function
// can be called again (e.g. once per iteration of a launch loop) without
// deadlocking.
//
// NOTE(review): this only lines up the host-side launch calls. Whether the
// two kernels actually overlap on the device is decided by the GPU (free
// resources, stream semantics), not by this barrier.
void threadsSynchronize(void) {
    // Number of threads that must arrive before anyone is released.
    static const unsigned int kParticipants = 2;
    // Identifies the current barrier round; guarded by cond_mutex.
    static unsigned long generation = 0;

    pthread_mutex_lock(&cond_mutex);
    const unsigned long myGeneration = generation;
    if (++waiting == kParticipants) {
        // Last arrival: open the barrier and arm it for the next round.
        waiting = 0;
        ++generation;
        pthread_cond_broadcast(&cond);
    } else {
        // Wait for the round to change; the loop absorbs spurious wakeups.
        while (generation == myGeneration)
            pthread_cond_wait(&cond, &cond_mutex);
    }
    pthread_mutex_unlock(&cond_mutex);
}
// Thread entry point that launches kernelZero on streamZero.
//
// Waits at threadsSynchronize() so that its launch is issued at (roughly) the
// same moment as threadOne's, launches the kernel, then blocks until all work
// queued on streamZero has finished.
//
// The argument is unused and the thread always returns NULL. Launch and
// execution failures are reported on stderr instead of being dropped.
void *threadZero(void *_) {
    // ...
    threadsSynchronize();
    kernelZero<<<blocksPerGridZero, threadsPerBlockZero, 0, streamZero>>>();
    // A kernel launch returns nothing; a bad launch configuration only shows
    // up through cudaGetLastError().
    cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess) {
        // Faults raised while the kernel runs are reported by the next
        // synchronizing call on the stream.
        status = cudaStreamSynchronize(streamZero);
    }
    if (status != cudaSuccess) {
        fprintf(stderr, "threadZero: kernelZero failed: %s\n",
                cudaGetErrorString(status));
    }
    // ...
    return NULL;
}
// Thread entry point that launches kernelOne on streamOne.
//
// Waits at threadsSynchronize() so that its launch is issued at (roughly) the
// same moment as threadZero's, launches the kernel, then blocks until all work
// queued on streamOne has finished.
//
// The argument is unused and the thread always returns NULL. Launch and
// execution failures are reported on stderr instead of being dropped.
void *threadOne(void *_) {
    // ...
    threadsSynchronize();
    kernelOne<<<blocksPerGridOne, threadsPerBlockOne, 0, streamOne>>>();
    // A kernel launch returns nothing; a bad launch configuration only shows
    // up through cudaGetLastError().
    cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess) {
        // Faults raised while the kernel runs are reported by the next
        // synchronizing call on the stream.
        status = cudaStreamSynchronize(streamOne);
    }
    if (status != cudaSuccess) {
        fprintf(stderr, "threadOne: kernelOne failed: %s\n",
                cudaGetErrorString(status));
    }
    // ...
    return NULL;
}
// Program entry point.
//
// Creates the two CUDA streams, starts one host thread per kernel
// (threadZero / threadOne), waits for both threads to finish, and destroys the
// streams.
//
// Returns 0 on success and 1 if any CUDA or pthread call fails; every failure
// is reported on stderr.
//
// NOTE(review): kernel launches are asynchronous with respect to the host, so
// a single host thread issuing both launches back to back on the two streams
// is usually enough to let them overlap; the two-thread setup is kept here to
// preserve the original structure.
int main(void) {
    pthread_t zero, one;
    cudaError_t cudaStatus;
    int rc;
    int exitCode = 0;

    cudaStatus = cudaStreamCreate(&streamZero);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaStreamCreate(streamZero) failed: %s\n",
                cudaGetErrorString(cudaStatus));
        return 1;
    }
    cudaStatus = cudaStreamCreate(&streamOne);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaStreamCreate(streamOne) failed: %s\n",
                cudaGetErrorString(cudaStatus));
        cudaStreamDestroy(streamZero);
        return 1;
    }
    // ...
    rc = pthread_create(&zero, NULL, threadZero, NULL);
    if (rc != 0) {
        fprintf(stderr, "pthread_create(threadZero) failed: error %d\n", rc);
        cudaStreamDestroy(streamZero);
        cudaStreamDestroy(streamOne);
        return 1;
    }
    rc = pthread_create(&one, NULL, threadOne, NULL);
    if (rc != 0) {
        // threadZero will block in threadsSynchronize() waiting for a partner
        // that never starts, so joining it would hang. Returning from main
        // ends the process instead.
        fprintf(stderr, "pthread_create(threadOne) failed: error %d\n", rc);
        return 1;
    }
    // ...
    rc = pthread_join(zero, NULL);
    if (rc != 0) {
        fprintf(stderr, "pthread_join(threadZero) failed: error %d\n", rc);
        exitCode = 1;
    }
    rc = pthread_join(one, NULL);
    if (rc != 0) {
        fprintf(stderr, "pthread_join(threadOne) failed: error %d\n", rc);
        exitCode = 1;
    }

    cudaStatus = cudaStreamDestroy(streamZero);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaStreamDestroy(streamZero) failed: %s\n",
                cudaGetErrorString(cudaStatus));
        exitCode = 1;
    }
    cudaStatus = cudaStreamDestroy(streamOne);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaStreamDestroy(streamOne) failed: %s\n",
                cudaGetErrorString(cudaStatus));
        exitCode = 1;
    }
    return exitCode;
}