假设我有如下枚举和结构体:
// Where the contents of a foo's array currently live. The states are treated
// as mutually exclusive (ON_CPU means neither ON_GPU nor ON_BOTH).
typedef enum {
    ON_CPU,
    ON_GPU,
    ON_BOTH
} memLocation;

// Handle type: `foo` is a pointer to `struct foo`.
// NOTE(review): reusing the tag name for a pointer typedef is valid C, but C++
// rejects a typedef name that refers to a different type than a same-named
// struct in the same scope -- confirm this translation unit is built as C.
typedef struct foo *foo;

struct foo {
    // Waited on and then re-recorded by copyArrayFromCPUToGPUAsync() and
    // doWorkOnGPUAsync().
    cudaEvent_t event;
    // Presumably the device-side copy of the array -- TODO confirm allocation.
    float *deviceArray;
    // Presumably the host-side copy of the array -- TODO confirm allocation.
    float *hostArray;
    // Checked and updated on the host by copyArrayFromCPUToGPUAsync().
    memLocation arrayLocation;
};
以及如下两个函数:
// Enqueues, on `stream`, a host-to-device upload of bar's array when the data
// is only on the CPU, ordered after the work captured by bar->event.
//
// bar    - object whose event is waited on / re-recorded and whose
//          arrayLocation is read and updated on the host.
// stream - stream on which the wait, the optional copy and the record are
//          enqueued.
//
// The wait and the record are issued unconditionally, so even when no copy is
// enqueued the event recorded here still sits behind the wait on the
// previously recorded event.
//
// NOTE(review): the CUDA return codes are discarded; the function returns void
// so failures are not reported to the caller.
void copyArrayFromCPUToGPUAsync(foo bar, cudaStream_t stream)
{
    // Pass flags explicitly: the defaulted `flags` argument only exists in the
    // C++ declaration shipped with newer toolkits.
    cudaStreamWaitEvent(stream, bar->event, 0);
    if (bar->arrayLocation == ON_CPU) {
        // ON_CPU means !ON_GPU and !ON_BOTH
        // NOTE(review): the argument list is elided here; a real call also
        // needs the destination, the source and the byte count, none of which
        // can be derived from struct foo as shown.
        cudaMemcpyAsync(cudaMemcpyHostToDevice, stream);
        // Host-side bookkeeping is updated at enqueue time, not at copy
        // completion; consumers order themselves through bar->event.
        bar->arrayLocation = ON_BOTH;
    }
    cudaEventRecord(bar->event, stream);
}
// Enqueues asynchronous GPU work on `stream`, ordered after the work captured
// by bar->event, then re-records bar->event on `stream`.
//
// bar    - object whose event is waited on and then re-recorded.
// stream - stream on which the wait, the work and the record are enqueued.
//
// NOTE(review): the CUDA return codes are discarded; the function returns void
// so failures are not reported to the caller.
void doWorkOnGPUAsync(foo bar, cudaStream_t stream)
{
    // Pass flags explicitly: the defaulted `flags` argument only exists in the
    // C++ declaration shipped with newer toolkits.
    cudaStreamWaitEvent(stream, bar->event, 0);
    // do async work
    cudaEventRecord(bar->event, stream);
}
以及以下场景(狮子、女巫和魔衣橱大概也能在这里找到一席之地):
// Scenario under discussion: three streams touch the same `bar` in sequence.
// stream1, stream2, and stream3 have no prior work
// assume bar->arrayLocation = ON_GPU
// Waits on bar->event, enqueues work, then records bar->event on stream1.
doWorkOnGPUAsync(bar, stream1);
// arrayLocation != ON_CPU, so no copy is enqueued; the stream wait and the
// event record inside the function are still issued on stream2.
copyArrayFromCPUToGPUAsync(bar, stream2); // A no-op
// Waits on bar->event as last recorded (on stream2) before doing its work.
doWorkOnGPUAsync(bar, stream3);
以上做法安全吗?也就是说,即使 stream2 本身不做任何工作,它是否仍会等待 stream1 完成其“工作”?
随后记录的 cudaEvent 是否会反映这一点,使得 stream3 在 stream1 完成之前不会开始?