这个程序:
#include <string>
#include <stdexcept>
struct buffers_t {
void* host_buffer;
void* device_buffer;
};
void ensure_no_error(std::string message) {
auto status = cudaGetLastError();
if (status != cudaSuccess) {
throw std::runtime_error(message + ": " + cudaGetErrorString(status));
}
}
void my_callback(cudaStream_t stream, cudaError_t status, void* args) {
auto buffers = (buffers_t *) args;
cudaMemcpyAsync(
buffers->host_buffer, buffers->device_buffer,
1, cudaMemcpyDefault, stream);
ensure_no_error("after cudaMemcpyAsync");
}
int main() {
cudaStream_t stream;
cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking);
buffers_t buffers;
cudaMallocHost(&buffers.host_buffer, 1);
cudaMalloc(&buffers.device_buffer, 1);
cudaStreamAddCallback(stream, my_callback, &buffers, 0);
ensure_no_error("after enqueue callback");
cudaStreamSynchronize(stream);
ensure_no_error("after sync");
}
产量:
terminate called after throwing an instance of 'std::runtime_error'
what(): after cudaMemcpyAsync: operation not permitted
Aborted
这有点奇怪,因为 API 参考cudaMemcpyAsync
并未将cudaErrorNotPermitted
其列为潜在错误之一。从回调调度异步副本真的有问题吗?
注意:我的机器有 GTX 650 Ti (CC 3.0)、CUDA 9.0、Linux 内核 4.8.0、驱动程序 384.59。