我正在使用 CUDA 驱动程序 API,在创建 CUDA 纹理对象时遇到了问题:cuTexObjectCreate 总是返回 CUDA_ERROR_INVALID_VALUE。
我发现函数 cuTexObjectCreate 的声明被条件编译宏包裹:
#if __CUDA_API_VERSION >= 5000
...function decl...
#endif
而我通过 cuCtxCreate_v2 创建的 CUDA 上下文,用 cuCtxGetApiVersion 查询到的 API 版本总是 3020。这是否意味着我应该创建一个更高版本的上下文?如果是,应该调用哪个函数?还是说问题出在别的地方?
这是示例代码:
struct CudaDriverTest
{
CUdevice m_device = 0;
CUcontext m_primaryContext = nullptr;
CUcontext m_context = nullptr;
CUarray m_array = nullptr;
CUtexObject m_texture = 0;
CUdeviceptr m_output = 0;
CudaDriverTest(size_t w, size_t h, float* data) :
m_image(w, h, QImage::Format_Grayscale8)
{
// begin cuda driver api staff
HANDLE_ERROR(cuInit(0));
int deviceCount = 0;
HANDLE_ERROR(cuDeviceGetCount(&deviceCount));
assert(deviceCount == 1);
HANDLE_ERROR(cuDeviceGet(&m_device, 0));
char name[256];
HANDLE_ERROR(cuDeviceGetName(name, 256, m_device));
std::cout << "device name:" << name << std::endl;
int major = 0;
int minor = 0;
HANDLE_ERROR(cuDeviceComputeCapability(&major, &minor, m_device));
std::cout << "major compute capability : " << major << ", minor compute capability : " << minor << std::endl;
HANDLE_ERROR(cuCtxCreate(&m_context, CU_CTX_SCHED_AUTO, m_device));
std::uint32_t version;
HANDLE_ERROR(cuCtxGetApiVersion(m_context, &version));
std::cout << "context api version : " << version << std::endl;
// array
CUDA_ARRAY_DESCRIPTOR arrDesc;
memset(&arrDesc, 0, sizeof(arrDesc));
arrDesc.Format = CUarray_format::CU_AD_FORMAT_FLOAT;
arrDesc.NumChannels = 1;
arrDesc.Width = w;
arrDesc.Height = h;
HANDLE_ERROR(cuArrayCreate(&m_array, &arrDesc));
CUDA_MEMCPY2D cpy2d;
memset(&cpy2d, 0, sizeof(cpy2d));
cpy2d.srcMemoryType = CUmemorytype::CU_MEMORYTYPE_HOST;
cpy2d.srcHost = data;
cpy2d.srcPitch = w * sizeof(float);
cpy2d.dstMemoryType = CUmemorytype::CU_MEMORYTYPE_ARRAY;
cpy2d.dstArray = m_array;
cpy2d.dstPitch = w * sizeof(float);
cpy2d.WidthInBytes = w * sizeof(float);
cpy2d.Height = h;
HANDLE_ERROR(cuMemcpy2D(&cpy2d));
// texture object
CUDA_RESOURCE_DESC resDesc;
memset(&resDesc, 0, sizeof(resDesc));
resDesc.resType = CUresourcetype::CU_RESOURCE_TYPE_ARRAY;
resDesc.res.array.hArray = m_array;
CUDA_TEXTURE_DESC texDesc;
memset(&texDesc, 0, sizeof(texDesc));
texDesc.addressMode[0] = CUaddress_mode::CU_TR_ADDRESS_MODE_WRAP;
texDesc.addressMode[1] = CUaddress_mode::CU_TR_ADDRESS_MODE_WRAP;
texDesc.addressMode[2] = CUaddress_mode::CU_TR_ADDRESS_MODE_WRAP;
texDesc.filterMode = CUfilter_mode::CU_TR_FILTER_MODE_LINEAR;
texDesc.flags = CU_TRSF_READ_AS_INTEGER;
CUDA_RESOURCE_VIEW_DESC viewDesc;
memset(&viewDesc, 0, sizeof(viewDesc));
viewDesc.width = w;
viewDesc.height = h;
viewDesc.format = CUresourceViewFormat::CU_RES_VIEW_FORMAT_FLOAT_1X32;
HANDLE_ERROR(cuTexObjectCreate(&m_texture, &resDesc, &texDesc, &viewDesc));
// output
HANDLE_ERROR(cuMemAlloc(&m_output, w * h * sizeof(float)));
}
}
PS:工作环境:GTX 960;CUDA 7.5;Windows 7 x64;Visual Studio 2013。