1

我尝试了 cudaMemcpy2DFromArray 和 cudaMemcpy2D,但两者都无法正常工作。所谓“无法正常工作”,是指 GpuMat 确实从 cudaArray 中复制到了一些数据,但图像的水平比例是错误的。

代码片段如下:

// Map the registered graphics resource and fetch the cudaArray behind it
// (array index 0, mip level 0).
cudaArray *colorArr;
checkCudaErrors( cudaGraphicsMapResources( 1, &cudaResourceColor, 0 ) );
checkCudaErrors( cudaGraphicsSubResourceGetMappedArray( &colorArr, cudaResourceColor, 0, 0 ) );

// Destination image: w x h, three 32-bit float channels per pixel.
cv::gpu::GpuMat gpuColorMat(Size(w,h), CV_32FC3);   

// Tried method 1: the following didn't work correctly
// Copies rows of cols*sizeof(float3) bytes straight from the array into the GpuMat.
// NOTE(review): this row width only matches if each texel of the mapped array
// is 12 bytes wide — confirm the texture's internal format.
checkCudaErrors( cudaMemcpy2DFromArray( gpuColorMat.data, gpuColorMat.step, colorArr, 
    0, 0, gpuColorMat.cols*sizeof(float3), gpuColorMat.rows, cudaMemcpyDeviceToDevice ) );

// Tried method 2: also didn't work correctly. Same error as the first method.
// Stages the data in a temporary linear buffer, then copies that into the GpuMat.
// NOTE(review): the destination pitch passed to cudaMemcpy2DFromArray is w*h,
// whereas the next call reads the buffer with a pitch of w*sizeof(float3) —
// these look inconsistent; confirm the intended pitch.
// NOTE(review): no cudaFree for cuarr is visible in this snippet.
float3 *cuarr; 
checkCudaErrors( cudaMalloc( (void**)&cuarr, w*h*sizeof(float3) ) );
checkCudaErrors( cudaMemcpy2DFromArray( cuarr, w*h, colorArr, 0, 0, w*sizeof(float3), h, cudaMemcpyDeviceToDevice ) );
checkCudaErrors( cudaMemcpy2D( (float*)gpuColorMat.data, gpuColorMat.step, cuarr, w*sizeof(float3), w*sizeof(float3), h, cudaMemcpyDeviceToDevice ) );

// unmap buffer objects
checkCudaErrors( cudaGraphicsUnmapResources( 1, &cudaResourceColor, 0 ) );

谁能帮我解决这个问题?

4

2 回答 2

2

我终于让它工作了。我在下面分享我的代码:

.cu 文件:执行设备数组复制。渲染得到的图像被绑定到纹理 inTex,然后复制到目标缓冲区 float3 *dst。

// 2D texture reference of float4 texels, read without type conversion.
// It is bound to the source cudaArray by BindToTexture().
texture<float4, 2, cudaReadModeElementType> inTex;

/**
 * Copies the first three channels of every texel of inTex into a pitched
 * float3 image.
 *
 * Launch layout: a 2D grid with one thread per pixel; the grid may overhang
 * the image, the surplus threads exit immediately.
 *
 * @param dst     base address of the destination image
 * @param dstStep distance between consecutive destination rows, in bytes
 * @param width   number of pixels per row
 * @param height  number of rows
 */
__global__ void CuDeviceArrayCopyFromTexture( float3* dst, int dstStep, int width, int height )     
{
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Valid pixels are [0, width) x [0, height). The comparison must be >=,
    // otherwise the thread at x == width (or y == height) writes one element
    // past the end of the row (or one row past the end of the image).
    if ( x >= width || y >= height ) return;

    float4 res = tex2D(inTex, x, y);
    // Rows are dstStep bytes apart, so the row base is computed in bytes.
    float3* row_y = (float3*)((char*)dst + y * dstStep);
    // Drop the fourth (w) component of the texel.
    row_y[x] = make_float3(res.x, res.y, res.z);
}

// Ceiling division: the number of chunks of size m needed to cover n items.
inline int iDivUp(int n, int m)
{
    const int padded = n + m - 1;
    return padded / m;
}

/**
 * Host wrapper: launches CuDeviceArrayCopyFromTexture over a width x height
 * image using blocks of 64 x 1 threads.
 *
 * @param dst     destination image base pointer, forwarded to the kernel
 * @param dstStep destination row stride, forwarded to the kernel
 * @param width   image width in pixels
 * @param height  image height in pixels
 */
void DeviceArrayCopyFromTexture( float3* dst, int dstStep, int width, int height ) 
{
    // An empty image would yield a zero-sized grid, which is an invalid
    // launch configuration; there is nothing to copy anyway.
    if ( width <= 0 || height <= 0 ) return;

    dim3 threads( 64, 1 );
    // Round up in both dimensions so the grid always covers the whole image,
    // whatever block shape is chosen above.
    dim3 grid( iDivUp( width, threads.x ), iDivUp( height, threads.y ) );
    CuDeviceArrayCopyFromTexture <<< grid, threads >>> ( dst, dstStep, width, height );
    // A kernel launch returns no status itself; fetch launch errors explicitly.
    checkCudaErrors( cudaGetLastError() );
}

// Binds the given cudaArray to the texture reference inTex; the result of the
// bind call is passed to checkCudaErrors.
void BindToTexture( cudaArray *cuArr )
{
     checkCudaErrors( cudaBindTextureToArray( inTex, cuArr ) );
}

.cpp 文件:设置 gl 渲染纹理,绑定到 cuda 纹理并调用设备数组复制方法。

// Create the colour texture that is rendered into and later read from CUDA.
glActiveTexture(GL_TEXTURE0);
glGenTextures(1, &fboColorTex);
glBindTexture(GL_TEXTURE_2D, fboColorTex);
// RGB16F and RGB32F were both tried and did not work. GL_RGBA16F could be
// mapped to a cudaArray with float4 elements.
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
// Register the texture with CUDA as read-only. Registration takes a
// cudaGraphicsRegisterFlags* value; cudaGraphicsMapFlagsReadOnly is a flag
// for cudaGraphicsResourceSetMapFlags and does not belong here.
checkCudaErrors( cudaGraphicsGLRegisterImage( &cudaResourceColor, fboColorTex, GL_TEXTURE_2D, cudaGraphicsRegisterFlagsReadOnly ) );

// Entry points implemented in the .cu file.
extern void BindToTexture( cudaArray *cuArr );
extern void DeviceArrayCopyFromTexture( float3* dst, int dstStep, int width, int height );

// Destination image (w x h, three float channels); static, so it is
// constructed only once.
static GpuMat gpuMat( Size(w,h), CV_32FC3 );
cudaArray *cuArr;

// Copy color buffer
// The resource is mapped before its array is fetched and unmapped only after
// the copy has been issued; the order of these statements matters.
checkCudaErrors( cudaGraphicsMapResources( 1, &cudaResourceColor, 0 ) );
checkCudaErrors( cudaGraphicsSubResourceGetMappedArray( &cuArr, cudaResourceColor, 0, 0 ) );

// Bind the mapped array to the texture, then copy it into the GpuMat,
// passing gpuMat.step as the destination row stride.
BindToTexture( cuArr );
DeviceArrayCopyFromTexture( (float3*)gpuMat.data, gpuMat.step, gpuMat.cols, gpuMat.rows  );

checkCudaErrors( cudaGraphicsUnmapResources( 1, &cudaResourceColor, 0 ) );

参考:

  1. http://answers.opencv.org/question/12958/read-rendered-images-using-gpumat-and-cuda/

  2. CUDA Samples\v5.5\3_Imaging\postProcessGL(CUDA 官方示例)

于 2013-11-03T16:16:04.720 回答
1

GpuMat::step 以元素数量为单位,而 pitch 以字节为单位,因此请尝试把下面的第一个调用改为第二个调用(即将 step 乘以 sizeof(float3)):

// Call as posted in the question:
cudaMemcpy2DFromArray( gpuColorMat.data, gpuColorMat.step,                  colorArr, 0, 0, gpuColorMat.cols*sizeof(float3), gpuColorMat.rows, cudaMemcpyDeviceToDevice );

// Suggested replacement: the destination pitch is scaled by sizeof(float3).
// NOTE(review): OpenCV's documentation appears to describe GpuMat::step as a
// stride in bytes already — verify before applying this change.
cudaMemcpy2DFromArray( gpuColorMat.data, gpuColorMat.step * sizeof(float3), colorArr, 0, 0, gpuColorMat.cols*sizeof(float3), gpuColorMat.rows, cudaMemcpyDeviceToDevice );
于 2013-11-01T05:30:04.643 回答