1

我在使用 cudaBindTexture2D 时遇到了问题。下面是我为重现该问题而编写的最小示例,但这段代码……却能正常运行:

#include "cuda.h"
#include <stdio.h>

// Wraps a CUDA runtime call: on failure, prints the error code and its description
// together with the calling file and line to stderr, then terminates the process.
#define checkCudaErrors(err)  __checkCudaErrors (err, __FILE__, __LINE__)

inline void __checkCudaErrors(cudaError err, const char *file, const int line )
{
    // Nothing to report when the call succeeded.
    if (err == cudaSuccess)
        return;

    const char *description = cudaGetErrorString(err);
    fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", file, line, (int)err, description);
    exit(-1);
}

// File-scope 2D texture reference with float texels; main() binds it to linear
// device memory with cudaBindTexture2D.
texture<float,2> myTex;

// Minimal reproduction: uploads a 656x480 float image to linear device memory,
// binds it to the 2D texture reference myTex, then releases every resource.
// Each CUDA runtime call (including cleanup) goes through checkCudaErrors.
int main(int argc, char* argv[])
{
    const int width  = 656;
    const int height = 480;
    // Element and byte counts are computed in size_t rather than int.
    const size_t count = (size_t)width * height;
    const size_t bytes = count * sizeof(float);

    // Host image in which every element holds its own linear index.
    float* input = new float[count];
    for(size_t i = 0; i < count; ++i)
    {
        input[i] = (float)i;
    }

    float* inputDevice;
    checkCudaErrors(cudaMalloc((void**)&inputDevice, bytes));
    checkCudaErrors(cudaMemcpy(inputDevice, input, bytes, cudaMemcpyHostToDevice));

    cudaChannelFormatDesc desc = cudaCreateChannelDesc<float>();

    // Rows are tightly packed, so the pitch passed here is exactly one row of floats.
    checkCudaErrors(cudaBindTexture2D(0, myTex, inputDevice, desc, width, height, sizeof(float) * width));

    // Cleanup: previously the return codes were ignored and the host buffer was never released.
    checkCudaErrors(cudaUnbindTexture(myTex));
    checkCudaErrors(cudaFree(inputDevice));
    delete[] input;

    return 0;
}

但在我的真实项目中,几乎相同的代码却无法正常工作:

// File-scope 2D texture reference with float texels; CUDAConv::DoConvolution binds
// it to inputDevice with cudaBindTexture2D.
texture<float,2> texInput;

/* a lot of code here, but nothing with texInput */

    // Uploads the input image and the 1D kernel to the GPU, binds the image to the
    // texture reference texInput, launches the convolution kernel and copies the
    // result image back to the host.
    //
    // Parameters:
    //   input       host image, read as width * height floats
    //   kernel1D    host kernel coefficients, read as kernelSize floats
    //   resultMap   host output buffer, receives width * height floats
    //   rMap        not used in this function
    //   orientMap   not used in this function
    //   width       image width in pixels (also stored in devWidth)
    //   height      image height in pixels (also stored in devHeight)
    //   kernelSize  number of float coefficients in kernel1D
    //
    // Every CUDA runtime call, the kernel launch and the cleanup are checked through
    // checkCudaErrors.
    //
    // NOTE(review): radiusDevice and orientationDevice are allocated and handed to the
    // kernel but never copied back into rMap / orientMap — confirm whether that is intended.
    void CUDAConv::DoConvolution(float* input, float* kernel1D, float* resultMap, unsigned char* rMap, unsigned char* orientMap, int width, int height, int kernelSize)
    {
        // Half of the image diagonal, rounded to nearest.
        int fDim = (int)(floor((sqrt((float)(width * width + height * height)) / 2 + 0.5f)));
        //Lock* locks = new Lock[width * height];
        // Side length used when sizing the rotation buffers below.
        int dim = fDim * 2 + 1;
        devWidth = width;
        devHeight = height;

        // Sizes are computed in size_t so the products are not evaluated in int.
        const size_t pixelCount    = (size_t)width * height;
        const size_t imageBytes    = pixelCount * sizeof(float);
        const size_t kernelBytes   = (size_t)kernelSize * sizeof(float);
        const size_t sumUpBytes    = (size_t)angles * imageBytes;
        const size_t rotationBytes = (size_t)angles * dim * dim * sizeof(float);

        // allocate memory on GPU for the input image and upload it
        checkCudaErrors(cudaMalloc((void**)&inputDevice, imageBytes));
        checkCudaErrors(cudaMemcpy(inputDevice, input, imageBytes, cudaMemcpyHostToDevice));

        cudaChannelFormatDesc desc = cudaCreateChannelDesc<float>();

        // The kernel buffer must hold kernelSize floats: the copy further down writes
        // kernelSize * sizeof(float) bytes into it (it was allocated with kernelSize bytes before).
        checkCudaErrors(cudaMalloc((void**)&kernel1DDevice, kernelBytes));

        // Rows of inputDevice are tightly packed, so the pitch is one row of floats.
        checkCudaErrors(cudaBindTexture2D(0, texInput, inputDevice, desc, width, height, sizeof(float) * width));

        // One byte per pixel for the radius and orientation buffers.
        checkCudaErrors(cudaMalloc((void**)&radiusDevice, pixelCount));
        checkCudaErrors(cudaMalloc((void**)&orientationDevice, pixelCount));
        checkCudaErrors(cudaMalloc((void**)&resultDevice, imageBytes));
        checkCudaErrors(cudaMemset(resultDevice, 0x00, imageBytes));
        checkCudaErrors(cudaMemcpy(kernel1DDevice, kernel1D, kernelBytes, cudaMemcpyHostToDevice));

        // NOTE(review): each of the `angles` slots receives a buffer sized for `angles`
        // images; sizes are kept as in the original — confirm this is intended.
        for(int i = 0; i < angles; ++i)
        {
            checkCudaErrors(cudaMalloc((void**)&sumUpImage[i], sumUpBytes));
            // cudaMemset needs the device pointer itself, not the address of the slot that stores it.
            checkCudaErrors(cudaMemset(sumUpImage[i], 0x00, sumUpBytes));
            checkCudaErrors(cudaMalloc((void**)&rotationImage[i], rotationBytes));
            checkCudaErrors(cudaMemset(rotationImage[i], 0x00, rotationBytes));
        }

        // do all convolution calculations in the Convolution function
        // NOTE(review): rotationImage and sumUpImage are written through on the host above and
        // then passed to the kernel as-is — confirm these pointer tables are device-accessible.
        convolution <<<1, angles>>> (/*locks, */inputDevice, kernel1DDevice, rotationImage, sumUpImage, resultDevice, radiusDevice, orientationDevice, devWidth, devHeight, angles);
        // A kernel launch returns no status; launch failures have to be fetched explicitly.
        checkCudaErrors(cudaGetLastError());

        checkCudaErrors(cudaMemcpy(resultMap, resultDevice, imageBytes, cudaMemcpyDeviceToHost));

        // free memory allocated on the GPU (rotationImage[i] was previously never released)
        for(int i = 0; i < angles; ++i)
        {
            checkCudaErrors(cudaFree(sumUpImage[i]));
            checkCudaErrors(cudaFree(rotationImage[i]));
        }

        //free(locks);
        checkCudaErrors(cudaUnbindTexture(texInput));
        checkCudaErrors(cudaFree(inputDevice));
        checkCudaErrors(cudaFree(kernel1DDevice));
        checkCudaErrors(cudaFree(radiusDevice));
        checkCudaErrors(cudaFree(orientationDevice));
        checkCudaErrors(cudaFree(resultDevice));
    }

调用 cudaBindTexture2D 时,我得到的错误是:

CUDA_Conv.cu(203) : CUDA Runtime API error 18: invalid texture reference.

调试时 texInput 看起来就像 myTex,我不知道这里发生了什么。

我使用的是 CUDA 4.2 和 VS2010。

4

1 回答 1

0

答案在评论中:我编译时指定的 sm(计算能力)与我的 GPU 不兼容。奇怪的是,cudaMalloc 并不会因此报错,只有 cudaBindTexture 会报错;把两个项目设置为相同的 sm / arch 之后,它们的行为就一致了。

谢谢罗杰!

于 2012-05-01T15:21:30.320 回答