0

来自绑定到 CUDA 数组的 CUDA 纹理对象的非归一化线性插值似乎返回了不正确的结果。插值似乎0.5比预期的要小。归一化线性插值似乎工作正常。

这段代码有问题吗?在进行非归一化纹理插值时,我们是否期望乘以 2?

编码:

#include <iostream>
#include <cstdio>

// simple function to print an array
template <typename T>
void print_array(const T *a, const size_t length) {
  for (size_t i=0; i!=length; i++) {
    std::cout << "a[" << i << "]: " << a[i] << std::endl;
  }
}

// attempt to interpolate linear memory
__global__
void cuda_texture_interpolate(cudaTextureObject_t tex,
                              float start,
                              float stop,
                              int count) {
  if (count < 1) { count = 1; }
  float h = (stop-start)/((float)count);
  float x = start;
  float y;
  for (int i = 0; i != count; i++) {
    y = tex1D<float>(tex,x);
    printf("x: %4g ; y: %4g\n",x,y);
    x = x + h;
  }
  y = tex1D<float>(tex,x);
  printf("x: %4g ; y: %4g\n",x,y);
}

int main(void) {
  // set up host array
  int n = 5;
  float a_host[5] = {3,2,1,2,3};
  printf("printing array on host.\n");
  print_array(a_host,n);

  // allocate and copy to cuda array
  cudaChannelFormatDesc channelDesc =
      cudaCreateChannelDesc(32, 0, 0, 0,
                            cudaChannelFormatKindFloat);
  cudaArray* cuArray;
  cudaMallocArray(&cuArray, &channelDesc, n);

  // Copy to device memory some data located at address h_data
  // in host memory
  cudaMemcpyToArray(cuArray, 0, 0, a_host, n*sizeof(float),
                    cudaMemcpyHostToDevice);

  // create texture object
  cudaResourceDesc resDesc;
  memset(&resDesc, 0, sizeof(resDesc));
  resDesc.resType = cudaResourceTypeArray;
  resDesc.res.array.array = cuArray;

  cudaTextureDesc texDesc;
  memset(&texDesc, 0, sizeof(texDesc));
  texDesc.addressMode[0]   = cudaAddressModeClamp;
  texDesc.filterMode       = cudaFilterModeLinear;
  texDesc.readMode         = cudaReadModeElementType;
  //texDesc.normalizedCoords = 1;
  texDesc.normalizedCoords = 0;


  cudaResourceViewDesc resViewDesc;
  memset(&resViewDesc, 0, sizeof(resViewDesc));
  resViewDesc.format = cudaResViewFormatFloat1;
  resViewDesc.width = n;

  // create texture object
  cudaTextureObject_t tex;
  cudaCreateTextureObject(&tex, &resDesc, &texDesc, &resViewDesc);

  // call interpolation kernel
  printf("interpolate (f(x) -> y).\n");
  //cuda_texture_interpolate<<<1,1>>>(tex,0.0,1.0,10);
  cuda_texture_interpolate<<<1,1>>>(tex,0.0,5.0,10);

  // clean up
  cudaDestroyTextureObject(tex);
  cudaFreeArray(cuArray);

  printf("end of texture_object_interpolation.\n");
  return 0;
}

结果:

$ ./texture_object_interpolation
printing array on host.
a[0]: 3
a[1]: 2
a[2]: 1
a[3]: 2
a[4]: 3
interpolate (f(x) -> y).
x:    0 ; y:  1.5
x:  0.5 ; y:  1.5
x:    1 ; y: 1.25
x:  1.5 ; y:    1
x:    2 ; y: 0.75
x:  2.5 ; y:  0.5
x:    3 ; y: 0.75
x:  3.5 ; y:    1
x:    4 ; y: 1.25
x:  4.5 ; y:  1.5
x:    5 ; y:  1.5
end of texture_object_interpolation.

有关上述代码、makefile 和规范化插值代码,请参阅此要点。

4

1 回答 1

2

这显然是由 CUDA 5.0 编译器中的错误引起的,并在 CUDA 5.5 版本中修复。

[这个答案是从评论中收集的,以便从 CUDA 标签的未回答队列中删除问题]

于 2016-02-21T08:30:34.880 回答