我无法从 2D 纹理中获取(fetch)数据:
// File-scope 2D texture reference whose texels are float2 values.
// cudaReadModeElementType makes fetches return the stored element values
// as-is (no normalized-float conversion).
// NOTE(review): texture references are the legacy texture API (deprecated in
// CUDA 11, removed in CUDA 12); texture objects are the replacement if the
// toolkit is upgraded.
texture<float2, cudaTextureType2D, cudaReadModeElementType> tex;
// ...
// Allocate the pitched device buffer that backs the 2D texture: N*N rows,
// each row holding x*y*z ComplexFloat elements (row width is given in bytes).
// cudaMallocPitch may pad every row and reports the real row stride, in
// bytes, through `pitch`.
// The runtime calls are deliberately made outside assert(): an expression
// inside assert() is not evaluated at all when NDEBUG is defined, which would
// silently skip the allocation and the bind in release builds.
cudaError_t allocStatus = cudaMallocPitch(&imgcov2_device, &pitch, sizeof(ComplexFloat)*x*y*z, N*N);
assert(allocStatus == cudaSuccess);
(void)allocStatus;  // silence unused-variable warnings when NDEBUG is defined
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float2>();
// Clamp out-of-range coordinates in both dimensions, return the nearest texel
// without interpolation, and address texels with unnormalized coordinates.
tex.addressMode[0] = cudaAddressModeClamp;
tex.addressMode[1] = cudaAddressModeClamp;
tex.filterMode = cudaFilterModePoint;
tex.normalized = false;
// Bind the buffer to the texture reference. Width and height are in texels;
// the last argument is the row pitch in bytes and must be the value returned
// by cudaMallocPitch. Passing the total buffer size here makes every row
// after the first resolve to the wrong address.
// NOTE(review): the texture width/height must also stay within the device
// limits in cudaDeviceProp::maxTexture2DLinear — confirm for the target GPU.
// NOTE(review): any upload into imgcov2_device has to honour the same pitch
// (e.g. cudaMemcpy2D); the copy code is not visible here.
cudaError_t bindStatus = cudaBindTexture2D(NULL, tex, imgcov2_device, channelDesc, x*y*z, N*N, pitch);
assert(bindStatus == cudaSuccess);
(void)bindStatus;  // silence unused-variable warnings when NDEBUG is defined
// ...
// Accumulate into tmp the product of y[j + i*N] and the texel fetched from
// `tex` (ComplexFloatAdd / ComplexFloatMul are presumably complex addition and
// multiplication — defined elsewhere, verify).
// Texture x coordinate: blockIdx.x * blockDim.x + threadIdx.x.
// Texture y coordinate: threadIdx.y + j*N.
// NOTE(review): tex2D takes float coordinates, so these integer expressions
// are converted implicitly; confirm both stay inside the bound width/height.
tmp = ComplexFloatAdd(tmp, ComplexFloatMul(y[j + i*N], tex2D(tex, blockIdx.x * blockDim.x + threadIdx.x, threadIdx.y + j*N))); // texture fetch
我确信 tex2D 的最后两个参数分别落在范围 [0, x*y*z-1] 和 [0, N*N-1] 之内。另一篇帖子建议使用 pitch 对齐的内存(pitched memory,即 cudaMallocPitch),但我试过之后仍然没有成功。有什么想法吗?可疑的是,在 x*y*z = 90000、N*N = 32^2 时它不会失败,但在 N*N = 8^2 时却会失败。ComplexFloat 类型被 typedef 为 float2。只有纹理获取(fetch)这一步失败。