-2

我的问题是使用 CUDA 找出 n 维球体中的整数点数。我不明白下面的代码有什么问题,但它一直给出 0 输出。CUDA 计算能力为 2.0,工具包版本为 3.10。感谢所有的帮助。

__global__ void count_in(int pow_rad, int ndim,int *digit,int w,unsigned int *count,double radius)
{


long int i,j;
int rem,idx,sq,num;
int iy=blockDim.y * blockIdx.y + threadIdx.y;
int ix=blockDim.x * blockIdx.x + threadIdx.x;
int width=gridDim.x*blockDim.x;
int h=2*w+1;
i=iy*width+ix;
if(i>pow_rad) return;

    sq=0;
    idx=0;
    num=i;
    for(j=0;j<ndim;j++)
        {digit[j]=0;}
    while(num!=0)
    {
        rem=num%w;
        num/=w;
        digit[idx]=rem;
        idx++;
    }
    for(j=0;j<ndim;j++)
        {sq+=(digit[j]-h)*(digit[j]-h);}
    if(sq<(radius*radius))
        atomicInc(count,(unsigned int)1);
    __syncthreads();
}

int main(int argc, char* argv[]) 
{
const long ntrials = 5;
int i;
for (int n = 0; n < ntrials; ++n) {
    int *digit;
    unsigned int *count;
    std::cout<<n<<std::endl;
    int pow_rad;
    unsigned int num;
    // Select radius and number of dimensions at random
    const double r = drand48() * (RMAX - RMIN) + RMIN;
    const int   nd = lrand48() % (MAXDIM - 1) + 1;
    cudaMalloc((void**) &digit,sizeof(int)*nd);
    cudaMalloc((void**) &count,sizeof(unsigned int));
    cudaMemset(count,0,sizeof(unsigned int));
    int h=(int)floor(r);
    int w=2*h+1;
    std::cout << "###"<< r <<" "<< nd<< std::endl;
    for(i=1;i<=nd;i++)
        pow_rad*=w;
    int width=(int)sqrt(pow_rad);
    // Call your function
    dim3 dimBlock(32,32);
    dim3 dimGrid((width/32)+1,(width/32)+1);

count_in<<<dimGrid,dimBlock>>>(pow_rad, nd,digit,w,count,r);
    cudaMemcpy(&num,count,sizeof(unsigned int),cudaMemcpyDeviceToHost);
std::cout << "-->"<<num << std::endl;
}
}
4

1 回答 1

1

I didn't look at all of your code, but the lines

    atomicInc(count,(unsigned int)1);

seems to show a common misunderstanding of the atomicInc function. The second argument is not the amount to increment, but the modulus; when the global variable reaches that amount, it resets to zero. With the value you specified, each time the statement executes the variable count is reset to 0.

If you change atomicInc to atomicAdd, or if you change the modulus to something large enough that it will never be reached, it should work better.

于 2015-06-19T20:31:19.353 回答