
I am optimizing some code with CUDA. I am not sure whether I should use cudaMalloc inside a __global__ function (fun1), since isn't x already allocated in GPU memory?

__global__ void fun2(double *y)
{
    int i=blockIdx.x;
    y[i]=...;
}

__global__ void fun1(double *x)
{
    //should I cudaMalloc() y for fun2 or just use the x which was already allocated in main?
    fun2<<<N,1>>>(x);
    ...
}

int main(){
    double *x;
    ...
    cudaMalloc((void**)&x, N*sizeof(double));
    fun1<<<N,1>>>(x);
    ...
}

1 Answer


Perhaps you mean something like this:

__device__ void fun2(double *y)
{
    int i=blockIdx.x;
    y[i]=...;
}

__global__ void fun1(double *x)
{
    // no cudaMalloc needed here: x already points to device memory allocated in main
    fun2(x);
    ...
}

int main(){
    double *x;
    ...
    cudaMalloc((void**)&x, N*sizeof(double));
    fun1<<<N,1>>>(x);
    ...
}

However, it is common to compute the threadId in the __global__ function itself.
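For example, here is a minimal sketch of that pattern, assuming N elements with one thread per element (set_value is just a hypothetical stand-in for your real per-element computation, and the block/thread layout is illustrative):

#include <cuda_runtime.h>

__device__ void set_value(double *y, int i)
{
    y[i] = 2.0 * i;   // placeholder for the real per-element work
}

__global__ void fun1(double *x, int n)
{
    // compute the global thread index in the __global__ kernel ...
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        set_value(x, i);   // ... and hand it to the __device__ function
}

int main()
{
    const int N = 1024;
    double *x;
    cudaMalloc((void**)&x, N * sizeof(double));

    // one cudaMalloc in main is enough: the kernel and any __device__
    // functions it calls all operate on the same device pointer x
    int threads = 256;
    int blocks = (N + threads - 1) / threads;
    fun1<<<blocks, threads>>>(x, N);
    cudaDeviceSynchronize();

    cudaFree(x);
    return 0;
}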

answered 2013-05-22T16:15:25.413