0

我试图将 OpenACC 与指向结构体的指针结合使用,该结构体包含动态分配的成员。下面的代码运行失败,报错如下:

在线程 1 中失败:调用 cuStreamSynchronize 返回错误 700(内核执行期间访问了非法地址)

该错误出现在使用 nvc 编译之后(编译器版本:“nvc 20.9-0 LLVM 64-bit target on x86-64 Linux -tp haswell”)。据我所知,我遵循的正是 OpenACC“入门”指南等资料中建议的做法。但不知为何,指针似乎没有在设备上正确关联(?)。有谁知道这里出了什么问题?

#include <stdlib.h>
#include <stdio.h>

/* One-dimensional grid: an element count plus a pointer to the element array. */
struct grid {
  int N;      /* number of elements; written by allocate() */
  double *X;  /* element array; obtained from malloc in allocate() */
};
typedef struct grid grid;

/*
 * Store N in g, allocate g->X on the host with malloc, then issue two
 * OpenACC "enter data create" directives: one for the struct itself and
 * one for the N-element array it points to.
 *
 * The malloc result is not checked before it is used.
 */
void allocate(grid* g, int N)
{
  g->N = N;
  g->X = (double*) malloc(sizeof(double) * g->N);

  /* NOTE(review): both directives use `create`; presumably this allocates
   * device storage without transferring the host value of g->N -- confirm
   * against the OpenACC specification. */
  #pragma acc enter data create(g[0:1])
  #pragma acc enter data create(g->X[0:N])
}

/*
 * Counterpart of allocate(): issue OpenACC "exit data delete" directives
 * for the member array and then for the struct, and finally free the host
 * array. g->X is not reset after free(), so it is left dangling.
 */
void release(grid* g)
{
  /* Member array first, then the struct that holds the pointer. */
  #pragma acc exit data delete(g->X[0:g->N])
  #pragma acc exit data delete(g[0:1])

  free(g->X);
}

/*
 * Assign 42 to each of the g->N elements of g->X inside an OpenACC
 * parallel loop.
 *
 * The directive carries no data clause for g, so how g reaches the device
 * is left to the compiler's implicit data handling.
 */
void fill(grid * g)
{
  int i;

  #pragma acc parallel loop
  for (i = 0; i < g->N; i++)
  {
    g->X[i] = 42; // the culprit: commenting this out removes the error too
  }
}

/*
 * Minimal driver for the reproduction: set up a 10-element grid, fill it
 * through the OpenACC loop in fill(), then tear it down again.
 * Always returns 0.
 */
int main(void)
{
  grid g;

  allocate(&g, 10);

  fill(&g);

  release(&g);

  return 0;
}
4

1 回答 1

0

从编译器反馈消息中,您会看到如下内容:

     fill:
          32, Accelerator restriction: size of the GPU copy of g is unknown
              Generating Tesla code
              32, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */
          32, Generating implicit copyin(g) [if not already present]
          37, Generating update self(g->X[:g->N])

问题在于编译器无法隐式复制带有动态数据成员的聚合类型,因此您需要添加“present(g)”,以表明 g 已经在设备上。

此外,您需要用 copyin(而不是 create)把结构体复制到设备上,这样设备上才能得到 N 的值;另外,exit data delete 指令中无需包含数组形状。例如:

% cat test.c
#include <stdlib.h>
#include <stdio.h>

/* One-dimensional grid: an element count plus a pointer to the element array. */
struct grid {
  int N;      /* number of elements; written by allocate() */
  double *X;  /* element array; obtained from malloc in allocate() */
};
typedef struct grid grid;

/*
 * Initialise *g as a grid of N doubles on the host and register it with the
 * OpenACC runtime.
 *
 * g - grid to initialise; must not be NULL. Its previous contents are
 *     overwritten without being freed.
 * N - number of elements; must be >= 0.
 *
 * The host array is left uninitialised. Because the function returns void
 * and cannot report an error, a negative N or a failed allocation prints a
 * message to stderr and terminates the process with EXIT_FAILURE.
 * Every successful call should be paired with release().
 */
void allocate(grid* g, int N)
{
  if (N < 0)
  {
    /* A negative count would wrap to a huge size_t in the byte count. */
    fprintf(stderr, "allocate: invalid element count %d\n", N);
    exit(EXIT_FAILURE);
  }

  g->N = N;
  g->X = malloc(sizeof *g->X * (size_t) N);

  /* malloc(0) may legitimately return NULL, so only N > 0 is a failure. */
  if (g->X == NULL && N > 0)
  {
    fprintf(stderr, "allocate: out of memory for %d doubles\n", N);
    exit(EXIT_FAILURE);
  }

  /* copyin rather than create: the loop in fill() reads g->N on the device,
   * so the struct's host contents have to be transferred. */
  #pragma acc enter data copyin(g[0:1])
  /* The array itself is only created on the device, not copied.
   * NOTE(review): presumably the runtime also attaches the device copy of
   * g->X to the device struct here -- confirm against the OpenACC spec. */
  #pragma acc enter data create(g->X[0:N])
}

/*
 * Counterpart of allocate(): remove the grid's data from the device, free
 * the host array and leave *g in an empty state (X == NULL, N == 0).
 *
 * g - grid previously set up by allocate(); must not be NULL.
 *
 * The struct itself is owned by the caller and is not freed.
 */
void release(grid* g)
{
  /* Member array first, then the struct that holds the pointer. No array
   * shape is given on the delete clauses. */
  #pragma acc exit data delete(g->X)
  #pragma acc exit data delete(g)

  free(g->X);

  /* Do not leave a dangling pointer behind: an accidental second release()
   * then degrades to free(NULL) on the host instead of a double free. */
  g->X = NULL;
  g->N = 0;
}

/*
 * Set every element of g->X to 42 in an OpenACC parallel loop, copy the
 * array back to the host and print the first few values to stdout.
 *
 * g - grid previously set up by allocate(); must not be NULL.
 *
 * At most PREVIEW_COUNT values are printed, and never more than g->N, so
 * grids shorter than the preview are not read out of bounds.
 */
void fill(grid * g)
{
  enum { PREVIEW_COUNT = 4 };
  int i;
  int shown;

  /* present(g) tells the compiler the struct is already on the device; it
   * cannot implicitly copy an aggregate with a dynamic data member. */
  #pragma acc parallel loop present(g)
  for (i = 0; i < g->N; i++)
  {
    g->X[i] = 42;
  }

  /* Bring the device results back so the host can print them. */
  #pragma acc update self(g->X[:g->N])

  shown = g->N < PREVIEW_COUNT ? g->N : PREVIEW_COUNT;
  for (i = 0; i < shown; i++)
  {
    printf("%d : %f \n",i,g->X[i]);
  }
}

/*
 * Demo driver: set up a grid, run the OpenACC fill (which also prints the
 * first values), then tear the grid down. Always returns 0.
 */
int main()
{
  enum { GRID_SIZE = 10 };  /* element count used for the demo */
  grid mesh;

  allocate(&mesh, GRID_SIZE);
  fill(&mesh);
  release(&mesh);

  return 0;
}

% nvc -acc test.c -Minfo=accel -V20.9 ; a.out
allocate:
     17, Generating enter data copyin(g[:1])
         Generating enter data create(g->X[:N])
release:
     24, Generating exit data delete(g[:1],g->X[:1])
fill:
     32, Generating present(g[:1])
         Generating Tesla code
         32, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */
     37, Generating update self(g->X[:g->N])
0 : 42.000000
1 : 42.000000
2 : 42.000000
3 : 42.000000
于 2020-12-09T22:59:22.467 回答