你有能够重现该错误的完整示例吗?我用你的代码片段(snippet)编写了一个小测试示例,它运行正常(见下文)。
如果你在创建设备端数组之后才更新“this”指针,那就会出问题,因为这样会用主机指针覆盖设备指针。但从你上面给出的代码来看,这里不应该存在这个问题。
% cat test.cpp
#include <iostream>
// Inner test object: an element count plus two heap arrays of that length.
// All members are public; nothing is initialized until construct() is called.
struct B
{
    int N;                     // number of elements in u and v
    double *__restrict__ u;    // first array, allocated by construct()
    double *__restrict__ v;    // second array, allocated by construct()

    // Allocates u and v on the host and sets up their OpenACC device copies.
    void construct(int);
};
void B::construct(int _N)
{
N=_N;
u=new double[N];
v=new double[N];
#pragma acc enter data create(this)
#pragma acc update device(this)
#pragma acc enter data create(u[:N])
#pragma acc enter data create(v[:N])
}
// Outer test object: an element count, a nested B, and one heap array.
// All members are public; nothing is initialized until construct() is called.
struct A
{
    int N;                     // number of elements in w
    B b;                       // nested object, set up by A::construct()
    double *__restrict__ w;    // array allocated by construct()

    // Allocates w, sets up the OpenACC device copies, then constructs b.
    void construct(int);
};
void A::construct(int _N)
{
N=_N;
w=new double[N];
#pragma acc enter data create(this)
#pragma acc update device(this)
#pragma acc enter data create(w[:N])
// allocate class A
b.construct(N);
}
// Test driver: builds an A with 32 elements, fills w, b.u and b.v with their
// own index inside an OpenACC parallel loop, copies the three arrays back to
// the host, and prints one "w:u:v" line per element.
int main()
{
    int N = 32;
    A myA;
    myA.construct(N);

    // myA was placed on the device by construct(), so only assert presence.
    #pragma acc parallel loop present(myA)
    for (int idx = 0; idx < N; ++idx) {
        myA.w[idx] = idx;
        myA.b.u[idx] = idx;
        myA.b.v[idx] = idx;
    }

    // Bring the computed values back to the host before printing.
    #pragma acc update host( myA.w[:N], myA.b.u[:N], myA.b.v[:N])

    for (int idx = 0; idx < N; ++idx) {
        std::cout << myA.w[idx] << ":";
        std::cout << myA.b.u[idx] << ":";
        std::cout << myA.b.v[idx] << std::endl;
    }

    return 0;
}
% pgc++ test.cpp -Minfo=accel -V18.10 -ta=tesla; a.out
main:
49, Generating present(myA)
Accelerator kernel generated
Generating Tesla code
52, #pragma acc loop gang, vector(32) /* blockIdx.x threadIdx.x */
56, Generating update self(myA.b.u[:N],myA.w[:N],myA.b.v[:N])
B::construct(int):
21, Generating update device(this[:1])
Generating enter data create(this[:1],v[:N],u[:N])
A::construct(int):
41, Generating update device(this[:1])
Generating enter data create(w[:N],this[:1])
0:0:0
1:1:1
2:2:2
3:3:3
4:4:4
5:5:5
6:6:6
7:7:7
8:8:8
9:9:9
10:10:10
11:11:11
12:12:12
13:13:13
14:14:14
15:15:15
16:16:16
17:17:17
18:18:18
19:19:19
20:20:20
21:21:21
22:22:22
23:23:23
24:24:24
25:25:25
26:26:26
27:27:27
28:28:28
29:29:29
30:30:30
31:31:31