通常编译器可以自动检测归约(reduction),但在这种情况下不能。因此,您需要自己添加一个归约(reduction)子句。以下是使用 pgcc 16.4 版编译并运行时的输出:
% cat test.c
#include <stdlib.h>
#include <stdio.h>
/*
 * Minimal OpenACC reduction demo.
 *
 * Walks a shape1Count x shape2Count (32 x 32) iteration space, bumps a
 * shared counter once per (i, j) pair, prints the final count and exits
 * with status 0.
 */
int main() {
    int i, j;
    int shape1Count = 32;
    int shape2Count = 32;
    int kkk = 1;
    int intersectionsCount = 0;

    /* The sum reduction on the shared counter is requested explicitly. */
#pragma acc kernels loop reduction(+:intersectionsCount)
    for (i = 0; i < shape1Count; i++) {
        for (j = 0; j < shape2Count; j++) {
            /*
             * Both branches perform the same increment, so the counter
             * grows by one on every iteration regardless of kkk.
             */
            if (kkk == 1) {
                intersectionsCount++;
            } else {
                intersectionsCount++;
            }
        }
    }

    /* 32 * 32 iterations, one increment each. */
    printf("%d\n", intersectionsCount);
    exit(0);
}
% pgcc test.c -Minfo=accel -acc; a.out
main:
15, Loop is parallelizable
16, Loop is parallelizable
Accelerator kernel generated
Generating Tesla code
15, #pragma acc loop gang, vector(4) /* blockIdx.y threadIdx.y */
16, #pragma acc loop gang, vector(32) /* blockIdx.x threadIdx.x */
Generating reduction(+:intersectionsCount)
1024