PGI 编译器的扩展函数 __pgi_gangidx() 应该返回正在执行该函数的 gang 的数字 id(参见此处)。但是,我一直没能弄清楚如何在并行区域(parallel region)内的循环中使用它。
在下面的代码中,我尝试了几种写法,其中只有一种得到了期望的结果。不幸的是,这种写法会让并行区域内的循环按顺序(串行)执行。
变量 int place 在这里只是一个简化的替身:在实际代码中,它代表对多个全局数组的、更复杂的按 gang 区分的索引,因此无法轻易去掉。
可以使用以下命令编译代码:
pgc++ -fast -acc -ta=tesla,cc60 -Minfo=accel test.cpp
代码:
#include <iostream>
#include "openacc.h"
// Clears the first `size` entries of `ids` to zero so that each experiment
// starts from a known state. Does nothing when `size` is zero or negative.
void ResetIds(int *const ids, int size){
  // Count down from the end; the resulting buffer contents are the same as
  // for a forward sweep.
  int remaining = size;
  while (remaining > 0) {
    --remaining;
    ids[remaining] = 0;
  }
}
// Writes "Line <line>: " to std::cout, then the first `size` entries of
// `ids`, each followed by one space, then a line break and an empty line.
// `line` is only a label; callers pass __LINE__ to tag which experiment the
// dump belongs to.
void ShowVector(int line, int *const ids, int size){
  std::cout << "Line " << line << ": ";
  int idx = 0;
  while (idx < size) {
    std::cout << ids[idx] << " ";
    ++idx;
  }
  // First endl terminates the data line, second one adds a blank separator.
  std::cout << std::endl;
  std::cout << std::endl;
}
int main(){
int gangs = 10;
int gwidth = 10;
int size = gangs*gwidth;
int *ids = new int[50*size];
//Works!
//Gives: 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14
ResetIds(ids, size);
#pragma acc parallel num_gangs(gangs) copy(ids[0:size])
{
int place = __pgi_gangidx();
#pragma acc loop seq
for(int i=0;i<10;i++)
ids[place*gwidth+i] = 14;
}
ShowVector(__LINE__, ids, size);
//Gives: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
ResetIds(ids, size);
#pragma acc parallel num_gangs(gangs) copy(ids[0:size])
{
int place = __pgi_gangidx()*gwidth;
#pragma acc loop
for(int i=0;i<10;i++)
ids[place+i] = 14;
}
ShowVector(__LINE__, ids, size);
//Gives: 14 14 14 14 14 14 14 14 14 14 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
ResetIds(ids, size);
#pragma acc parallel num_gangs(gangs) copy(ids[0:size])
{
int place = __pgi_gangidx();
#pragma acc loop
for(int i=0;i<10;i++)
ids[place*gwidth+i] = 14;
}
ShowVector(__LINE__, ids, size);
//Gives: 14 0 0 0 0 0 0 0 0 0 0 14 0 0 0 0 0 0 0 0 0 0 14 0 0 0 0 0 0 0 0 0 0 14 0 0 0 0 0 0 0 0 0 0 14 0 0 0 0 0 0 0 0 0 0 14 0 0 0 0 0 0 0 0 0 0 14 0 0 0 0 0 0 0 0 0 0 14 0 0 0 0 0 0 0 0 0 0 14 0 0 0 0 0 0 0 0 0 0 14
ResetIds(ids, size);
#pragma acc parallel num_gangs(gangs) copy(ids[0:size])
{
int place = __pgi_gangidx();
#pragma acc loop worker
for(int i=0;i<10;i++)
ids[place*gwidth+i] = 14;
}
ShowVector(__LINE__, ids, size);
return 0;
}