这是一个头文件DataHolder.h:
#ifndef DATAHOLDER_H
#define DATAHOLDER_H
// Floating-point type alias available for instantiating DataHolder.
typedef float FloatingType;

// Number of particle slots currently in use. It is a mutable global that the
// OpenACC regions in this header copy to and from the device.
// NOTE(review): this is a definition, not a declaration; including this header
// from more than one translation unit would yield duplicate definitions.
int LIFE = 0;

// Capacity of the particle array (number of elements).
const int GL = 2000000;
// Fixed-capacity holder for particle values.
// The storage is an in-object array of GL elements, so sizeof(DataHolder)
// scales with GL.
template <typename Floating>
class DataHolder {
public:
    // Particle storage referenced by the OpenACC data clauses of the member
    // functions.
    Floating particles[GL];

    // User-provided empty constructor and destructor; neither touches
    // `particles`.
    DataHolder() {}
    ~DataHolder() {}

    // Stores a zero in slot LIFE and increments LIFE (defined out of line).
    void InitParticle();

    // Reads the first LIFE particles in an OpenACC parallel loop (defined out
    // of line).
    void Propagate();
};
// Walks over the first LIFE entries of `particles` inside an OpenACC parallel
// loop. Each iteration only loads the element into a local variable; nothing is
// written back to the array.
//
// The `present` clause means the array must already be on the device from the
// OpenACC runtime's point of view; LIFE is transferred with `copy`.
template <typename Floating>
void DataHolder<Floating>::Propagate()
{
    #pragma acc parallel loop copy(LIFE) present(particles)
    for (int idx = 0; idx < LIFE; ++idx) {
        Floating current = particles[idx];
    }
}
// Appends one zero-valued particle: stores 0 in particles[LIFE] and then
// increments LIFE, all inside a single-gang, single-vector-lane OpenACC region.
//
// Preconditions:
//   - `particles[0:GL]` must already be present on the device according to the
//     OpenACC runtime (for example via an `enter data` directive covering this
//     object). Memory the runtime does not know about fails the `present`
//     check at run time.
//   - LIFE is copied to the device on entry and back to the host on exit.
//
// If the array is already full (LIFE >= GL) the call does nothing instead of
// writing past the end of `particles`.
template <typename Floating>
void DataHolder<Floating>::InitParticle()
{
    // The directive spans two source lines, so the first one must end in a
    // line continuation; otherwise the data clauses are not part of the pragma.
    #pragma acc parallel num_gangs(1) vector_length(1) \
        present(particles[0:GL]) copy(LIFE)
    {
        // Bounds guard: valid slots are 0 .. GL-1.
        if (LIFE < GL)
        {
            particles[LIFE] = 0.0f;
            #pragma acc atomic update
            ++LIFE;
        }
    }
}
#endif//DATAHOLDER_H
我在文件 main.cpp 中使用它:
#include <iostream>
#include "DataHolder.h"
#include <accelmath.h>
#include <openacc.h>
#include <cuda.h>
#include <cuda_runtime.h>
int main(int argc, char **argv)
{
DataHolder<FloatingType> * d;
cudaMalloc((void**) & d, sizeof(DataHolder<FloatingType>));
std::cout<<"sizeof(DataHolder<FloatingType>)="
<<sizeof(DataHolder<FloatingType>)/1024/1024<<" MB"<<std::endl;
LIFE=0;
int step=0;
d->InitParticle();
cudaFree(d);
}
该程序可以通过编译,但在运行时失败,输出如下:
sizeof(DataHolder<FloatingType>)=7 MB
hostptr=0x501520000,stride=1,size=2000000,eltsize=4,flags=0x200=present,async=-1,threadid=1
Present table dump for device[1]: NVIDIA Tesla GPU 0, compute capability 3.0, threadid=1
host:0x604b60 device:0x501ce0000 size:4 presentcount:1+0 line:26 name:LIFE
allocated block device:0x501ce0000 size:512 thread:1
FATAL ERROR: data in PRESENT clause was not found on device 1: name=(null) host:0x501520000
 file:/home/70-gaa/NFbuild_script_CHECK_GPU/ERROR/T3DataHolder.h _ZN10DataHolderIfE12InitParticleEv line:26
为什么会出现这个错误?问题出在哪里?
我使用下面的命令行编译代码,并在 GeForce GTX 650 Ti GPU 上运行:
cmake . -DCMAKE_C_COMPILER=pgcc -DCMAKE_CXX_COMPILER=pgc++ -DCMAKE_CXX_FLAGS="-acc -mcmodel=medium -ta=tesla:cc30,managed -fast -Mcuda=cuda10.1 --c++11"
使用 PGI 19.4 C++ 编译器、gcc 5.3.1、OS Fedora 23 x86_64、CUDA 10.1、CUDA 驱动程序版本 418.67。