我有一个fortran77水库模拟项目,想用openacc指令加速实现,编译器是PGI visual fortran,子程序如下:
子程序 jbild(a, b, impl, [ ia, ja, neqa, kvst, ka, ibkmax, nja, ndima, nbmxc, [ isymm)
USE parameter_data
USE connect_data
USE contrl
IMPLICIT REAL*8(A-H,O-Z)
include 'eleme.com'
COMMON/G9/NEXG(MNOGN)
COMMON/shiftf/SFTMIN(mxcom)
COMMON/gm_nm/gamman(njamax)
common/jocab2/uf(3,ibnd+maxlay),flw(3),fsav(3,ibnd+maxlay)
[ ,fsum(3),fsums(3), fdsum(3), fdp(3), fdiag(3)
COMMON/well_1/iwell(mnel)
COMMON/well_2/pwell(mnogn),vol_w(mnogn)
COMMON/source/qm_bc(mxcom)
integer ndima, nbmxc, ibkmax, nja, impl(ibkmax),
[ ia(ibkmax+1), ja(nja), neqa(ibkmax), kvst(ibkmax+1),
[ ka( nja+1), isymm(nja)
double precision a(ndima), b(nbmxc)
double precision fdsav(mxcom),eps
parameter (eps=1.0d-300)
COMMON/scndv2/densn(maxnn,mxphs+1),accn(maxnn,mxcom),acck(mxcom),acck_All
double precision Epsilon1(mxphs), Epsilon2(mxphs)
IDiagonal_dominace = 1 !--- =1: JACOBI
do 1 i=1, ka( nja+1 ) ! ndima
a(i) = 0.0d0
1 continue
do 3 i = 1, kvst( ibkmax + 1) ! nbmxc
b(i) = 0.0d0
3 continue
EpsilonMax1 = 0.0
EpsilonMax2 = 0.0
NEpsilonMax1 = 1
NEpsilonMax2 = 1
do 1000 i=1, ibkmax
inode=i
imat=matx(i)
do iphas=1, nph
fsum(iphas) = 0.0d0
fsums(iphas) = 0.0d0
fdsum( iphas ) = 0.0d0
fdsav(iphas) = 0.0d0
do index=1, ia(i+1)-1 - ( ia(i) )
fsav(iphas, index) = 0.0d0
enddo
enddo
if(iwell(inode).eq.0) then
do iphas=1,mnph
qm_bc(iphas)=0.0d0
enddo
elseif(iwell(inode).eq.1) then
call bc_ev(INODE)
elseif(iwell(inode).eq.2) then
call bc_well(inode,ishift,ia, ja,nja)
endif
call eqnsa(INODE,IMAT,ishift)
jconet=0
do 12 index = ia(i)+1, ia(i+1)-1
id = ja( index )
jconet=jconet+1
if(dabs(gamman(index)).le.eps) goto 12
call eqnsf(INODE,id,imat,ishift,index,jconet,impl(i))
do 14 iphas = 1, mxphs
if( impl(i) .eq. 0 )then
a( ka(isymm(index)) + iphas ) = fdp( iphas )
else
fsav(iphas, jconet) = flw(iphas)
endif
14 continue
if( (impl(i) .eq. 0) .AND. (IDiagonal_dominace .eq. 1) )then !---
a( ka(isymm(index)) + 1 ) = fdp( 3 )
a( ka(isymm(index)) + 3 ) = fdp( 1 )
endif
12 continue
do 15 iphas = 1, mxphs
b( kvst(i) + iphas ) = -fsum( iphas )
if(EPSN1.GT.0.0.AND.EPSN2.GT.0.0) then !-- -----------------
Epsilon1(iphas) = abs(b( kvst(i) + iphas )/(acck_All+1.0D-20)) !---
Epsilon2(iphas) = abs(b( kvst(i) + iphas )) !---
if(EpsilonMax1(iphas).LT.Epsilon1(iphas)) then
EpsilonMax1(iphas) = Epsilon1(iphas)
NEpsilonMax1(iphas) = i
endif
if(EpsilonMax2(iphas).LT.Epsilon2(iphas)) then
EpsilonMax2(iphas) = Epsilon2(iphas)
NEpsilonMax2(iphas) = i
endif
endif !--------------------------
if( impl(i) .eq. 0) then
fdsav( iphas ) = fdiag( iphas)
else
fsums(iphas) = fsum(iphas)
fdsav(iphas) = fdiag(iphas)
endif
15 continue
if(IDiagonal_dominace.EQ.1)then !---
b( kvst(i) + 1 ) = -fsum( 3 )
b( kvst(i) + 3 ) = -fsum( 1 )
endif
do 2000 icol=1, nph
isave=1
ishift=1
call save_v(INODE,ISAVE,ICOL)
call shif(INODE,ICOL,stemp)
if( impl(i) .eq. 0)then
kupdat = 0
else
kupdat = 1
endif
if(MOP(10).NE.0) kupdat=0 !--- add by Diyuan, 2014-6-6
call eosms(inode,kupdat,b,kvst)
if(iwell(inode).eq.0) then
!$acc loop
do iphas=1,mnph
qm_bc(iphas)=0.0d0
enddo
elseif(iwell(inode).eq.1) then
call bc_ev(INODE)
elseif(iwell(inode).eq.2) then
call bc_well(inode,ishift,ia, ja,nja)
endif
call eqnsa(inode,IMAT,ishift)
if( impl(i) .eq. 0 ) go to 100
if( impl(i) .eq. 0 .and. icol .gt. 1) go to 100
jconet=0
do 150 index =ia(i)+1, ia(i+1)-1
id = ja( index )
jconet=jconet+1
if(dabs(gamman(index)).le.eps) goto 150
call save_tauf_C(idcon(index),1,ICOL)
call EOSMS_Connection(idcon(index),kupdat) !-- add by Diyuan, 2012-6-22
call eqnsf(INODE,id,imat,ishift,index,jconet,impl(i))
call save_tauf_C(idcon(index),2,ICOL)
do 101 irow=1,mxphs
if( impl(i) .eq. 1)then
a(ka(isymm(index))+(irow-1)*mxphs+icol) = +(flw(irow)-fsav(irow,jconet))/stemp
else
a(ka(isymm(index))+irow) = (flw(irow)-fsav(irow,jconet))/stemp
endif
101 continue
if(IDiagonal_dominace.EQ.1)then !---
a_temp = a(ka(isymm(index))+(1-1)*mxphs+icol)
a(ka(isymm(index))+(1-1)*mxphs+icol) = a(ka(isymm(index))+(3-1)*mxphs+icol)
a(ka(isymm(index))+(3-1)*mxphs+icol) = a_temp
endif
150 continue
100 continue
do 120 irow=1, mxphs
itemp = ka( ia(i) ) + (irow-1)* mxphs
if( impl(i) .eq. 0 ) then
a(itemp + icol) = ( fdiag(irow) - fdsav(irow) ) / stemp
if( icol .eq. 1)then
a( itemp + icol ) = a( itemp + icol ) + fdsum(irow)
endif
else
a(itemp+icol) = + (fsum(irow) - fsums(irow ))/ stemp
endif
120 continue
if(IDiagonal_dominace.EQ.1)then !---
itemp1 = ka( ia(i) ) + (1-1)* mxphs
itemp3 = ka( ia(i) ) + (3-1)* mxphs
a_temp = a(itemp1 + icol)
a(itemp1 + icol) = a(itemp3 + icol)
a(itemp3 + icol) = a_temp
endif
isave=2
call save_v(INODE,ISAVE,ICOL)
2000 continue
1000 continue
ishift=0
RETURN
END
但是当我添加 openacc 指令时,我看不到输出信息和数据复制信息,在控制台中也没有输出内核执行时间信息。我已经设置了环境变量和命令行参数,以确保信息输出。:
!$acc parallel loop
do iphas=1, nph
fsum(iphas) = 0.0d0
fsums(iphas) = 0.0d0
fdsum( iphas ) = 0.0d0
fdsav(iphas) = 0.0d0
do index=1, ia(i+1)-1 - ( ia(i) )
fsav(iphas, index) = 0.0d0
enddo
enddo
!$acc end parallel
数组存储在 .com 文件中。不知道为什么openacc没有努力,goto-statement有什么影响,我想删除goto-statement程序修改使用openacc的程序