在阅读了 how-can-a-fortran-openacc-routine-call-another-fortran-openacc-routine 这篇帖子之后,我仍然对 OpenACC 的这一函数调用限制感到困惑。
以下是根据上述链接帖子修改而来的一段没有实际意义的示例代码:
! Minimal example: a main program with no executable statements that
! contains two OpenACC device routines, one of which calls the other.
! Here the caller (OuterRoutine) is defined BEFORE the callee (InnerRoutine).
PROGRAM Test
IMPLICIT NONE
CONTAINS
! Device routine that calls InnerRoutine once per iteration, i = 0..N.
! N is the inclusive upper bound of the loop (so N+1 calls for N >= 0).
SUBROUTINE OuterRoutine( N )
! The routine directive carries no parallelism clause (gang/worker/vector/seq).
!$acc routine
IMPLICIT NONE
INTEGER :: N
real :: y
INTEGER :: i
DO i = 0, N
! y is a local that is never assigned before being passed by reference.
call InnerRoutine( y )
ENDDO
END SUBROUTINE OuterRoutine
! Device routine with an empty body; it takes one real argument and does
! nothing with it. It is defined after its only caller, OuterRoutine.
subroutine InnerRoutine( y )
!$acc routine
IMPLICIT NONE
real :: y
END subroutine InnerRoutine
END PROGRAM Test
当我用 nvfortran 20.7 版本编译它时,得到了如下输出:
$ nvfortran -acc -Minfo routine.f90
outerroutine:
14, Generating acc routine seq
Generating Tesla code
22, Reference argument passing prevents parallelization: y
innerroutine:
27, Generating acc routine seq
Generating Tesla code
nvvmCompileProgram error 9: NVVM_ERROR_COMPILATION.
Error: /tmp/pgaccr22eZDXceweL.gpu (43, 14): parse invalid forward reference to function '_innerroutine_' with wrong type!
ptxas /tmp/pgaccH22eJTMb0hKD.ptx, line 1; fatal : Missing .version directive at start of file '/tmp/pgaccH22eJTMb0hKD.ptx'
ptxas fatal : Ptx assembly aborted due to errors
NVFORTRAN-S-0155-Compiler failed to translate accelerator region (see -Minfo messages): Device compiler exited with error status code (routine_inline.f90: 1)
0 inform, 0 warnings, 1 severes, 0 fatal for
是什么触发了这个编译错误?作为对比,下面这段同样包含 acc routine 调用的代码
!> Shared state for the OpenACC example. Every variable declared here is
!> named in an `acc declare create` directive.
module data
  implicit none

  !> Number of elements in the fixed-size array xstat.
  integer, parameter :: maxl = 100000

  !> Fixed-size array of maxl default-kind reals.
  real, dimension(maxl) :: xstat

  !> Rank-1 allocatable array; this module does not allocate it.
  real, dimension(:), allocatable :: yalloc
  !$acc declare create(xstat,yalloc)

  !> Logical flag; this module gives it no initial value.
  logical :: IsUsed
  !$acc declare create(IsUsed)
end module data
!> Device-side computation for the OpenACC example: a parallel loop calls a
!> sequential device routine, which in turn calls its own internal routine.
module useit
  use data
  implicit none
contains

  !> Call iprocess for every index 1..n inside an OpenACC parallel loop.
  !> The present clause states that yalloc and xstat already exist on the
  !> device when the loop starts.
  subroutine compute(n)
    integer, intent(in) :: n   !< inclusive upper bound of the loop
    integer :: i
    !$acc parallel loop present(yalloc,xstat)
    do i = 1, n
      call iprocess(i, yalloc)
    enddo
  end subroutine compute

  !> Sequential device routine: forwards to kernel only when the module
  !> flag IsUsed is true; otherwise yalloc is left untouched.
  subroutine iprocess(i, yalloc)
    !$acc routine seq
    integer, intent(in) :: i   !< index of the element to update
    ! intent(inout) rather than intent(out): at most one element is written
    ! (none when IsUsed is false), so the other elements must keep their
    ! values. With intent(out) the whole array would become undefined on
    ! entry.
    real, intent(inout) :: yalloc(:)
    if (IsUsed) call kernel(i, yalloc)
  contains

    !> Store 2*xstat(i) into yalloc(i); xstat comes from module data.
    subroutine kernel(i, yalloc)
      !$acc routine seq
      integer, intent(in) :: i
      ! Only element i is assigned, hence intent(inout) (see iprocess).
      real, intent(inout) :: yalloc(:)
      yalloc(i) = 2*xstat(i)
    end subroutine kernel
  end subroutine iprocess
end module useit
!> Driver for the OpenACC example: allocates and initialises the shared
!> arrays, runs compute, copies yalloc back to the host and prints one
!> element.
program main
  use data
  use useit
  implicit none
  ! Named constant instead of an initialised (implicitly saved) variable.
  ! Must be at least 10 because yalloc(10) is printed below.
  integer, parameter :: nSize = 100
  !---------------------------------------------------------------------------
  call allocit(nSize)
  call initialize
  call compute(nSize)
  ! Bring the device results back to the host copy before printing.
  !$acc update self(yalloc)
  ! Expected value is 2.0: kernel stores 2*xstat(i) and xstat is set to 1.0.
  write(*,*) "yalloc(10)=",yalloc(10)
  call finalize
contains

  !> Allocate the module array yalloc with n elements.
  subroutine allocit(n)
    integer, intent(in) :: n
    allocate(yalloc(n))
  end subroutine allocit

  !> Set the host values of xstat, yalloc and IsUsed, then push all three
  !> to the device.
  subroutine initialize
    xstat = 1.0
    yalloc = 1.0
    IsUsed = .true.
    !$acc update device(xstat,yalloc,IsUsed)
  end subroutine initialize

  !> Release the module array yalloc.
  subroutine finalize
    deallocate(yalloc)
  end subroutine finalize
end program main
可以用 OpenACC 编译并运行。
更新:令人惊讶的是,对于第一段代码,只要简单地调换两个子程序的先后顺序,它就可以正常工作了:
! Variant of the minimal example with the two contained subroutines swapped:
! here the callee (InnerRoutine) is defined BEFORE the caller (OuterRoutine).
! The main program itself has no executable statements.
PROGRAM Test
IMPLICIT NONE
CONTAINS
! Device routine with an empty body; it takes one real argument and does
! nothing with it.
subroutine InnerRoutine( y )
! The routine directive carries no parallelism clause (gang/worker/vector/seq).
!$acc routine
IMPLICIT NONE
real :: y
END subroutine InnerRoutine
! Device routine that calls InnerRoutine once per iteration, i = 0..N.
! N is the inclusive upper bound of the loop (so N+1 calls for N >= 0).
SUBROUTINE OuterRoutine( N )
!$acc routine
IMPLICIT NONE
INTEGER :: N
real :: y
INTEGER :: i
DO i = 0, N
! y is a local that is never assigned before being passed by reference.
call InnerRoutine( y )
ENDDO
END SUBROUTINE OuterRoutine
END PROGRAM Test
令我惊讶的是,这种情况竟然取决于子程序(routine)的定义顺序。但是为什么我上面的第二个例子却可以正常工作呢?