在调查为什么 reshape 比使用循环的朴素实现慢得多的过程中,我在 gfortran 和 ifort 这两个编译器上都遇到了一个奇怪的现象:
我为两个重塑函数(reshape3Dto1D 和 reshape1Dto3D)定义了一个接口 my_reshape。当通过接口函数调用,而不是直接调用成员函数时,我注意到速度提升了 10-40%!更改调用顺序、优化级别,甚至更换编译器,都没有改变这种行为。
是我哪里弄错了,还是有人能解释这一现象?
这是(简化的)代码:
reshapetest_simple.F90:
!> Hand-written reshaping helpers, exposed both under their specific names
!> and through the generic name my_reshape.
module test
  implicit none

  !> Generic name resolving to the specific reshaping routines of this module.
  interface my_reshape
    module procedure :: reshape3Dto1D
  end interface

contains

  !> Flatten a rank-3 array into a rank-1 array in array-element
  !> (column-major) order using explicit loops.
  !!
  !! Arguments:
  !!   mat  - rank-3 array to flatten.
  !!   dims - one-element shape vector; dims(1) is the length of the result.
  !!
  !! Result:
  !!   Array of length dims(1). Its first size(mat) elements are the elements
  !!   of mat with the first index varying fastest; any elements beyond
  !!   size(mat) are set to zero.
  !!
  !! Errors:
  !!   Terminates with error stop when dims(1) is smaller than size(mat),
  !!   because the copy loop would otherwise write past the end of the result.
  function reshape3Dto1D( mat, dims ) result(flat)
    use, intrinsic :: iso_fortran_env, only: real64
    integer, parameter :: cp = real64
    real(cp), intent(in) :: mat(:,:,:)
    integer, intent(in)  :: dims(1)
    real(cp) :: flat(dims(1))
    integer :: x, y, z, i
    integer :: nx, ny, nz

    ! Extents are loop invariants; query them once.
    nx = size(mat, 1)
    ny = size(mat, 2)
    nz = size(mat, 3)

    if (dims(1) < size(mat)) then
      error stop 'reshape3Dto1D: dims(1) is smaller than size(mat)'
    end if

    ! Innermost loop runs over the first index so mat is read contiguously.
    i = 0
    do z = 1, nz
      do y = 1, ny
        do x = 1, nx
          i = i + 1
          flat(i) = mat(x, y, z)
        end do ! x
      end do ! y
    end do ! z

    ! Define the tail when the caller asked for more elements than mat holds
    ! (zero-sized section, hence a no-op, when dims(1) == size(mat)).
    flat(i+1:) = 0.0_cp
  end function reshape3Dto1D

end module test
!> Benchmark driver: flattens a dimX x dimY x dimZ array of random numbers
!> into a rank-1 array three times and prints the wall-clock time of each:
!>   (1) the loop-based function called by its specific name,
!>   (2) the intrinsic reshape,
!>   (3) the same loop-based function called through the generic my_reshape.
program changeDim
  use test, only: my_reshape, reshape3Dto1D
  use omp_lib, only: omp_get_wtime
  use, intrinsic :: iso_fortran_env, only: real64
  implicit none

  integer, parameter :: cp = real64
  real(real64) :: t1, t2, t3, t4
  integer, parameter :: dimX=100, dimY=100, dimZ=100
  integer, parameter :: dimProduct = dimX*dimY*dimZ
  integer :: stat
  real(cp), pointer, contiguous :: matrix3d(:,:,:), matrix1d(:)

  allocate( matrix3d(dimX,dimY,dimZ), matrix1d(dimProduct), stat=stat )
  ! Allocation failure is an error: terminate with error stop rather than stop.
  if (stat /= 0) error stop 'Cannot allocate memory'

  call random_number(matrix3d)
  matrix1d = 0._cp

  ! (1) Naive copy using a function
  t1 = omp_get_wtime()
  matrix1d = reshape3Dto1D( matrix3d, [ dimProduct ] )
  t2 = omp_get_wtime()

  ! (2) Reshape
  matrix1d = reshape( matrix3d, [ dimProduct ] )
  t3 = omp_get_wtime()

  ! (3) Same as (1), but using the interface
  matrix1d = my_reshape( matrix3d, [ dimProduct ] )
  t4 = omp_get_wtime()

  ! Each interval is the difference of the two time stamps around one variant;
  ! the results are printed in a different order than they were measured.
  write(*,*) 'Reshape: ',t3-t2
  write(*,*) 'Naive fct direct: ',t2-t1
  write(*,*) 'Naive fct interface: ',t4-t3

  deallocate( matrix3d, matrix1d )
end program changeDim
我使用的是 gfortran 4.8.1 和 ifort 13.1.3。二进制文件使用以下命令编译:
ifort -o reshape-ifort -openmp reshapetest_simple.F90 -O3
gfortran -o reshape-gfortran -fopenmp reshapetest_simple.F90 -O3
并给出了以下结果:
OMP_NUM_THREADS=1 ./reshape-gfortran
Reshape: 6.8527370000310839E-003
Naive fct direct: 5.0175579999631736E-003
Naive fct interface: 4.6131109999123510E-003
OMP_NUM_THREADS=1 ./reshape-ifort
Reshape: 3.495931625366211E-003
Naive fct direct: 5.089998245239258E-003
Naive fct interface: 3.136873245239258E-003
顺便说一句:我知道对于这种重塑最好使用指针来避免复制数组......