我有一个“掩码数组”,我想用它来控制把一个数组加到另一个数组上——换句话说,我有 3 个数组:A、B 和 mask。我的问题是:就执行时间而言,存储掩码的最高效方式是什么(作为逻辑数组,还是作为由 1 和 0 组成的实数数组)?
编辑
下面是一个可以拿来试验的玩具程序(需要用 mpif77 编译):
! Toy benchmark: times three ways of performing the masked update
! A(i) = A(i) + B(i) on large arrays and prints the average and best
! wall-clock time of each:
!   1. a loop guarded by a logical mask   (add_arrays_logical)
!   2. a loop multiplying by a real mask  (add_arrays_real)
!   3. a WHERE statement on the logical mask
program main
  implicit none
  include 'mpif.h'
  integer, parameter :: ntry = 10000       ! timed repetitions per variant
  integer, parameter :: asize = 1000000    ! number of elements per array
  real, dimension(asize) :: A, B, maskr
  logical, dimension(asize) :: mask
  double precision :: dt, dtave, dtbest
  integer :: i, ierr

  ! The timer used below is mpi_wtime, so bring MPI up before the first call.
  call mpi_init(ierr)

  ! Build both masks from the same rule (even indices selected) so that all
  ! three variants update exactly the same elements.
  do i = 1, asize
    mask(i) = mod(i, 2) == 0
    maskr(i) = merge(1.0, 0.0, mask(i))
  end do

  ! --- Variant 1: loop with a logical mask ---
  A = 1.0
  B = 1.0
  dtbest = 1d33          ! larger than any measured time, so min() picks real data
  dtave = 0.0d0
  do i = 1, ntry
    dt = mpi_wtime()
    call add_arrays_logical(asize, A, B, mask)
    dt = mpi_wtime() - dt
    dtbest = min(dt, dtbest)
    dtave = dtave + dt   ! accumulated sum; divided by ntry when printed
  end do
  print*,"==== logical ==="
  print*,"Average",dtave/ntry
  print*,"Best",dtbest

  ! --- Variant 2: loop with a real-valued mask ---
  A = 1.0
  B = 1.0
  dtbest = 1d33
  dtave = 0.0d0
  do i = 1, ntry
    dt = mpi_wtime()
    call add_arrays_real(asize, A, B, maskr)
    dt = mpi_wtime() - dt
    dtbest = min(dt, dtbest)
    dtave = dtave + dt
  end do
  print*,"==== Real ==="
  print*,"Average",dtave/ntry
  print*,"Best",dtbest

  ! --- Variant 3: WHERE statement with the logical mask ---
  A = 1.0
  B = 1.0
  dtbest = 1d33
  dtave = 0.0d0
  do i = 1, ntry
    dt = mpi_wtime()
    where (mask) A = A + B
    dt = mpi_wtime() - dt
    dtbest = min(dt, dtbest)
    dtave = dtave + dt
  end do
  print*,"==== Where ===="
  print*,"Average",dtave/ntry
  print*,"Best",dtbest

  call mpi_finalize(ierr)
end program main
! Adds B(i) to A(i) for every index i at which mask(i) is true; elements of A
! whose mask entry is false are left untouched.
!   n    - number of elements in A, B and mask
!   A    - array updated in place
!   B    - values to add
!   mask - selects which elements of A receive the addition
subroutine add_arrays_logical(n,A,B,mask)
  implicit none
  integer, intent(in) :: n
  real, intent(inout) :: A(n)
  real, intent(in) :: B(n)
  logical, intent(in) :: mask(n)
  integer :: i

  ! Deliberately an explicit loop with a branch: this is the form whose
  ! timing the surrounding benchmark compares against the alternatives.
  do i = 1, n
    if (mask(i)) A(i) = A(i) + B(i)
  end do
end subroutine add_arrays_logical
! Adds mask(i)*B(i) to A(i) for every index i. With a mask holding only 1.0
! and 0.0 this adds B(i) where the mask is 1.0 and adds zero elsewhere.
!   n    - number of elements in A, B and mask
!   A    - array updated in place
!   B    - values to add
!   mask - real-valued weights multiplied into B before the addition
subroutine add_arrays_real(n,A,B,mask)
  implicit none
  integer, intent(in) :: n
  real, intent(inout) :: A(n)
  real, intent(in) :: B(n), mask(n)
  integer :: i

  ! Branch-free form: every element is read, multiplied and written back.
  do i = 1, n
    A(i) = A(i) + mask(i)*B(i)
  end do
end subroutine add_arrays_real
我的结果:
(gfortran -O2)
==== logical ===
Average 1.52590200901031483E-003
Best 1.48987770080566406E-003
==== Real ===
Average 1.78022863864898680E-003
Best 1.74498558044433594E-003
==== Where ====
Average 1.48216445446014400E-003
Best 1.44505500793457031E-003
(gfortran -O3 -funroll-loops -ffast-math)
==== logical ===
Average 1.47997992038726811E-003
Best 1.44982337951660156E-003
==== Real ===
Average 1.40655457973480223E-003
Best 1.37186050415039063E-003
==== Where ====
Average 1.48403010368347165E-003
Best 1.45006179809570313E-003
(pgf90 -fast) -- 在一台非常旧的机器上
==== logical ===
Average 5.4871437072753909E-003
Best 5.4519176483154297E-003
==== Real ===
Average 4.6096980571746831E-003
Best 4.5847892761230469E-003
==== Where ====
Average 5.3572671413421634E-003
Best 5.3288936614990234E-003
(pgf90 -O2) -- 在一台非常旧的机器上
==== logical ===
Average 5.4929971456527714E-003
Best 5.4569244384765625E-003
==== Real ===
Average 5.5974062204360965E-003
Best 5.5701732635498047E-003
==== Where ====
Average 5.3811835527420044E-003
Best 5.3341388702392578E-003
当然,有一些因素可能会影响这一点——例如编译器向量化循环的能力——那么关于如何实现这样的事情是否有经验法则?