我尝试将一段代码与 OPENMP 并行,但是随着处理器数量的增加,代码运行速度变慢了。!
调用 OMP_set_num_threads(1)-->16.7sec
调用 OMP_set_num_threads(4)-->17.7sec
调用 OMP_set_num_threads(8)-->19sec
系统规格
Intel Corei7 3610QM 2.3GH 到 3.2GH,4 核 8 线程 ///8GB ram DDR3
call OMP_set_num_threads(8)
!$omp parallel
!$omp do private(k,i,j,r,epsilonxx,epsilonyy,epsilonxy,epsilonzz,epsilonxz,&
epsilonyz) reduction(+:dr)
do k=1,niac
i = pair_i(k)
j = pair_j(k)
dx(1) = x(1,j) - x(1,i)
dr = dx(1)*dx(1)
do d=2,dim
dx(d) = x(d,j) - x(d,i)
dr = dr + dx(d)*dx(d)
enddo
r = sqrt(dr)
do d=1,dim
dvx(d) = vx(d,j) - vx(d,i)
enddo
if (dim.eq.3) then
if((abs(itype(i)).gt.1000 .and. abs(itype(j)).gt.1000 ) ) then
epsilonxx = dvx(1)*dwdx(1,k)
epsilonyy = dvx(2)*dwdx(2,k)
epsilonxy = (1/2.)*(dvx(1)*dwdx(2,k)+dvx(2)*dwdx(1,k))
epsilonzz = dvx(dim)*dwdx(dim,k)
epsilonxz = (1/2.)*(dvx(1)*dwdx(dim,k)+dvx(dim)*dwdx(1,k))
epsilonyz = (1/2.)*(dvx(2)*dwdx(dim,k)+dvx(dim)*dwdx(2,k))
epsxx(i) = epsxx(i) + mass(j)*epsilonxx/rho(j)
epsxx(j) = epsxx(j) + mass(i)*epsilonxx/rho(i)
epsyy(i) = epsyy(i) + mass(j)*epsilonyy/rho(j)
epsyy(j) = epsyy(j) + mass(i)*epsilonyy/rho(i)
epsxy(i) = epsxy(i) + mass(j)*epsilonxy/rho(j)
epsxy(j) = epsxy(j) + mass(i)*epsilonxy/rho(i)
epszz(i) = epszz(i) + mass(j)*epsilonzz/rho(j)
epszz(j) = epszz(j) + mass(i)*epsilonzz/rho(i)
epsxz(i) = epsxz(i) + mass(j)*epsilonxz/rho(j)
epsxz(j) = epsxz(j) + mass(i)*epsilonxz/rho(i)
epsyz(i) = epsyz(i) + mass(j)*epsilonyz/rho(j)
epsyz(j) = epsyz(j) + mass(i)*epsilonyz/rho(i)
elseif( (abs(itype(i)).lt.1000 ) .and. (abs(itype(j)).gt.1000 ) ) then
epsilonxx_interface(i) =(2/3.)*(2.e0*dvx(1)*dwdx(1,k)
epsilonxx_interface(j) =dvx(1)*dwdx(1,k)
epsilonyy_interface(i) =(2/3.)*(2.e0*dvx(2)*dwdx(2,k)
epsilonyy_interface(j) =dvx(2)*dwdx(2,k)
epsilonxy_interface(i) =dvx(1)*dwdx(2,k) + dvx(2)*dwdx(1,k)
epsilonxy_interface(j) =(1/2.)*(dvx(1)*dwdx(2,k)+dvx(2)*dwdx(1,k))
epsilonzz_interface(i) =(2/3.)*(2.e0*dvx(dim)*dwdx(dim,k)
epsilonzz_interface(j) =dvx(dim)*dwdx(dim,k) epsilonxz_interface(i) =dvx(1)*dwdx(dim,k) + dvx(dim)*dwdx(1,k)
epsilonxz_interface(j) =(1/2.)*(dvx(1)*dwdx(dim,k)+dvx(dim)*dwdx(1,k))
epsilonyz_interface(i) =dvx(2)*dwdx(dim,k) + dvx(dim)*dwdx(2,k)
epsilonyz_interface(j) =(1/2.)*(dvx(2)*dwdx(dim,k)+dvx(dim)*dwdx(2,k))
epsxx(i) = epsxx(i) + mass(j)*epsilonxx_interface(i)/rho(j)
epsxx(j) = epsxx(j) + mass(i)*epsilonxx_interface(j)/rho(i)
epsyy(i) = epsyy(i) + mass(j)*epsilonyy_interface(i)/rho(j)
epsyy(j) = epsyy(j) + mass(i)*epsilonyy_interface(j)/rho(i)
epsxy(i) = epsxy(i) + mass(j)*epsilonxy_interface(i)/rho(j)
epsxy(j) = epsxy(j) + mass(i)*epsilonxy_interface(j)/rho(i)
epszz(i) = epszz(i) + mass(j)*epsilonzz_interface(i)/rho(j)
epszz(j) = epszz(j) + mass(i)*epsilonzz_interface(j)/rho(i)
epsxz(i) = epsxz(i) + mass(j)*epsilonxz_interface(i)/rho(j)
epsxz(j) = epsxz(j) + mass(i)*epsilonxz_interface(j)/rho(i)
epsyz(i) = epsyz(i) + mass(j)*epsilonyz_interface(i)/rho(j)
epsyz(j) = epsyz(j) + mass(i)*epsilonyz_interface(j)/rho(i)
elseif( (abs(itype(i)).gt.1000 ) .and. (abs(itype(j)).lt.1000 ) ) then
epsilonxx_interface(j) = (2/3.)*(2.e0*dvx(1)*dwdx(1,k)
epsilonxx_interface(i) =dvx(1)*dwdx(1,k)
epsilonyy_interface(j) =(2/3.)*(2.e0*dvx(2)*dwdx(2,k)
epsilonyy_interface(i) = dvx(2)*dwdx(2,k)
epsilonxy_interface(j) =dvx(1)*dwdx(2,k) + dvx(2)*dwdx(1,k)
epsilonxy_interface(i) = (1/2.)*(dvx(1)*dwdx(2,k)+dvx(2)*dwdx(1,k))
epsilonzz_interface(j) = (2/3.)*(2.e0*dvx(dim)*dwdx(dim,k)
epsilonzz_interface(i) =dvx(dim)*dwdx(dim,k)
epsilonxz_interface(j) =dvx(1)*dwdx(dim,k) + dvx(dim)*dwdx(1,k)
epsilonxz_interface(i) =(1/2.)*(dvx(1)*dwdx(dim,k)+dvx(dim)*dwdx(1,k))
epsilonyz_interface(j) =dvx(2)*dwdx(dim,k) + dvx(dim)*dwdx(2,k)
epsilonyz_interface(i) =(1/2.)*(dvx(2)*dwdx(dim,k)+dvx(dim)*dwdx(2,k))
epsxx(i) = epsxx(i) + mass(j)*epsilonxx_interface(i)/rho(j)
epsxx(j) = epsxx(j) + mass(i)*epsilonxx_interface(j)/rho(i)
epsyy(i) = epsyy(i) + mass(j)*epsilonyy_interface(i)/rho(j)
epsyy(j) = epsyy(j) + mass(i)*epsilonyy_interface(j)/rho(i)
epsxy(i) = epsxy(i) + mass(j)*epsilonxy_interface(i)/rho(j)
epsxy(j) = epsxy(j) + mass(i)*epsilonxy_interface(j)/rho(i)
epszz(i) = epszz(i) + mass(j)*epsilonzz_interface(i)/rho(j)
epszz(j) = epszz(j) + mass(i)*epsilonzz_interface(j)/rho(i)
epsxz(i) = epsxz(i) + mass(j)*epsilonxz_interface(i)/rho(j)
epsxz(j) = epsxz(j) + mass(i)*epsilonxz_interface(j)/rho(i)
epsyz(i) = epsyz(i) + mass(j)*epsilonyz_interface(i)/rho(j)
epsyz(j) = epsyz(j) + mass(i)*epsilonyz_interface(j)/rho(i)
endif
endif
enddo
!$omp end do nowait
endif
!$omp end parallel