我在 Fortran 95 中使用 Open MP 制作了一个传热模拟程序的并行版本,它似乎可以工作(结果相同)。但是,经过的时间(“挂钟”时间)与顺序版本相同。我的默认线程数是 4,但即使我更改它,计算时间也大致相同......我也是 Open MP 和并行化的新手,所以我很难理解我的错误在哪里。也许你可以帮我找到他们?
编辑:
这是我的代码
program heat
use omp_lib
implicit none
! Variables
real,PARAMETER :: alpha = 1e-02 ! heat diffusive coefficient
integer,PARAMETER :: s = 10 ! Kelvin/s
integer,PARAMETER :: L = 1 ! cubic side length
real,PARAMETER :: dt = 1e-03 ! time step
real,PARAMETER :: dx = 1e-02 ! space step
real,PARAMETER :: dy = 1e-02 ! space step
real,PARAMETER :: dz = 1e-02 ! space step
integer,PARAMETER :: N = int(L/dx) ! Finite volumes
integer,PARAMETER :: t_obs = 120 ! secondes
integer,PARAMETER :: ite = int(t_obs/dt) ! iterations
real :: c = (alpha*dt)/(dx**2) ! scheme stability criteria
integer :: total_length = (N+2)
real,ALLOCATABLE,DIMENSION(:,:,:) :: T
! Boundaries conditions
real :: T_R = 298 ! right 298
real :: T_L = 298 ! left 298
real :: T_F = 323 ! front 323
real :: T_B = 323 ! back 323
real :: T_U = 373 ! up 373
real :: T_D = 373 ! down 373
integer :: i,j,k,iteration
print*, "c = ",c
ALLOCATE(T(total_length,total_length,total_length))
! Initial condition
do i = 1,N+2
do j = 1, N+2
do k = 1, N+2
if (j == 1 .and. k /= 1 .and. k /= N+2) then
T(i,k,j) = T_F
else if (i == N+2 .and. j /= N+2 .and. k /= N+2 .and. k /= 1) then
T(i,k,j) = T_R
else if (i == 1 .and. j /= 1 .and. k /= 1 .and. k /= N+2) then
T(i,k,j) = T_L
else if (i /= 1 .and. j == N+2 .and. k /= 1 .and. k /= N+2) then
T(i,k,j) = T_B
else if (k == N+2) then
T(i,k,j) = T_U
else if (k == 1) then
T(i,k,j) = T_D
else
T(i,k,j) = 283
end if
end do
end do
end do
! Datas in file:
open(10,file='para_heat_3D_unsteady2.dat',FORM = 'UNFORMATTED')
! Iterative loop
!$omp parallel default(shared) private(i,j,k)
do iteration = 0,15
!$omp single
print*, iteration
!$omp end single
if (iteration == 0) then
!$omp single
i = 1
do while (i < N+3)
do j = 1, N+2
WRITE(10) T(i,:,j)
! WRITE(10,*) T(i,:,j) ! formatted
end do
i = i + 1
end do
!$omp end single
else
! Boundary Conditions
T(:,:,1) = T_F
T(N+2,:,:) = T_R
T(1,:,:) = T_L
T(:,:,N+2) = T_B
T(:,N+2,:) = T_U
T(:,1,:) = T_D
!$omp do
do i = 2, N+1
do j = 2, N+1
do k = 2, N+1
if (i == N+1 .and. j == N+1 .and. k == N+1) then
T(i,k,j) = T(i,k,j) * (1-9*c) + T_R * (2*c) + T(i-1,k,j)*c + T_B *(2*c) + c*T(i,k,j-1) + &
2*c*T_U + c*T(i,k-1,j) + s*dt
else if (i == 2 .and. j == N+1 .and. k == N+1) then
T(i,k,j) = T(i,k,j) * (1-9*c) + T_L * (2*c) + c*T(i+1,k,j) + 2*c*T_B + c*T(i,k,j-1) + 2*c*T_U &
+ c*T(i,k-1,j) + s*dt
else if (i == N+1 .and. j == N+1 .and. k == 2) then
T(i,k,j) = T(i,k,j) * (1-9*c) + 2*c*T_R + c*T(i-1,k,j) + 2*c*T_B + c*T(i,k,j-1) + 2*c*T_D + &
c*T(i,k+1,j) + s*dt
else if (i == 2 .and. j == N+1 .and. k == 2) then
T(i,k,j) = T(i,k,j) * (1-9*c) + 2*c*T_L + c*T(i+1,k,j) + 2*c*T_B + c*T(i,k,j-1) + 2*c*T_D + &
c*T(i,k+1,j) + s*dt
else if (i /= 1 .and. i /= N+2 .and. j == 2 .and. k == N+1) then
T(i,k,j) = T(i,k,j) * (1-8*c) + c*T(i+1,k,j) + c*T(i-1,k,j) + c*T(i,k,j+1) + 2*c*T_F + 2*c*T_U &
+ c*T(i,k-1,j) + s*dt
else if (i == 2 .and. j == 2 .and. k /= 1 .and. k /= N+2) then
T(i,k,j) = T(i,k,j) * (1-8*c) + c*T(i+1,k,j) + 2*c*T_L + c*T(i,k,j+1) + 2*c*T_F + &
c*(T(i,k+1,j) + T(i,k-1,j)) + s*dt
else if (i /= 1 .and. i /= N+2 .and. j == 2 .and. k == 2) then
T(i,k,j) = T(i,k,j) * (1-8*c) + c*T(i-1,k,j) + c*T(i+1,k,j) + c*T(i,k,j+1) + 2*c*T_F + &
c*T(i,k+1,j) + 2*c*T_D + s*dt
else if (i == N+1 .and. j == 2 .and. k /= 1 .and. k /= N+2) then
T(i,k,j) = T(i,k,j) * (1-8*c) + 2*c*T_R + c*T(i-1,k,j) + c*T(i,k,j+1) + 2*c*T_F + &
c*(T(i,k+1,j) + T(i,k-1,j)) + s*dt
else if (i == N+1 .and. j /= 1 .and. j /= N+2 .and. k == 2) then
T(i,k,j) = T(i,k,j) * (1-8*c) + 2*c*T_R + c*T(i-1,k,j) + c*(T(i,k,j+1) + T(i,k,j-1)) + &
c*T(i,k+1,j) + 2*c*T_D + s*dt
else if (i == N+1 .and. j == N+1 .and. k /= 1 .and. k /= N+2) then
T(i,k,j) = T(i,k,j) * (1-8*c) + 2*c*T_R + c*T(i-1,k,j) + 2*c*T_B + c*T(i,k,j-1) + c*T(i,k+1,j) &
+ c*T(i,k-1,j) + s*dt
else if (i == N+1 .and. j /= 1 .and. j /= N+2 .and. k == N+1) then
T(i,k,j) = T(i,k,j) * (1-8*c) + 2*c*T_R + c*T(i-1,k,j) + c*T(i,k,j+1) + c*T(i,k,j-1) + 2*c*T_U &
+ c*T(i,k-1,j) + s*dt
else if (j == N+1 .and. k == N+1 .and. i /= 1 .and. i /= N+2) then
T(i,k,j) = T(i,k,j) * (1-8*c) + c*T(i+1,k,j) + c*T(i-1,k,j) + 2*c*T_B + c*T(i,k,j-1) + 2*c*T_U &
+ c*T(i,k-1,j) + s*dt
else if (i == 2 .and. k == N+1 .and. j /= 1 .and. j /= N+2) then
T(i,k,j) = T(i,k,j) * (1-8*c) + 2*c*T_L + c*T(i+1,k,j) + 2*c*T_U + c*T(i,k,j-1)+ c*T(i,k,j+1) &
+ c*T(i,k-1,j) + s*dt
else if (i == 2 .and. j == N+1 .and. k /= 1 .and. k /= N+2) then
T(i,k,j) = T(i,k,j) * (1-8*c) + 2*c*T_L + c*T(i+1,k,j) + 2*c*T_B + c*T(i,k,j-1) + c*T(i,k+1,j) &
+ c*T(i,k-1,j) + s*dt
else if (k == 2 .and. j == N+1 .and. i /= 1 .and. i /= N+2) then
T(i,k,j) = T(i,k,j) * (1-8*c) + c*T(i+1,k,j) + c*T(i-1,k,j) + 2*c*T_B + c*T(i,k,j-1) &
+ c*T(i,k+1,j) + 2*c*T_D + s*dt
else if (i == 2 .and. k == 2 .and. j /= 1 .and. j /= N+2) then
T(i,k,j) = T(i,k,j) * (1-8*c) + 2*c*T_L + c*T(i+1,k,j) + c*T(i,k,j+1) + c*T(i,k,j-1) + &
c*T(i,k+1,j) + 2*c*T_D + s*dt
else if (i == N+1 .and. j == 2 .and. k == N+1) then
T(i,k,j) = T(i,k,j) * (1-9*c) + c*T(i+1,k,j) + 2*c*T_L + c*T(i,k,j+1) + 2*c*T_F + c*T(i,k-1,j) &
+ 2*c*T_U + s*dt
else if (i == N+1 .and. j == 2 .and. k == 2) then
T(i,k,j) = T(i,k,j) * (1-9*c) + c*T(i-1,k,j) + 2*c*T_R + c*T(i,k,j+1) + 2*c*T_F + c*T(i,k-1,j) &
+ 2*c*T_U + s*dt
else if (i == 2 .and. j == 2 .and. k == 2) then
T(i,k,j) = T(i,k,j) * (1-9*c) + c*T(i+1,k,j) + 2*c*T_L + c*T(i,k,j+1) + 2*c*T_F + 2*c*T_D + &
c*T(i,k+1,j) + s*dt
else if (i == N+1 .and. j == 2 .and. k == 2) then
T(i,k,j) = T(i,k,j) * (1-9*c) + c*T(i-1,k,j) + 2*c*T_R + c*T(i,k,j+1) + 2*c*T_F + 2*c*T_D + &
c*T(i,k+1,j) + s*dt
else
T(i,k,j) = T(i,k,j) * (1-6*c) + c*T(i-1,k,j) + c*T(i+1,k,j) + c*T(i,k,j+1) + c*T(i,k,j-1) + &
c*T(i,k-1,j) + c*T(i,k+1,j) + s*dt
end if
end do
end do
end do
!$omp end do
! Print in a file T values
!$omp single
i = 1
do while (i < N+3)
do j = 1, N+2
WRITE(10) T(i,:,j)
! WRITE(10,*) T(i,:,j) ! formatted
end do
i = i + 1
end do
!$omp end single
end if
end do
!$omp end parallel
DEALLOCATE(T)
end program heat
因此,对于这 15 次迭代,无论线程数如何,格式化的时间现在大约为 16 秒,并且比顺序(大约 21 秒)少一点。使用未格式化的方式,它的速度非常快(大约 1 秒),但我收到像“�..”这样的符号(可能是二进制?),我不知道如何对它进行后期处理并用它绘制一些东西......我编译它与: gfortran -fopenmp -g -fcheck=all -Wall para_heat_3D_unsteady.f95 我没有设置环境变量。