0

我在定义 mpi_type_contiguous 和稍后使用 mpi_gatherv 时遇到了一个奇怪的问题。类型定义为:

type glist
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!uncomment line below for int version:
!  integer :: iref , biref
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    real(8) :: rvar
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!uncomment line below for buggy version:
    integer :: ciref
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
end type glist

现在的代码不起作用。如果我要发表评论integer :: ciref,它会起作用。如果我real(8) :: rvar改为注释并取消注释其他两个整数,情况也是如此integer :: iref, biref

这意味着错误取决于数据类型的大小,但前提是其中有 a real(8)。如果我有一个real(8)和两个,int那么它会再次起作用。

该代码设计为使用 3 个线程(!)运行。我用 openmpi 和 gfortran (mpif90) 运行它。没有特殊的编译标志和使用mpirun -np 3 filename. 如果有人可以用 mpich 运行它或用 ifort 或任何有趣的东西编译它,以便找出问题出在哪里。

- - 编辑 - -

Platinummonkey 建议在下面使用mpi_type_struct,但它仍然不起作用。如果我sizeof(glist)像上面那样使用 glist,我会得到 16 而不是 12 作为答案。

- - /编辑 - -

在此先感谢您的帮助。

完整的代码是(不要担心其中一些可以忽略)

module mod_glist
type glist
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!uncomment line below for int version:
!  integer :: iref , biref
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    real(8) :: rvar
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!uncomment line below for buggy version:
    integer :: ciref
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
end type glist

contains

subroutine sof_glist(sof)
    implicit none
    integer, intent(out) :: sof

    type(glist) :: dum
    integer     :: val

    val = 0
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!uncomment line below for int version:
!  val = kind(dum%iref) + kind(dum%biref)
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    val = val + kind(dum%rvar)
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!uncomment line below for buggy version:
    val = val + kind(dum%ciref)
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    sof = val/kind(0)
    write(*,*) 'Size in bytes, integers: ', sof, val
end subroutine

end module mod_glist

program test_mpi_gatherv

use mpi
use mod_glist

    implicit none

    integer                                :: err, np, tp, nglout, i, j, nglin, sofgl, mpi_type_glist
    type(glist), dimension(:), allocatable :: gl, glcom, glsave
    integer    , dimension(:), allocatable :: glsize, nglinv, nglinp
    integer(kind=mpi_address_kind) :: ii, ij

    call mpi_init(err)
    call mpi_comm_size(mpi_comm_world, np, err)
    call mpi_comm_rank(mpi_comm_world, tp, err)
    tp = tp + 1

    call sof_glist(sofgl)
    call mpi_type_contiguous(sofgl, mpi_integer, mpi_type_glist, err)
    call mpi_type_commit(mpi_type_glist, err)
    call mpi_type_get_extent(mpi_type_glist, ii, ij, err)
    write(*,*) 'extend: ', ii, ij

    allocate(glsize(np), nglinv(np), nglinp(np))

    glsize(1) = 5
    glsize(2) = 4
    glsize(3) = 3
    glsize(4:np) = 0

    allocate(gl(glsize(tp)))
    j = 1
    do i = 1,tp-1
      j = j+glsize(i)
    enddo

    do i = 1,glsize(tp)
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!uncomment line below for int version:
!    gl(i)%iref = j
!    gl(i)%biref = -j
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
      gl(i)%rvar = real(j,8)
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!uncomment line below for buggy version:
      gl(i)%ciref = -j*10
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
      j = j+1
    enddo

    do i=1,np ! setting up stuff can be ignored
      if(i.eq.1)then
        if(tp.eq.i)then
          nglinv(1) = 0
          nglinv(2) = 2
          nglinv(3) = 3
          nglinp(1) = 0
          nglinp(2) = nglinv(1) + nglinp(1)
          nglinp(3) = nglinv(2) + nglinp(2)
          nglin = nglinv(1) + nglinv(2) + nglinv(3)
          allocate(glcom(nglin))
          nglout = 0
        else
          if(tp.eq.2)then
            nglout = 2
            allocate(glcom(nglout))
            glcom(1) = gl(1)
            glcom(2) = gl(3)
          elseif(tp.eq.3)then
            nglout = 3
            allocate(glcom(nglout))
            glcom(1) = gl(1)
            glcom(2) = gl(2)
            glcom(3) = gl(3)
          endif
        endif
      elseif(i.eq.2)then
        if(tp.eq.i)then
          nglinv(1) = 3
          nglinv(2) = 0
          nglinv(3) = 2
          nglinp(1) = 0
          nglinp(2) = nglinv(1) + nglinp(1)
          nglinp(3) = nglinv(2) + nglinp(2)
          nglin = nglinv(1) + nglinv(2) + nglinv(3)
          allocate(glcom(nglin))
          nglout = 0
        else
          if(tp.eq.1)then
            nglout = 3
            allocate(glcom(nglout))
            glcom(1) = gl(2)
            glcom(2) = gl(4)
            glcom(3) = gl(5)
          elseif(tp.eq.3)then
            nglout = 2
            allocate(glcom(nglout))
            glcom(1) = gl(2)
            glcom(2) = gl(3)
          endif
        endif
      elseif(i.eq.3)then
        if(tp.eq.i)then
          nglinv(1) = 0
          nglinv(2) = 2
          nglinv(3) = 0
          nglinp(1) = 0
          nglinp(2) = nglinv(1) + nglinp(1)
          nglinp(3) = nglinv(2) + nglinp(2)
          nglin = nglinv(1) + nglinv(2) + nglinv(3)
          allocate(glcom(nglin))
          nglout = 0
        else
          if(tp.eq.1)then
            nglout = 0
            allocate(glcom(nglout))
          elseif(tp.eq.2)then
            nglout = 2
            allocate(glcom(nglout))
            glcom(1) = gl(1)
            glcom(2) = gl(4)
          endif
        endif
      endif ! end of setting up stuff

      if(i.eq.tp) allocate(glsave(nglin))

      ! debug output
      call mpi_barrier(mpi_comm_world, err)
      write(*,*) i, tp, nglout, nglin
      call mpi_barrier(mpi_comm_world, err)
      if(i.eq.tp) write(*,*) i, nglinv, nglinp
      call mpi_barrier(mpi_comm_world, err)
      ! end debug output

      call mpi_gatherv(glcom, nglout, mpi_type_glist, glsave, nglinv, nglinp, mpi_type_glist, i-1, mpi_comm_world, err)

      if(allocated(glcom)) deallocate(glcom)
    enddo

    ! debug output
    call mpi_barrier(mpi_comm_world, err)
    do i = 1,nglin
      write(*,*) tp, i, glsave(i)
    enddo
    ! end debug output

    call mpi_finalize(err)

end program
4

2 回答 2

3

您的基本错误是您无法通过对其组件的大小求和来计算派生类型的大小,因为这忽略了满足对齐要求所必需的填充。在您的示例中,real(8) 需要在 8 字节边界上对齐,因此如果派生类型包含默认类型整数(大小为 4 字节),则编译器将添加 4 字节的填充以确保下一个元素在派生类型数组中,将从 8 字节边界开始。正如 Platinummonkey 的回答所指出的,这个问题的正确解决方案是定义一个 mpi_type_struct: MPI struct datatype with an array

此外,假设种类数等于类型的大小是不可移植的,它恰好在 gfortran 中工作。

于 2012-02-27T16:46:03.547 回答
2

请参阅我关于构建自己的结构的旧帖子。更可靠,适合任何类型的组合。

带有数组的 MPI 结构数据类型

于 2012-02-27T13:52:17.540 回答