3

我想将派生类型 xyzBuffer 从处理器 1 复制到处理器 0 的 xyz。我尝试使用 MPI_GATHER:

 call MPI_GATHERV(xyzBuffer,1,inewtype,xyz,1, dispGather,inewtype,0,icomm,ierr)

但是处理器 0 将有未写入的内存位:似乎 MPI_GATHER 不允许聚集派生类型。我使用了 MPI_ISEND/MPI_IRECV,但程序挂在下面的代码行:

 if ( iproc == 1 ) then
       call MPI_ISEND(xyz,1,inewtype,1,itag,icomm,ireq,ierr)
       call MPI_WAIT(ireq,istatus,ierr)
    else if ( iproc == 0 ) then 
       call MPI_IRECV(xyz,1,inewtype,0,itag,icomm,ireq,ierr)
       call MPI_WAIT(ireq,istatus,ierr)
    end if

这些方法不是要与派生类型一起使用吗?

以下是完整的程序。我在测试 MPI_ISEND、MPI_IRECV 块时注释掉 MPI_GATHER,反之亦然。

program type_derived_gather
  use nodeinfo
  implicit none
  include 'mpif.h'
  integer(4) :: ierr
  integer(4) :: istatus(MPI_STATUS_SIZE)
  integer(4) :: i
  integer(4) :: j
  integer(4) :: iblock(8)
  integer(4) :: idisp(8)
  integer(4) :: itype(8)
  integer(4) :: inewtype
  integer(4) :: iextent
  integer(4) :: itag
  integer(4) :: ireq, isend, irecv
  integer(4) :: dispGather ! for root

    TYPE :: newXYZ
        integer :: x, u
        integer :: y, v
        integer :: z, w
        integer,dimension(3) :: uvw     
    END TYPE

    TYPE (newXYZ) :: xyzBuffer
    TYPE (newXYZ) :: xyz


  call MPI_INIT(ierr)
  icomm = MPI_COMM_WORLD
  call MPI_COMM_SIZE(icomm,nproc,ierr)
  call MPI_COMM_RANK(icomm,iproc,ierr)


    if (iproc == 1) then
        xyz%x = 1
        xyz%y = 2
        xyz%z = 3
        xyz%u = 4
        xyz%v = 5
        xyz%w = 6
        xyz%uvw = (/10,10,10/)
    else
        xyz%x = 0
        xyz%y = 0       
        xyz%z = 0
        xyz%u = 0
        xyz%v = 0       
        xyz%w = 0 
        xyz%uvw = (/0,0,0/)
    endif


! Derived type
  iblock(1) = 1
  iblock(2) = 1
  iblock(3) = 1
  iblock(4) = 1
  iblock(5) = 1
  iblock(6) = 1
  iblock(7) = 3
  iblock(8) = 1

  idisp(1)  = 0  ! in bytes
  idisp(2)  = 4*1  ! in bytes
  idisp(3)  = 4*2  ! in bytes 
  idisp(4)  = 4*3  ! in bytes 
  idisp(5)  = 4*4  ! in bytes
  idisp(6)  = 4*5  ! in bytes 
  idisp(7)  = 4*6  ! in bytes 
  idisp(8)  = 4*9  ! in bytes    

  itype(1)  = MPI_INTEGER
  itype(2)  = MPI_INTEGER
  itype(3)  = MPI_INTEGER
  itype(4)  = MPI_INTEGER
  itype(5)  = MPI_INTEGER
  itype(6)  = MPI_INTEGER
  itype(7)  = MPI_INTEGER
  itype(8)  = MPI_UB  
  call MPI_TYPE_STRUCT(8,iblock,idisp,itype,inewtype,ierr)
  call MPI_TYPE_EXTENT(inewtype,iextent,ierr)
  write(6,*)'newtype extent = ',iextent  
  call MPI_TYPE_COMMIT(inewtype,ierr)

    itag = 1
    dispGather = 0


  do j = 1, 2
     if ( j == 2 ) then
! Gather
        call MPI_GATHERV(xyzBuffer,1,inewtype,xyz,1, dispGather,inewtype,0,icomm,ierr)
! Isend Irecv 
        if ( iproc == 1 ) then
           call MPI_ISEND(xyz,1,inewtype,1,itag,icomm,isend,ierr)
           write(6,*)'end send'
           call MPI_WAIT(isend,istatus,ierr)
        else if ( iproc == 0 ) then
           call MPI_IRECV(xyz,1,inewtype,0,itag,icomm,irecv,ierr)
           write(6,*)'end receive'
           call MPI_WAIT(irecv,istatus,ierr)
        end if
! Output 
     end if      
     call MPI_BARRIER(icomm,ierr)
     if ( iproc == 0 )write(6,*)'iproc = ',iproc
        if ( iproc == 0 ) write(6,*)xyz

     call MPI_BARRIER(icomm,ierr)
     if ( iproc == 1 )write(6,*)'iproc = ',iproc
        if ( iproc == 1 ) write(6,*)xyz
  end do

  call MPI_FINALIZE(ierr)
end program type_derived_gather

当我使用 MPI_ISEND 和 MPI_IRECV 块运行时,程序挂起并且输出为:

 iproc =            0
           0           0           0           0           0           0           0           0           0
 end receive
 newtype extent =           36
 iproc =            1
           1           4           2           5           3           6          10          10          10
 end send

当 MPI_GATHER 运行时,我收到带有输出的分段错误:

 newtype extent =           36
 iproc =            0
           0           0           0           0           0           0           0           0           0
 newtype extent =           36
 iproc =            1
           1           4           2           5           3           6          10          10          10
 newtype extent =           36
 newtype extent =           36
 newtype extent =           36
 newtype extent =           36
 newtype extent =           36
 newtype extent =           36
[west0302:17101] *** Process received signal ***
[west0302:17101] Signal: Segmentation fault (11)
[west0302:17101] Signal code: Address not mapped (1)
[west0302:17101] Failing at address: 0x7ff2c8d1ddc0
[west0302:17101] [ 0] /lib64/libpthread.so.0 [0x3d3540eb70]
[west0302:17101] [ 1] /lib64/libc.so.6(memcpy+0xe1) [0x3d3487c321]
[west0302:17101] [ 2] /n/sw/openmpi-1.2.5-gcc-4.1.2/lib64/libmpi.so.0(ompi_convertor_unpack+0x153) [0x2acd5f392093]
[west0302:17101] [ 3] /n/sw/openmpi-1.2.5-gcc-4.1.2/lib/openmpi/mca_pml_ob1.so(mca_pml_ob1_recv_request_progress+0x7d1) [0x2acd6423dd91]
[west0302:17101] [ 4] /n/sw/openmpi-1.2.5-gcc-4.1.2/lib/openmpi/mca_pml_ob1.so [0x2acd6423a4c7]
[west0302:17101] [ 5] /n/sw/openmpi-1.2.5-gcc-4.1.2/lib/openmpi/mca_btl_sm.so(mca_btl_sm_component_progress+0xde2) [0x2acd64ca81c2]
[west0302:17101] [ 6] /n/sw/openmpi-1.2.5-gcc-4.1.2/lib/openmpi/mca_bml_r2.so(mca_bml_r2_progress+0x2a) [0x2acd6444504a]
[west0302:17101] [ 7] /n/sw/openmpi-1.2.5-gcc-4.1.2/lib64/libopen-pal.so.0(opal_progress+0x4a) [0x2acd5f84a9ba]
[west0302:17101] [ 8] /n/sw/openmpi-1.2.5-gcc-4.1.2/lib/openmpi/mca_pml_ob1.so(mca_pml_ob1_recv+0x2b5) [0x2acd64238565]
[west0302:17101] [ 9] /n/sw/openmpi-1.2.5-gcc-4.1.2/lib/openmpi/mca_coll_basic.so(mca_coll_basic_gatherv_intra+0x14a) [0x2acd650bb37a]
[west0302:17101] [10] /n/sw/openmpi-1.2.5-gcc-4.1.2/lib64/libmpi.so.0(MPI_Gatherv+0x1b0) [0x2acd5f3a4170]
[west0302:17101] [11] /n/sw/openmpi-1.2.5-gcc-4.1.2/lib64/libmpi_f77.so.0(mpi_gatherv__+0x134) [0x2acd5f142784]
[west0302:17101] [12] ./type_derived_gather.x(MAIN__+0x342) [0x401742]
[west0302:17101] [13] ./type_derived_gather.x(main+0xe) [0x403fee]
[west0302:17101] [14] /lib64/libc.so.6(__libc_start_main+0xf4) [0x3d3481d994]
[west0302:17101] [15] ./type_derived_gather.x [0x401349]
[west0302:17101] *** End of error message ***
4

3 回答 3

3

是的,您当然可以这样做:您的代码挂在MPI_Isend()/上的问题MPI_Irecv()是您正在发送/接收错误的进程;您希望 1 发送到 0,0 发送到 1,而不是 1 发送到 1 和 0 从 0 接收。0 永远不会收到该幻像消息(因为它不存在),并且您正在挂起。

    if ( iproc == 1 ) then
       call MPI_ISEND(xyz,1,inewtype,1,itag,icomm,isend,ierr)
       write(6,*)'end send'
       call MPI_WAIT(isend,istatus,ierr)
    else if ( iproc == 0 ) then
       call MPI_IRECV(xyz,1,inewtype,0,itag,icomm,irecv,ierr)
       write(6,*)'end receive'
       call MPI_WAIT(irecv,istatus,ierr)
    end if

应该

    if ( iproc == 1 ) then
       call MPI_ISEND(xyz,1,inewtype,0,itag,icomm,isend,ierr)
       call MPI_WAIT(isend,istatus,ierr)
    else if ( iproc == 0 ) then
       call MPI_IRECV(xyz,1,inewtype,1,itag,icomm,irecv,ierr)
       call MPI_WAIT(irecv,istatus,ierr)
    end if

至于更大的问题,当然可以使用MPI_Type_create_struct()(注意,您应该使用这个更新的例程而不是MPI_Create_struct()Fortran 派生的数据类型。正如@elorenz 指出的那样,手动计算偏移量不仅乏味且容易出错,但可能不正确;编译器有很多自由来填充等,以实现有效的内存访问。在你的情况下,它可能会工作,因为它都是整数,但对于具有混合大小的字段的类型,你会遇到麻烦。

处理该问题的正确方法是使用MPI_Get_address为您计算字段偏移量;下面是一个完整的例子。

program type_derived_gather
  use iso_fortran_env
  use mpi
  implicit none
  integer :: ierr
  integer, parameter :: nfields=4
  integer :: iblock(nfields)
  integer(kind=MPI_ADDRESS_KIND) :: start, idisp(nfields)
  integer :: itype(nfields)
  integer :: inewtype
  integer :: nproc, iproc
  integer :: i

  type :: newXYZ
       integer :: id
       real(kind=real64) :: x, y, z
  end type

  type(newXYZ), dimension(:), allocatable :: allxyzs
  type(newXYZ) :: locxyz

  call MPI_INIT(ierr)
  call MPI_COMM_SIZE(MPI_COMM_WORLD,nproc,ierr)
  call MPI_COMM_RANK(MPI_COMM_WORLD,iproc,ierr)

  locxyz % x  = 1.d0*iproc
  locxyz % y  = 2.d0*iproc
  locxyz % z  = 3.d0*iproc
  locxyz % id = iproc

  if (iproc == 0) allocate(allxyzs(nproc))

  ! everyone builds the type

  iblock = 1

  itype(1)  = MPI_INTEGER
  itype(2:4)= MPI_DOUBLE_PRECISION

  call MPI_Get_address(locxyz,    start, ierr)
  call MPI_Get_address(locxyz%id, idisp(1), ierr)
  call MPI_Get_address(locxyz%x,  idisp(2), ierr)
  call MPI_Get_address(locxyz%y,  idisp(3), ierr)
  call MPI_Get_address(locxyz%z,  idisp(4), ierr)

  idisp = idisp - start

  call MPI_Type_create_struct(nfields,iblock,idisp,itype,inewtype,ierr)
  call MPI_Type_commit(inewtype,ierr)

  ! Now gather the structs

  print '(A,I3,A,I3,1X,3(F6.2,1X))', 'Rank ', iproc, ': locxyz = ', locxyz%id, locxyz%x, locxyz%y, locxyz%z

  call MPI_Gather(locxyz, 1, inewtype, allxyzs, 1, inewtype, 0, MPI_COMM_WORLD, ierr)

  if (iproc == 0) then
      print '(A,I3,A)', 'Rank ', iproc, ' has -- '
      do i=1, nproc
          print '(A,I3,A,I3,1X,3(F6.2,1X))', '    ', i, ': ', allxyzs(i)%id, allxyzs(i)%x, allxyzs(i)%y, allxyzs(i)%z
      enddo
      deallocate(allxyzs)
  end if

  call MPI_FINALIZE(ierr)

end program type_derived_gather
于 2013-05-19T13:51:04.920 回答
0

是的,它可以。但请确保在数据类型的定义中使用“序列”!否则,编译器可以在类型成员如何在内存中对齐方面采取一些自由度。这可能会导致复制缓冲区时出现一些乱码。

于 2013-05-17T14:28:39.730 回答
0

当然,您可以将 MPI_Gather(或其他集合)与派生数据类型一起使用。任何采用 MPI_Datatype 参数的 MPI 函数都可以与派生数据类型一起使用。如果您发布一个如何构造和使用派生数据类型的最小示例,我们可能会提供更好的帮助。

于 2012-11-17T03:49:36.110 回答