1

我尝试调用 MPI_Comm_spawn_multiple 来生成多个进程,但遇到了一个奇怪的现象:生成 40 个子进程时一切正常;生成 80 个子进程时,程序报出段错误(Segmentation fault)。问题出现的条件是:在 MPI_Info 中设置了 "wdir",并且生成的子进程数量足够多(在我的测试中需要大于 53)。想请教是否有人遇到过这个问题,或者知道如何解决。我使用的 MPI 版本是 mvapich2-1.8.1。

下面是我的代码:

#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>

#define P_SIZE 80
int main( int argc, char *argv[] )
{
    int np[P_SIZE],i;
    int errcodes[P_SIZE];
    MPI_Comm parentcomm, intercomm;
    char *cmds[P_SIZE];
    MPI_Info infos[P_SIZE];

    for(i=0;i<P_SIZE;i++){
    np[i]=1;
        MPI_Info_create(&infos[i]);
    cmds[i]="./spawn_example";
    }
for(i=0;i<10;i++){ MPI_Info_set(infos[i], "hosts", "node1");MPI_Info_set(infos[i], "wdir", "/home");}
for(i=10;i<20;i++){ MPI_Info_set(infos[i], "hosts", "node2");MPI_Info_set(infos[i], "wdir", "/home");}
for(i=20;i<30;i++){ MPI_Info_set(infos[i], "hosts", "node3");MPI_Info_set(infos[i], "wdir", "/home");}
for(i=30;i<40;i++){ MPI_Info_set(infos[i], "hosts", "node4");MPI_Info_set(infos[i], "wdir", "/home");}
for(i=40;i<50;i++){ MPI_Info_set(infos[i], "hosts", "node5");MPI_Info_set(infos[i], "wdir", "/home");}
for(i=50;i<60;i++){ MPI_Info_set(infos[i], "hosts", "node6");MPI_Info_set(infos[i], "wdir", "/home");}
for(i=60;i<70;i++){ MPI_Info_set(infos[i], "hosts", "node7");MPI_Info_set(infos[i], "wdir", "/home");}
for(i=70;i<80;i++){ MPI_Info_set(infos[i], "hosts", "node8");MPI_Info_set(infos[i], "wdir", "/home");}

    MPI_Init( &argc, &argv );
    MPI_Comm_get_parent( &parentcomm );
    if (parentcomm == MPI_COMM_NULL)
    {
        MPI_Comm_spawn_multiple( P_SIZE, cmds, MPI_ARGVS_NULL, np, infos, 0, MPI_COMM_SELF, &intercomm, errcodes );
        printf("I'm the parent.\n"); 
    }
    else
    {
        printf("I'm the spawned.\n");
    }
    fflush(stdout);
    MPI_Finalize();
    return 0;
}
4

0 回答 0