1

我有一个使用 MPI 编写的超立方体的一对多广播方法:

one2allbcast(int n, int rank, void *data, int count, MPI_Datatype dtype)
{
  MPI_Status status;
  int mask, partner;
  int mask2 = ((1 << n) - 1) ^ (1 << n-1);

  for (mask = (1 << n-1); mask; mask >>= 1, mask2 >>= 1)
  {
    if (rank & mask2 == 0)
    {
      partner = rank ^ mask;
      if (rank & mask)
       MPI_Recv(data, count, dtype, partner, 99, MPI_COMM_WORLD, &status);
      else
       MPI_Send(data, count, dtype, partner, 99, MPI_COMM_WORLD);
    }
  }
}

从 main 调用它时:

int main( int argc, char **argv )
{
  int n, rank;

  MPI_Init (&argc, &argv);
  MPI_Comm_size (MPI_COMM_WORLD, &n);
  MPI_Comm_rank (MPI_COMM_WORLD, &rank);

  one2allbcast(floor(log(n) / log (2)), rank, "message", sizeof(message), MPI_CHAR);

  MPI_Finalize();

  return 0;
}

在 8 个节点上编译和执行,我收到一系列错误报告,报告进程 1、3、5、7 在接收任何数据之前停止:

MPI_Recv: process in local group is dead (rank 1, MPI_COMM_WORLD)
Rank (1, MPI_COMM_WORLD): Call stack within LAM:
Rank (1, MPI_COMM_WORLD):  - MPI_Recv()
Rank (1, MPI_COMM_WORLD):  - main()
MPI_Recv: process in local group is dead (rank 3, MPI_COMM_WORLD)
Rank (3, MPI_COMM_WORLD): Call stack within LAM:
Rank (3, MPI_COMM_WORLD):  - MPI_Recv()
Rank (3, MPI_COMM_WORLD):  - main()
MPI_Recv: process in local group is dead (rank 5, MPI_COMM_WORLD)
Rank (5, MPI_COMM_WORLD): Call stack within LAM:
Rank (5, MPI_COMM_WORLD):  - MPI_Recv()
Rank (5, MPI_COMM_WORLD):  - main()
MPI_Recv: process in local group is dead (rank 7, MPI_COMM_WORLD)
Rank (7, MPI_COMM_WORLD): Call stack within LAM:
Rank (7, MPI_COMM_WORLD):  - MPI_Recv()
Rank (7, MPI_COMM_WORLD):  - main()

我哪里错了?

4

3 回答 3

3

事实证明,错误在行

if (rank & mask2 == 0)

我没有考虑操作员优先级。正确且有效的书写方式是

if ((rank & mask2) == 0)

&首先评估按位的位置。

于 2010-06-10T07:22:20.627 回答
0

在调用a 后请求 MPI 通信时,这是一个常见错误MPI_Finalize。在调用MPI_Finalizemake 之前测试是否所有 MPI 调用都已完成。

于 2010-06-08T14:22:31.060 回答
0
// a c# class for one-to-all broadcast on a hypercube
    class Program
        {
            static void Main(string[] args)
            {
                using (new MPI.Environment(ref args))
                {
                    string strMsg = "msg";
                    Intracommunicator comm = Communicator.world;
                    int my_id = comm.Rank;
                    int d = Convert.ToInt32(Math.Log(comm.Size, 2));

                  ///////////////////////////////////////////////  

                    int power2i = 0;
                    int msgDestination = 0, msgSource = 0;
                    //d=0xff;
                    //Console.WriteLine("d:{0:x}", d);
                    int mask = Convert.ToInt32(Math.Pow(2, d)) - 1;
                    //Console.WriteLine("first: " + Convert.ToString(mask, 2));

                    //Console.WriteLine("second: " + mask.ToString());
                    comm.Barrier();
                    for (int i = d - 1; i >= 0; i--)
                    {
                        power2i = Convert.ToInt32(Math.Pow(2, i));
                        mask = mask ^ power2i;
                        //Console.WriteLine("third: " + "i: " + i + "-" + Convert.ToString(mask, 2));

                        if ((my_id & mask) == 0)
                        {
                            if ((my_id & power2i) == 0)
                            {
                                msgDestination = my_id ^ power2i;
                                comm.Send<string>(strMsg, msgDestination, 1);
                                Console.WriteLine("process: " + my_id + "- sent: " + strMsg + " to: " + msgDestination + "-@: " + DateTime.Now.Millisecond);


                            }
                            else
                            {
                                msgSource = my_id ^ power2i;
                                strMsg = comm.Receive<string>(msgSource, 1);
                                //Console.WriteLine("process: " + my_id + "- received: " + strMsg + " from: " + msgSource + "-@: " + i);
                            }
                        }

                    }

                    /////////////////////////////////////////////////////////////////////////////////

                }
            }
于 2011-07-02T14:55:30.153 回答