
I'm having trouble doing matrix multiplication with MPI.

I have the program read two n x n matrices from two files, and it is supposed to use MPI. But I'm getting a segmentation fault in one of the processes. This is the output I get when I run the code:

read matrix A from matrixA
read matrix B from matrixB

mpirun noticed that process rank 1 with PID 15599 on node VirtualBox exited on signal 11 (Segmentation fault).

Here is my code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>

/* Tag values are assumed here; the original definitions were not included in the post */
#define MASTER_TO_SLAVE_TAG 1
#define SLAVE_TO_MASTER_TAG 4

/* read_matrix() is defined elsewhere; the signature is inferred from the calls below */
void read_matrix(char *fname, float ***a, float **sa, int *rows, int *cols);

int rank, size;                      /* MPI rank and number of processes */
int low_bound, upper_bound, portion; /* Row range handed to each slave */
double start_time, end_time;         /* Timing of the parallel part */
MPI_Status status;
MPI_Request request;

int main (int argc, char * argv[])
{
    /* Check the number of arguments */
    int n; /* Dimension of the matrices */
    float *sa, *sb, *sc; /* Storage for matrix A, B, and C */
    float **a, **b, **c; /* 2D arrays to access matrix A, B, and C */
    int i, j, k;

    MPI_Init(&argc, &argv); //Initialize MPI operations
    MPI_Comm_rank(MPI_COMM_WORLD, &rank); //Get the rank
    MPI_Comm_size(MPI_COMM_WORLD, &size); //Get number of processes

    if(argc != 4) {
        printf("Usage: %s fileA fileB fileC\n", argv[0]);
        return 1;
    }

    if(rank == 0)
    {
        /* Read matrix A */
        printf("read matrix A from %s\n", argv[1]);
        read_matrix(argv[1], &a, &sa, &i, &j);
        if(i != j) {
            printf("ERROR: matrix A not square\n"); return 2;
        }
        n = i;

        //printf("%d", n);

        /* Read matrix B */
        printf("Read matrix B from %s\n", argv[2]);
        read_matrix(argv[2], &b, &sb, &i, &j);
        if(i != j) {
            printf("ERROR: matrix B not square\n");
            return 2;
        }
        if(n != i) {
            printf("ERROR: matrix A and B incompatible\n");
            return 2;
         }
    }

    printf("test");

    if(rank == 0)
    {
        /* Initialize matrix C */
        sc = (float*)malloc(n*n*sizeof(float));
        memset(sc, 0, n*n*sizeof(float));
        c = (float**)malloc(n*sizeof(float*));
        for(i=0; i<n; i++) c[i] = &sc[i*n];
    }

    ////////////////////////////////////////////////////////////////////////////////////////////
    float matrA[n][n];
    float matrB[n][n];
    float matrC[n][n];

    for(i = 0; i < n; i++)
    {
        for(j = 0; j < n; j++)
        {
            matrA[i][j] = sa[(i*n) + j];
            matrB[i][j] = sb[(i*n) + j];
        }
    }
    /* Master initializes work*/
    if (rank == 0)
    {
        start_time = MPI_Wtime();
        for (i = 1; i < size; i++)
        {
            //For each slave other than the master
            portion = (n / (size - 1)); // Calculate portion without master
            low_bound = (i - 1) * portion;
            if (((i + 1) == size) && ((n % (size - 1)) != 0))
            {
                //If rows of [A] cannot be equally divided among slaves,
                upper_bound = n; //the last slave gets all the remaining rows.
            }
            else
            {
                upper_bound = low_bound + portion; //Rows of [A] are equally divisible among slaves
            }
            //Send the low bound first without blocking, to the intended slave.
            MPI_Isend(&low_bound, 1, MPI_INT, i, MASTER_TO_SLAVE_TAG, MPI_COMM_WORLD, &request);

            //Next send the upper bound without blocking, to the intended slave
            MPI_Isend(&upper_bound, 1, MPI_INT, i, MASTER_TO_SLAVE_TAG + 1, MPI_COMM_WORLD, &request);

            //Finally send the allocated row portion of [A] without blocking, to the intended slave
            MPI_Isend(&matrA[low_bound][0], (upper_bound - low_bound) * n, MPI_FLOAT, i, MASTER_TO_SLAVE_TAG + 2, MPI_COMM_WORLD, &request);
        }
    }


    //broadcast [B] to all the slaves
    MPI_Bcast(&matrB, n*n, MPI_FLOAT, 0, MPI_COMM_WORLD);
    /* work done by slaves*/
    if (rank > 0)
    {
        //receive low bound from the master
        MPI_Recv(&low_bound, 1, MPI_INT, 0, MASTER_TO_SLAVE_TAG, MPI_COMM_WORLD, &status);
        //next receive upper bound from the master
        MPI_Recv(&upper_bound, 1, MPI_INT, 0, MASTER_TO_SLAVE_TAG + 1, MPI_COMM_WORLD, &status);
        //finally receive row portion of [A] to be processed from the master
        MPI_Recv(&matrA[low_bound][0], (upper_bound - low_bound) * n, MPI_FLOAT, 0, MASTER_TO_SLAVE_TAG + 2, MPI_COMM_WORLD, &status);
        for (i = low_bound; i < upper_bound; i++)
        {
            //iterate through a given set of rows of [A]
            for (j = 0; j < n; j++)
            {
                //iterate through columns of [B]
                for (k = 0; k < n; k++)
                {
                    //iterate through rows of [B]
                    matrC[i][j] += (matrA[i][k] * matrB[k][j]);
                }
            }
        }


        //send back the low bound first without blocking, to the master
        MPI_Isend(&low_bound, 1, MPI_INT, 0, SLAVE_TO_MASTER_TAG, MPI_COMM_WORLD, &request);
        //send the upper bound next without blocking, to the master
        MPI_Isend(&upper_bound, 1, MPI_INT, 0, SLAVE_TO_MASTER_TAG + 1, MPI_COMM_WORLD, &request);
        //finally send the processed portion of data without blocking, to the master
        MPI_Isend(&matrC[low_bound][0],
                  (upper_bound - low_bound) * n,
                  MPI_FLOAT,
                  0,
                  SLAVE_TO_MASTER_TAG + 2,
                  MPI_COMM_WORLD,
                  &request);
    }

    /* Master gathers processed work*/
    if (rank == 0)
    {
        for (i = 1; i < size; i++)
        {
            // Until all slaves have handed back the processed data,
            // receive low bound from a slave.
            MPI_Recv(&low_bound, 1, MPI_INT, i, SLAVE_TO_MASTER_TAG, MPI_COMM_WORLD, &status);

            //Receive upper bound from a slave
            MPI_Recv(&upper_bound, 1, MPI_INT, i, SLAVE_TO_MASTER_TAG + 1, MPI_COMM_WORLD, &status);

            //Receive processed data from a slave
            MPI_Recv(&matrC[low_bound][0],
                     (upper_bound - low_bound) * n,
                     MPI_FLOAT,
                     i,
                     SLAVE_TO_MASTER_TAG + 2,
                     MPI_COMM_WORLD,
                     &status);
        }
        end_time = MPI_Wtime();
        printf("\nRunning Time = %f\n\n", end_time - start_time);
    }
    MPI_Finalize(); //Finalize MPI operations

    /* Do the multiplication */
    ////////////////////////////////////////////////////  matmul(a, b, c, n);
    for(i = 0; i < n; i++)
    {
        for (j = 0; j < n; j++)
        {
            sc[(i*n) + j] = matrC[i][j];
        }
    }
    return 0;
}

1 Answer


Every process declares the pointers to the matrices, i.e.:

float *sa, *sb, *sc; /* storage for matrix A, B, and C */

but only process 0 allocates and fills in the arrays sa and sb:

if(rank == 0)
{
    ...
    read_matrix(argv[1], &a, &sa, &i, &j);
    ...
    read_matrix(argv[2], &b, &sb, &i, &j);
    ...
}

However, afterwards every process tries to access positions of the sa and sb arrays:

for(i = 0; i < n; i++)
{
   for(j = 0; j < n; j++)
   {
       matrA[i][j] = sa[(i*n) + j];
       matrB[i][j] = sb[(i*n) + j];
   }
}

Since only process 0 has allocated and filled in the arrays sa and sb, the rest of the processes are trying to access memory (sa[(i*n) + j] and sb[(i*n) + j]) that they never allocated. That is why you get the segmentation fault.
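
One minimal way around that, keeping the rest of your program structure, is to broadcast n from rank 0 before declaring the local arrays and to restrict the copy from sa/sb to rank 0, since the other ranks receive their part of A and all of B over MPI anyway. This is only a sketch of the idea, not a drop-in patch:

/* Sketch: every rank needs n for the local arrays, but only rank 0
 * owns sa/sb, so the copy loop must be restricted to rank 0. */
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);

float matrA[n][n];
float matrB[n][n];
float matrC[n][n];
memset(matrC, 0, sizeof matrC); /* matrC is accumulated into with +=, so zero it first */

if (rank == 0)
{
    for (i = 0; i < n; i++)
        for (j = 0; j < n; j++)
        {
            matrA[i][j] = sa[(i*n) + j];
            matrB[i][j] = sb[(i*n) + j];
        }
}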

As a side note, there is another problem in your program: you start non-blocking sends with MPI_Isend but never wait on the returned request handles for them to complete. The MPI implementation is not even required to start the send operation until it is properly progressed to completion, mainly via a call to one of the wait or test operations (MPI_Wait, MPI_Waitsome, MPI_Waitall, etc.). Even worse, you reuse the same handle variable request, effectively losing the handles of all previously initiated requests, which makes them unwaitable/untestable. Use an array of requests instead and wait for all of them with MPI_Waitall after the send loop.
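
A minimal sketch of that change to the master's distribution loop might look like the following (the names reqs, lows, ups, and nreq are introduced here purely for illustration; the bounds also move into per-worker arrays, because a buffer handed to MPI_Isend must not be reused before that send has completed):

MPI_Request reqs[3 * (size - 1)]; /* three sends per worker, workers are ranks 1..size-1 */
int lows[size], ups[size];        /* per-worker bounds, kept alive until MPI_Waitall */
int nreq = 0;

for (i = 1; i < size; i++)
{
    portion = n / (size - 1);
    lows[i] = (i - 1) * portion;
    ups[i]  = (i + 1 == size) ? n : lows[i] + portion; /* last worker takes the remainder */

    MPI_Isend(&lows[i], 1, MPI_INT, i, MASTER_TO_SLAVE_TAG,     MPI_COMM_WORLD, &reqs[nreq++]);
    MPI_Isend(&ups[i],  1, MPI_INT, i, MASTER_TO_SLAVE_TAG + 1, MPI_COMM_WORLD, &reqs[nreq++]);
    MPI_Isend(&matrA[lows[i]][0], (ups[i] - lows[i]) * n, MPI_FLOAT, i,
              MASTER_TO_SLAVE_TAG + 2, MPI_COMM_WORLD, &reqs[nreq++]);
}

/* All handles are still available, so the sends can actually be completed. */
MPI_Waitall(nreq, reqs, MPI_STATUSES_IGNORE);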

Also give it a thought: do you really need non-blocking operations to send the data back from the workers?
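
The workers have nothing useful to overlap with those sends, so plain blocking sends are simpler and remove the need for request handles altogether. Roughly, in place of the three MPI_Isend calls at the end of the worker branch:

/* Sketch: blocking sends from the worker back to the master */
MPI_Send(&low_bound, 1, MPI_INT, 0, SLAVE_TO_MASTER_TAG, MPI_COMM_WORLD);
MPI_Send(&upper_bound, 1, MPI_INT, 0, SLAVE_TO_MASTER_TAG + 1, MPI_COMM_WORLD);
MPI_Send(&matrC[low_bound][0], (upper_bound - low_bound) * n, MPI_FLOAT, 0,
         SLAVE_TO_MASTER_TAG + 2, MPI_COMM_WORLD);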

answered 2012-12-01T04:21:17.300