I'm having trouble with matrix multiplication using MPI.
The program reads two n x n matrices from two files and is supposed to multiply them using MPI, but I get a segmentation fault in one of the processes. This is the output I get when I run the code:
read matrix A from matrixA
read matrix B from matrixB
mpirun noticed that process rank 1 with PID 15599 on node VirtualBox exited on signal 11 (Segmentation fault).
Here is my code:
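(Not pasted below: the includes, the tag defines, and the declarations that sit above main. Roughly, they look like this; the tag values here are just placeholders:)

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>

#define MASTER_TO_SLAVE_TAG 1 /* placeholder tag value */
#define SLAVE_TO_MASTER_TAG 4 /* placeholder tag value */

/* Reads an m x n matrix from a file; a simplified sketch of it is at the end of the post. */
void read_matrix(char *fname, float ***a, float **sa, int *m, int *n);

int rank, size;             /* MPI rank and number of processes */
int low_bound, upper_bound; /* Row range assigned to a worker */
int portion;                /* Number of rows per worker */
MPI_Status status;
MPI_Request request;
double start_time, end_time;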
int main(int argc, char *argv[])
{
    int n;                /* Dimension of the matrix */
    float *sa, *sb, *sc;  /* Storage for matrix A, B, and C */
    float **a, **b, **c;  /* 2D arrays used to access matrix A, B, and C */
    int i, j, k;

    MPI_Init(&argc, &argv);               //Initialize MPI operations
    MPI_Comm_rank(MPI_COMM_WORLD, &rank); //Get the rank
    MPI_Comm_size(MPI_COMM_WORLD, &size); //Get the number of processes

    /* Check the number of arguments */
    if (argc != 4) {
        printf("Usage: %s fileA fileB fileC\n", argv[0]);
        return 1;
    }

    if (rank == 0)
    {
        /* Read matrix A */
        printf("read matrix A from %s\n", argv[1]);
        read_matrix(argv[1], &a, &sa, &i, &j);
        if (i != j) {
            printf("ERROR: matrix A not square\n");
            return 2;
        }
        n = i;
        //printf("%d", n);

        /* Read matrix B */
        printf("Read matrix B from %s\n", argv[2]);
        read_matrix(argv[2], &b, &sb, &i, &j);
        if (i != j) {
            printf("ERROR: matrix B not square\n");
            return 2;
        }
        if (n != i) {
            printf("ERROR: matrix A and B incompatible\n");
            return 2;
        }
    }
    printf("test");

    if (rank == 0)
    {
        /* Initialize matrix C */
        sc = (float*)malloc(n*n*sizeof(float));
        memset(sc, 0, n*n*sizeof(float));
        c = (float**)malloc(n*sizeof(float*));
        for (i = 0; i < n; i++) c[i] = &sc[i*n];
    }
////////////////////////////////////////////////////////////////////////////////////////////
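    /* Copy matrices A and B from their flat storage into n x n arrays that can be indexed as [i][j] */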
    float matrA[n][n];
    float matrB[n][n];
    float matrC[n][n];
    for (i = 0; i < n; i++)
    {
        for (j = 0; j < n; j++)
        {
            matrA[i][j] = sa[(i*n) + j];
            matrB[i][j] = sb[(i*n) + j];
        }
    }
    /* Master initializes work */
    if (rank == 0)
    {
        start_time = MPI_Wtime();
        for (i = 1; i < size; i++)
        {
            //For each slave other than the master,
            portion = (n / (size - 1)); //calculate the portion without the master.
            low_bound = (i - 1) * portion;
            if (((i + 1) == size) && ((n % (size - 1)) != 0))
            {
                //If the rows of [A] cannot be divided equally among the slaves,
                upper_bound = n; //the last slave gets all the remaining rows.
            }
            else
            {
                upper_bound = low_bound + portion; //Otherwise the rows of [A] are divided equally among the slaves.
            }
            //Send the low bound first, without blocking, to the intended slave.
            MPI_Isend(&low_bound, 1, MPI_INT, i, MASTER_TO_SLAVE_TAG, MPI_COMM_WORLD, &request);
            //Next send the upper bound, without blocking, to the intended slave.
            MPI_Isend(&upper_bound, 1, MPI_INT, i, MASTER_TO_SLAVE_TAG + 1, MPI_COMM_WORLD, &request);
            //Finally send the allocated row portion of [A], without blocking, to the intended slave.
            MPI_Isend(&matrA[low_bound][0], (upper_bound - low_bound) * n, MPI_FLOAT, i, MASTER_TO_SLAVE_TAG + 2, MPI_COMM_WORLD, &request);
        }
    }
    //Broadcast [B] to all the slaves.
    MPI_Bcast(&matrB, n*n, MPI_FLOAT, 0, MPI_COMM_WORLD);

    /* Work done by the slaves */
    if (rank > 0)
    {
        //Receive the low bound from the master.
        MPI_Recv(&low_bound, 1, MPI_INT, 0, MASTER_TO_SLAVE_TAG, MPI_COMM_WORLD, &status);
        //Next receive the upper bound from the master.
        MPI_Recv(&upper_bound, 1, MPI_INT, 0, MASTER_TO_SLAVE_TAG + 1, MPI_COMM_WORLD, &status);
        //Finally receive the row portion of [A] to be processed from the master.
        MPI_Recv(&matrA[low_bound][0], (upper_bound - low_bound) * n, MPI_FLOAT, 0, MASTER_TO_SLAVE_TAG + 2, MPI_COMM_WORLD, &status);
        for (i = low_bound; i < upper_bound; i++)
        {
            //Iterate through the assigned rows of [A].
            for (j = 0; j < n; j++)
            {
                //Iterate through the columns of [B].
                for (k = 0; k < n; k++)
                {
                    //Iterate through the rows of [B].
                    matrC[i][j] += (matrA[i][k] * matrB[k][j]);
                }
            }
        }
        //Send back the low bound first, without blocking, to the master.
        MPI_Isend(&low_bound, 1, MPI_INT, 0, SLAVE_TO_MASTER_TAG, MPI_COMM_WORLD, &request);
        //Send the upper bound next, without blocking, to the master.
        MPI_Isend(&upper_bound, 1, MPI_INT, 0, SLAVE_TO_MASTER_TAG + 1, MPI_COMM_WORLD, &request);
        //Finally send the processed portion of data, without blocking, to the master.
        MPI_Isend(&matrC[low_bound][0], (upper_bound - low_bound) * n, MPI_FLOAT, 0, SLAVE_TO_MASTER_TAG + 2, MPI_COMM_WORLD, &request);
    }
    /* Master gathers processed work */
    if (rank == 0)
    {
        for (i = 1; i < size; i++)
        {
            //Until all slaves have handed back the processed data,
            //receive the low bound from a slave.
            MPI_Recv(&low_bound, 1, MPI_INT, i, SLAVE_TO_MASTER_TAG, MPI_COMM_WORLD, &status);
            //Receive the upper bound from a slave.
            MPI_Recv(&upper_bound, 1, MPI_INT, i, SLAVE_TO_MASTER_TAG + 1, MPI_COMM_WORLD, &status);
            //Receive the processed data from a slave.
            MPI_Recv(&matrC[low_bound][0], (upper_bound - low_bound) * n, MPI_FLOAT, i, SLAVE_TO_MASTER_TAG + 2, MPI_COMM_WORLD, &status);
        }
        end_time = MPI_Wtime();
        printf("\nRunning Time = %f\n\n", end_time - start_time);
    }
    MPI_Finalize(); //Finalize MPI operations
    /* Do the multiplication */
    //////////////////////////////////////////////////// matmul(a, b, c, n);
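    /* Copy the assembled result from matrC back into the flat storage sc */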
    for (i = 0; i < n; i++)
    {
        for (j = 0; j < n; j++)
        {
            sc[(i*n) + j] = matrC[i][j];
        }
    }
}
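read_matrix isn't shown above; roughly, it reads the dimensions from the first line of the file and then the values in row-major order into one flat allocation plus an array of row pointers (the same layout used for sc and c above). A simplified sketch of it:

void read_matrix(char *fname, float ***a, float **sa, int *m, int *n)
{
    /* Simplified sketch; the real helper may handle the file format differently. */
    FILE *fp = fopen(fname, "r");
    int i, rows, cols;
    if (fp == NULL) {
        perror(fname);
        exit(1);
    }
    fscanf(fp, "%d %d", &rows, &cols);                 /* first line: dimensions */
    *sa = (float*)malloc(rows * cols * sizeof(float)); /* flat storage */
    *a = (float**)malloc(rows * sizeof(float*));       /* row pointers into the flat storage */
    for (i = 0; i < rows; i++)
        (*a)[i] = &(*sa)[i * cols];
    for (i = 0; i < rows * cols; i++)
        fscanf(fp, "%f", &(*sa)[i]);                   /* values in row-major order */
    fclose(fp);
    *m = rows;
    *n = cols;
}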