/**
* BLOCK_LOW
* Returns the offset of a local array
* with regards to block decomposition
* of a global array.
*
* @param (int) process rank
* @param (int) total number of processes
* @param (int) size of global array
* @return (int) offset of local array in global array
*/
#define BLOCK_LOW(id, p, n) ((id)*(n)/(p))
/**
* BLOCK_HIGH
* Returns the index immediately after the
* end of a local array with regards to
* block decomposition of a global array.
*
* @param (int) process rank
* @param (int) total number of processes
* @param (int) size of global array
* @return (int) offset after end of local array
*/
#define BLOCK_HIGH(id, p, n) (BLOCK_LOW((id)+1, (p), (n)))
/**
* BLOCK_SIZE
* Returns the size of a local array
* with regards to block decomposition
* of a global array.
*
* @param (int) process rank
* @param (int) total number of processes
* @param (int) size of global array
* @return (int) size of local array
*/
#define BLOCK_SIZE(id, p, n) ((BLOCK_HIGH((id), (p), (n))) - (BLOCK_LOW((id), (p), (n))))
/**
* BLOCK_OWNER
* Returns the rank of the process that
* handles a certain local array with
* regards to block decomposition of a
* global array.
*
* @param (int) index in global array
* @param (int) total number of processes
* @param (int) size of global array
* @return (int) rank of process that handles index
*/
#define BLOCK_OWNER(i, p, n) (((p)*((i)+1)-1)/(n))
/*Matricefilenames:
small matrix A.bin of dimension 100 × 50
small matrix B.bin of dimension 50 × 100
large matrix A.bin of dimension 1000 × 500
large matrix B.bin of dimension 500 × 1000
An MPI program should be implemented such that it can
• accept two file names at run-time,
• let process 0 read the A and B matrices from the two data files,
• let process 0 distribute the pieces of A and B to all the other processes,
• involve all the processes to carry out the the chosen parallel algorithm
for matrix multiplication C = A * B ,
• let process 0 gather, from all the other processes, the different pieces
of C ,
• let process 0 write out the entire C matrix to a data file.
*/
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include "mpi-utils.c"
void read_matrix_binaryformat (char*, double***, int*, int*);
void write_matrix_binaryformat (char*, double**, int, int);
void create_matrix (double***,int,int);
void matrix_multiplication (double ***, double ***, double ***,int,int, int);
int main(int argc, char *argv[]) {
int id,p; // Process rank and total amount of processes
int rowsA, colsA, rowsB, colsB; // Matrix dimensions
double **A; // Matrix A
double **B; // Matrix B
double **C; // Result matrix C : AB
int local_rows; // Local row dimension of the matrix A
double **local_A; // The local A matrix
double **local_C; // The local C matrix
MPI_Init (&argc, &argv);
MPI_Comm_rank (MPI_COMM_WORLD, &id);
MPI_Comm_size (MPI_COMM_WORLD, &p);
if(argc != 3) {
if(id == 0) {
printf("Usage:\n>> %s matrix_A matrix_B\n",argv[0]);
}
MPI_Finalize();
exit(1);
}
if (id == 0) {
read_matrix_binaryformat (argv[1], &A, &rowsA, &colsA);
read_matrix_binaryformat (argv[2], &B, &rowsB, &colsB);
}
if (p == 1) {
create_matrix(&C,rowsA,colsB);
matrix_multiplication (&A,&B,&C,rowsA,colsB,colsA);
char* filename = "matrix_C.bin";
write_matrix_binaryformat (filename, C, rowsA, colsB);
free(A);
free(B);
free(C);
MPI_Finalize();
return 0;
}
// For this assignment we have chosen to bcast the whole matrix B:
MPI_Bcast (&B, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Bcast (&colsA, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast (&colsB, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast (&rowsA, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast (&rowsB, 1, MPI_INT, 0, MPI_COMM_WORLD);
local_rows = BLOCK_SIZE(id, p, rowsA);
/* SCATTER VALUES */
int *proc_elements = (int*)malloc(p*sizeof(int)); // amount of elements for each processor
int *displace = (int*)malloc(p*sizeof(int)); // displacement of elements for each processor
int i;
for (i = 0; i<p; i++) {
proc_elements[i] = BLOCK_SIZE(i, p, rowsA)*colsA;
displace[i] = BLOCK_LOW(i, p, rowsA)*colsA;
}
create_matrix(&local_A,local_rows,colsA);
MPI_Scatterv(&A[0],&proc_elements[0],&displace[0],MPI_DOUBLE,&local_A[0],
local_rows*colsA,MPI_DOUBLE,0,MPI_COMM_WORLD);
/* END SCATTER VALUES */
create_matrix (&local_C,local_rows,colsB);
matrix_multiplication (&local_A,&B,&local_C,local_rows,colsB,colsA);
/* GATHER VALUES */
MPI_Gatherv(&local_C[0], rowsA*colsB, MPI_DOUBLE,&C[0],
&proc_elements[0],&displace[0],MPI_DOUBLE,0, MPI_COMM_WORLD);
/* END GATHER VALUES */
char* filename = "matrix_C.bin";
write_matrix_binaryformat (filename, C, rowsA, colsB);
free (proc_elements);
free (displace);
free (local_A);
free (local_C);
free (A);
free (B);
free (C);
MPI_Finalize ();
return 0;
}
void create_matrix (double ***C,int rows,int cols) {
*C = (double**)malloc(rows*sizeof(double*));
(*C)[0] = (double*)malloc(rows*cols*sizeof(double));
int i;
for (i=1; i<rows; i++)
(*C)[i] = (*C)[i-1] + cols;
}
void matrix_multiplication (double ***A, double ***B, double ***C, int rowsC,int colsC,int colsA) {
double sum;
int i,j,k;
for (i = 0; i < rowsC; i++) {
for (j = 0; j < colsC; j++) {
sum = 0.0;
for (k = 0; k < colsA; k++) {
sum = sum + (*A)[i][k]*(*B)[k][j];
}
(*C)[i][j] = sum;
}
}
}
/* Reads a 2D array from a binary file*/
void read_matrix_binaryformat (char* filename, double*** matrix, int* num_rows, int* num_cols) {
int i;
FILE* fp = fopen (filename,"rb");
fread (num_rows, sizeof(int), 1, fp);
fread (num_cols, sizeof(int), 1, fp);
/* storage allocation of the matrix */
*matrix = (double**)malloc((*num_rows)*sizeof(double*));
(*matrix)[0] = (double*)malloc((*num_rows)*(*num_cols)*sizeof(double));
for (i=1; i<(*num_rows); i++)
(*matrix)[i] = (*matrix)[i-1]+(*num_cols);
/* read in the entire matrix */
fread ((*matrix)[0], sizeof(double), (*num_rows)*(*num_cols), fp);
fclose (fp);
}
/* Writes a 2D array in a binary file */
void write_matrix_binaryformat (char* filename, double** matrix, int num_rows, int num_cols) {
FILE *fp = fopen (filename,"wb");
fwrite (&num_rows, sizeof(int), 1, fp);
fwrite (&num_cols, sizeof(int), 1, fp);
fwrite (matrix[0], sizeof(double), num_rows*num_cols, fp);
fclose (fp);
}
我的任务是对矩阵 A 和 B 进行并行矩阵乘法,并将结果收集到矩阵 C 中。
我通过将矩阵 A 划分为行段来做到这一点,每个进程将使用它的部分来乘以矩阵 B,并从乘法中取回它的部分。然后我将收集过程中的所有部分并将它们放在一起到矩阵 C 中。
我已经发布了一个类似的问题,但是这段代码得到了改进,我已经取得了进步,但是在 scatterv 调用之后我仍然遇到分段错误。