6

I have a few problems with an MPI program in C. I want to send two messages from the slaves to the master (using MPI_Send, MPI_Irecv and MPI_Test), but only the first message is received. After that, I get an infinite loop in which I always receive a message from slave -1 (according to status.MPI_SOURCE).

So I don't understand why I receive all these messages from an unknown process (-1)...

My code :

#include <stdio.h>
#include <mpi.h>
#include <sys/time.h>

/*
 * Program from the question: ranks 1 and 2 each send one int to rank 0.
 * Rank 0 posts a single non-blocking receive, polls it with MPI_Test in a
 * loop and adds up the received values until the sum reaches 3.
 */
int main(int argc, char *argv[])
{

int rank, size;
MPI_Status status; /* not used: the master branch declares its own status */

/* Init */
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);

if (rank != 0) { // Slaves
    int buf;

    /* Rank 1 sends the value 1 to rank 0 with tag 0. */
    if (rank == 1) {
        buf = 1;
        MPI_Send(&buf, 1, MPI_INT, 0, 0, MPI_COMM_WORLD); 
    }
    /* Rank 2 sends the value 2 to rank 0 with tag 0. */
    if (rank == 2) {
        buf = 2;
        MPI_Send(&buf, 1, MPI_INT, 0, 0, MPI_COMM_WORLD); 
    }

}
else { // Master
    int sum = 0;
    int flag, res;
    MPI_Request request;
    MPI_Status status;

    /* The only receive this program ever posts: it can match one message. */
    MPI_Irecv(&res, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &request);

    while (1) { 
        flag = 0;

        /*
         * The same request is tested on every iteration, including after it
         * has already completed once.
         * NOTE(review): per the MPI standard, MPI_Test on a null/inactive
         * request returns flag = true with an "empty" status whose MPI_SOURCE
         * is MPI_ANY_SOURCE - presumably the -1 reported in the question;
         * confirm against the MPI implementation in use.
         */
        MPI_Test(&request, &flag, &status);

        if (flag != 0) { 
            printf("recv : %d, slave : %d\n", res, status.MPI_SOURCE);
            /* Ignore results whose source is the -1 value seen in the output. */
            if (status.MPI_SOURCE != -1) 
                sum += res;
        }
        else
            printf("fail!\n");

        /* Exit only once both values (1 + 2) have been added. */
        if (sum == 3)
            break;
    }

    printf("sum : %d\n", sum);
}

MPI_Finalize();
return 0;

}

Thanks.

ps : sorry for my english

4

2 回答 2

12

The key point is that you must call MPI_Irecv once for each message you expect to receive. So in your case you have to call it exactly 2 times: no more, no less.

Let's take a look at the code changed only by moving the MPI_Irecv call inside the loop. This is not correct and will not work:

/*
 * Deliberately incorrect variant (fragment of the master branch only):
 * MPI_Irecv is posted on every loop iteration, whether or not the previous
 * receive has completed.
 */
else { // Master
int sum = 0;
int flag, res;
MPI_Request request;
MPI_Status status;

while (1) { 
    flag = 0;
    /*
     * A new receive is posted each time through the loop and its handle
     * overwrites `request`, so handles of earlier, still-pending receives
     * are lost and are never tested again.
     */
    MPI_Irecv(&res, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &request);
    /* Only the most recently posted receive is tested here. */
    MPI_Test(&request, &flag, &status);
    if (flag != 0) { 
        printf("recv : %d, slave : %d\n", res, status.MPI_SOURCE);
        if (status.MPI_SOURCE != -1) 
            sum += res;
    }
    else
        printf("fail!\n");

    if (sum == 3)
        break;
}

Assuming that the messages sent by the slaves are delivered at unpredictable times (which is always the case when we are talking about distributed systems or threads), it is easy to imagine the following situation:

Moment of time | event

0              | called first MPI_Irecv, allocated memory for MPI_Request object
1              | called second MPI_Irecv, allocated memory for MPI_Request (lets say) object2
2              | called third MPI_Irecv, allocated memory for MPI_Request object3
3              | called MPI_Send in slave no. 1
4              | called MPI_Send in slave no. 2
5              | master received the message from slave no. 1, filled object, flag variable is still 0 because it is related to object3
6              | master received the message from slave no. 2, filled object2, flag variable is still 0 because it is related to object3
7,8,9...       | infinite loop, flag still has value 0
 n            | error: MPI_Irecv(147): MPI_Irecv(buf=0x7fffecfa60c4, count=1, MPI_INT, src=MPI_ANY_SOURCE, tag=MPI_ANY_TAG, MPI_COMM_WORLD, request=0x7fffecfa60c8)
MPID_Irecv(53): failure occurred while allocating memory for a request object

There are two solutions. First, you can slow down the master process by calling sleep(3) before the while loop, so that it definitely starts after the MPI_Send calls have been made.

The second, better-engineered approach is to call MPI_Irecv only when we are expecting a message. Initially, call MPI_Irecv and assign the value 0 to the flag. After a message has been received, set the flag back to -1, and call MPI_Irecv again only when the flag has the value -1.

Here is the code that works:

#include <stdio.h>
#include <mpi.h>
#include <sys/time.h>

int main(int argc, char *argv[])
{

int rank, size;
MPI_Status status;

/* Init */
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);

if (rank != 0) { // Slaves
    int buf;

    if (rank == 1) {
        buf = 1;
        MPI_Send(&buf, 1, MPI_INT, 0, 0, MPI_COMM_WORLD); 
    }
    if (rank == 2) {
        buf = 2;
        MPI_Send(&buf, 1, MPI_INT, 0, 0, MPI_COMM_WORLD); 
    }

}
else { // Master
    int sum = 0;
    int flag = -1, res;
    MPI_Request request;
    MPI_Status status;
    while (1) { 
    if(flag != 0)
    {
        MPI_Irecv(&res, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &request);
        flag = 0;
    }
        MPI_Test(&request, &flag, &status);

        if (flag != 0) { 
            printf("recv : %d, slave : %d\n", res, status.MPI_SOURCE);
            if (status.MPI_SOURCE != -1) 
                sum += res;
        flag = -1;
        }


        if (sum == 3)
            break;
    }

    printf("sum : %d\n", sum);
}

MPI_Finalize();
return 0;

}
于 2013-05-16T14:17:47.260 回答
4

The problem is that the master only ever posts one receive. You'll need to move the call to MPI_Irecv inside the loop, at the point where MPI_Test returned with success (inside the if (status.MPI_SOURCE != -1) block), so that subsequent messages can be received.

于 2013-02-26T17:53:20.913 回答