1
#include <math.h>
#include <time.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*-------------------------------------*/
#include<fcntl.h>
#include<unistd.h>
#include<sys/stat.h>
#include<sys/time.h>
#include<sys/mman.h>
#include<sys/types.h>
#include <mpi.h>

#define PERMS 0600
#define PAGE 4096

int ntask=0, mytask=0;

void
write_result (char *detail)
{
    int fd;

    char *filename = (char *) malloc (1000*sizeof(char));

    sprintf(filename, "%d", mytask);

    strcat(filename,".fa");

    fd = open (filename, O_CREAT | O_RDWR, S_IRWXU);

    write (fd, detail, strlen (detail));

    close (fd);
}

char *
mmaping (char *source)
{
    int src;
    char *sm;
    struct stat statbuf;

    if ((src = open (source, O_RDONLY)) < 0)
    {
        perror (" open source ");
        exit (EXIT_FAILURE);
    }

    if (fstat (src, &statbuf) < 0)
    {
        perror (" fstat source ");
        exit (EXIT_FAILURE);
    }
    printf("task->%d\n",mytask);
    printf("total task->%d\n",ntask);

    int piece = statbuf.st_size/PAGE;
    int share = piece/ntask;

    printf("share->%d\n",share);
    sm = mmap (0,statbuf.st_size/ntask, PROT_READ, MAP_SHARED | MAP_NORESERVE,src, mytask*share*PAGE);

    printf("%lld\n",(long long int) statbuf.st_size);

    if (MAP_FAILED == sm)
    {
        perror (" mmap source ");
        exit (EXIT_FAILURE);
    }

    return sm;
}

int main(int argc, char **argv)
{

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &ntask);
    MPI_Comm_rank(MPI_COMM_WORLD, &mytask);

    char *x = mmaping(argv[1]);
    printf("%d\n",strlen(x));

    write_result(x);

    MPI_Finalize();
    return 0;
}

每个线程读取一个文件的一部分并将其写下来。

在本地机器上工作得很好(mpich)在集群上出错(openmpi)

我的程序在函数 write_result 中的 close(fd) 处终止。

这是错误消息

[kalkyl3:30022] *** Process received signal ***
[kalkyl3:30022] Signal: Segmentation fault (11)
[kalkyl3:30022] Signal code: Address not mapped (1)
[kalkyl3:30022] Failing at address: 0x2b9d952d7000
[kalkyl3:30022] [ 0] /lib64/libpthread.so.0() [0x371900f4a0]
[kalkyl3:30022] [ 1] /lib64/libc.so.6() [0x371892fcef]
[kalkyl3:30022] [ 2] ./a.out(write_result+0x58) [0x400df8]
[kalkyl3:30022] [ 3] ./a.out(main+0x4c) [0x400e6c]
[kalkyl3:30022] [ 4] /lib64/libc.so.6(__libc_start_main+0xfd) [0x371881ecdd]
[kalkyl3:30022] [ 5] ./a.out() [0x400ba9]
[kalkyl3:30022] *** End of error message ***

在搜索了相关问题后,我猜测问题可能来自文件打开和关闭过程。由一个特定线程打开的文件应该由同一个线程关闭。也许 openmpi 无法识别哪个线程打开了它,哪个线程关闭了它。但它在 MPICH 上运行良好。

4

0 回答 0