0

我正在使用 MPI 测试 FFTW 的一维复数到复数(c2c)变换。按照 FFTW3 教程编译并运行其中的二维示例后,我发现在较大的尺寸(例如 4096x4096)下,使用 8 个节点的执行速度大约是只使用 1 个节点时的 2 倍。

于是我把这个示例改成了一维版本,但结果出现了问题,测得的运行时间如下:

1 node  = 0.763668 s
2 nodes = 1.540884 s
4 nodes = 1.336446 s
8 nodes = 0.851646 s

我的代码:

    #include <fftw3-mpi.h>
    # include <stdlib.h>
    # include <stdio.h>
    #include <sys/stat.h>
    #include <fcntl.h>
    # include <time.h>
    #include <math.h>

    /*
     * Benchmark for FFTW's distributed-memory (MPI) 1D complex-to-complex DFT.
     *
     * Every rank fills its local slice of the input with pseudo-random values,
     * then all ranks collectively execute one forward and one backward
     * transform of length N0. Rank 0 prints the elapsed wall-clock time.
     *
     * Returns 0 on success; aborts the whole MPI job if an allocation or a
     * plan creation fails.
     */
    int main(int argc, char **argv)
    {
        /* Transform length (other sizes that were tried: 4096, 8388608). */
        const ptrdiff_t N0 = 4194304;
        fftw_plan planForw, planBack;
        fftw_complex *data, *dataOut, *data2;
        ptrdiff_t alloc_fwd, alloc_bwd, alloc_local;
        ptrdiff_t local_ni, local_i_start, local_no, local_o_start;
        ptrdiff_t bwd_ni, bwd_i_start, bwd_no, bwd_o_start;
        ptrdiff_t i;
        int rank;
        double startwtime, endwtime;

        MPI_Init(&argc, &argv);
        fftw_mpi_init();
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);

        /*
         * Query the local storage requirement for BOTH transform directions.
         * The 1D local-size routine takes the sign and flags as inputs, so the
         * backward plan is not guaranteed to need the same amount as the
         * forward one; allocating the larger of the two is always safe.
         */
        alloc_fwd = fftw_mpi_local_size_1d(N0, MPI_COMM_WORLD,
                                           FFTW_FORWARD, FFTW_ESTIMATE,
                                           &local_ni, &local_i_start,
                                           &local_no, &local_o_start);
        alloc_bwd = fftw_mpi_local_size_1d(N0, MPI_COMM_WORLD,
                                           FFTW_BACKWARD, FFTW_ESTIMATE,
                                           &bwd_ni, &bwd_i_start,
                                           &bwd_no, &bwd_o_start);
        alloc_local = (alloc_fwd > alloc_bwd) ? alloc_fwd : alloc_bwd;

        data    = fftw_alloc_complex(alloc_local);
        data2   = fftw_alloc_complex(alloc_local);
        dataOut = fftw_alloc_complex(alloc_local);
        if (data == NULL || data2 == NULL || dataOut == NULL) {
            fprintf(stderr, "rank %d: out of memory\n", rank);
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        }

        /* Create the plans: data -> data2 (forward), data2 -> dataOut (backward). */
        planForw = fftw_mpi_plan_dft_1d(N0, data, data2, MPI_COMM_WORLD,
                                        FFTW_FORWARD, FFTW_ESTIMATE);
        planBack = fftw_mpi_plan_dft_1d(N0, data2, dataOut, MPI_COMM_WORLD,
                                        FFTW_BACKWARD, FFTW_ESTIMATE);
        if (planForw == NULL || planBack == NULL) {
            fprintf(stderr, "rank %d: plan creation failed\n", rank);
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        }

        /* Fill the locally owned part of the input with values in [0, 1]. */
        for (i = 0; i < local_ni; ++i) {
            data[i][0] = rand() / (double)RAND_MAX;
            data[i][1] = rand() / (double)RAND_MAX;
        }

        /*
         * Synchronize before starting and before stopping the clock so the
         * measurement covers the transforms on all ranks rather than whatever
         * rank 0 happens to observe locally.
         */
        MPI_Barrier(MPI_COMM_WORLD);
        startwtime = MPI_Wtime();

        fftw_execute(planForw);
        fftw_execute(planBack);

        MPI_Barrier(MPI_COMM_WORLD);
        endwtime = MPI_Wtime();

        if (rank == 0) {
            printf("wall clock time = %f\n", endwtime - startwtime);
        }

        /* Release plans and buffers, then shut down FFTW-MPI before MPI. */
        fftw_destroy_plan(planForw);
        fftw_destroy_plan(planBack);
        fftw_free(data);
        fftw_free(data2);
        fftw_free(dataOut);
        fftw_mpi_cleanup();
        MPI_Finalize();
        return 0;
    }
4

0 回答 0