2

我尝试使用 FFTW 3.3 的刨床 fftw_mpi_plan_dft_r2c_2d 计算傅立叶变换。不幸的是,我无法让它工作。如果 N0 等于处理器数量 (nb_proc),则结果是正确的,但当 N0 != nb_proc 时结果是错误的。

显示我的问题的示例:

#include <stdio.h>
#include <complex.h>
#include <fftw3-mpi.h>

int main(int argc, char **argv)
{
/* if N0 (=ny) is equal to nb_proc, result are OK */
/* if N0 is not equal to nb_proc => bug */
const ptrdiff_t N0 = 4, N1 = 4;
int coef_norm = N0*N1;
fftw_plan plan_forward;
double *carrayX;
fftw_complex *carrayK;
ptrdiff_t n_alloc_local, i, j;
ptrdiff_t nX0loc, iX0loc_start, nK0loc, nK1loc;
/* X and K denote physical and Fourier spaces. */
int rank, nb_proc, irank;

MPI_Init(&argc, &argv);
fftw_mpi_init();

/*DETERMINE RANK OF THIS PROCESSOR*/
MPI_Comm_rank(MPI_COMM_WORLD, &rank); 
/*DETERMINE TOTAL NUMBER OF PROCESSORS*/
MPI_Comm_size(MPI_COMM_WORLD, &nb_proc);

if (rank==0) printf("program test_fftw3_2Dmpi_simple\n");
printf("I'm rank (processor number) %i of size %i\n", rank, nb_proc);



n_alloc_local = fftw_mpi_local_size_2d(N0, N1/2+1, MPI_COMM_WORLD,
                                       &nX0loc, &iX0loc_start);

carrayX = fftw_alloc_real(2 * n_alloc_local);
carrayK = fftw_alloc_complex(n_alloc_local);

/* create plan for out-of-place r2c DFT */
plan_forward = fftw_mpi_plan_dft_r2c_2d(N0, N1, 
                                        carrayX, carrayK, 
                                        MPI_COMM_WORLD,
                                        FFTW_MEASURE);


nK0loc = nX0loc;
nK1loc = N1/2+1;

/* initialize carrayX to a constant */
for (i = 0; i < nX0loc; ++i) for (j = 0; j < N1; ++j)
    carrayX[i*N1 + j] = 1.;

/* compute forward transform and normalize */
fftw_execute(plan_forward);
for (i = 0; i < nK0loc; ++i) for (j = 0; j < nK1loc; ++j)
    carrayK[i*nK1loc + j] = carrayK[i*nK1loc + j]/coef_norm;

/* print carrayK, there should be only one 1 in the first case for rank=0 */
for (irank = 0; irank<nb_proc; irank++)
{
    MPI_Barrier(MPI_COMM_WORLD);
    if (rank == irank)
    {
        for (i = 0; i < nK0loc; ++i) for (j = 0; j < nK1loc; ++j)
        {
             printf("rank = %i, carrayK[%ti*nK1loc + %ti] = (%6.4f, %6.4f)\n", 
                    rank, i, j,
                    creal(carrayK[i*nK1loc + j]), 
                    cimag(carrayK[i*nK1loc + j]));
        }
        printf("\n");
    }
}
MPI_Barrier(MPI_COMM_WORLD);


fftw_destroy_plan(plan_forward);
MPI_Finalize();
}

这个例子有问题,但我不明白是什么。

对于这种情况(N0 = 4,N1 = 4),结果是正确的

mpirun -np 4 ./test_fftw3_2Dmpi_simple

但不与

mpirun -np 2 ./test_fftw3_2Dmpi_simple

PS:与标志 FFTW_MPI_TRANSPOSED_OUT 相同。

4

0 回答 0