
I have this serial code that I am trying to convert into parallel code using MPI. However, I can't seem to get the MPI_Scatter() function to work without crashing. The function loops over an array called cells and modifies some of the values.

Here is the original serial code:

int accelerate_flow(const t_param params, t_speed* cells, int* obstacles)
{
  register int ii,jj;     /* generic counters */
  register float w1,w2;  /* weighting factors */
  /* compute weighting factors */
  w1 = params.density * params.accel * oneover9;
  w2 = params.density * params.accel * oneover36;

  int i;

  /* modify the first column of the grid */
  jj=0;

  for(ii=0;ii<params.ny;ii++)
  {

      if( !obstacles[ii*params.nx] && (cells[ii*params.nx].speeds[3] > w1 &&
          cells[ii*params.nx].speeds[6] > w2 && cells[ii*params.nx].speeds[7] > w2))  
      {
          /* increase 'east-side' densities */
          cells[ii*params.nx].speeds[1] += w1;
          cells[ii*params.nx].speeds[5] += w2;
          cells[ii*params.nx].speeds[8] += w2;
          /* decrease 'west-side' densities */
          cells[ii*params.nx].speeds[3] -= w1;
          cells[ii*params.nx].speeds[6] -= w2;
          cells[ii*params.nx].speeds[7] -= w2;
      }
  }

  return EXIT_SUCCESS;
}

Here is my attempt using MPI:

int accelerate_flow(const t_param params, t_speed* cells, int* obstacles, int myrank, int ntasks)
{
    register int ii,jj = 0;     /* generic counters */
    register float w1,w2;  /* weighting factors */
    int recvSize;
    int cellsSendTag = 123, cellsRecvTag = 321;
    int size = params.ny / ntasks, i;
    MPI_Request* cellsSend, *cellsRecieve;
    MPI_Status *status;

    /* compute weighting factors */
    w1 = params.density * params.accel * oneover9;
    w2 = params.density * params.accel * oneover36;

    t_speed* recvCells = (t_speed*)malloc(size*sizeof(t_speed)*params.nx);

    MPI_Scatter(cells, sizeof(t_speed)*params.nx*params.ny, MPI_BYTE, recvCells, 
      size*sizeof(t_speed)*params.nx, MPI_BYTE, 0, MPI_COMM_WORLD);

    for(ii= 0;ii < size;ii++)
    {
        if( !obstacles[ii*params.nx] && (recvCells[ii*params.nx].speeds[3] > w1 &&
             recvCells[ii*params.nx].speeds[6] > w2 && recvCells[ii*params.nx].speeds[7] > w2))
        {

           /* increase 'east-side' densities */
           recvCells[ii*params.nx].speeds[1] += w1;
           recvCells[ii*params.nx].speeds[5] += w2;
           recvCells[ii*params.nx].speeds[8] += w2;
           /* decrease 'west-side' densities */
           recvCells[ii*params.nx].speeds[3] -= w1;
           recvCells[ii*params.nx].speeds[6] -= w2;
           recvCells[ii*params.nx].speeds[7] -= w2;
        }
   }

    MPI_Gather(recvCells, size*sizeof(t_speed)*params.nx, MPI_BYTE, cells,
        params.ny*sizeof(t_speed)*params.nx, MPI_BYTE, 0, MPI_COMM_WORLD);

    return EXIT_SUCCESS;
}

Here is the t_speed struct:

typedef struct {
    float speeds[NSPEEDS];
} t_speed;

params.nx = 300, params.ny = 200

Any help is much appreciated. Thanks.


1 Answer


The first count argument to MPI_Scatter is the number of elements to send to each process, not the total number. Here, the send count and the receive count are the same, nx*ny/ntasks elements, so you'd have something like:

int count = params.nx*params.ny/ntasks;

MPI_Scatter(cells,     sizeof(t_speed)*count, MPI_BYTE,
            recvCells, sizeof(t_speed)*count, MPI_BYTE, 0, MPI_COMM_WORLD);
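
The MPI_Gather at the end of your function follows the same convention: both its send count and the root's receive count are the number of elements contributed by each process, not the total. A minimal sketch, reusing the count variable above:

MPI_Gather(recvCells, sizeof(t_speed)*count, MPI_BYTE,
           cells,     sizeof(t_speed)*count, MPI_BYTE, 0, MPI_COMM_WORLD);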

Note that this fixed count only works if ntasks evenly divides nx*ny; otherwise you'd have to use Scatterv.
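
For that uneven case, here is a minimal sketch of the Scatterv variant, splitting the grid by rows; the rowBytes, sendcounts, displs, and myrows names are illustrative, not from the code above, and recvCells would need to be allocated for myrows*params.nx cells on each rank:

int rowBytes = sizeof(t_speed) * params.nx;   /* bytes per grid row */
int *sendcounts = malloc(ntasks * sizeof(int));
int *displs     = malloc(ntasks * sizeof(int));
for (int r = 0, offset = 0; r < ntasks; r++) {
    /* the first ny%ntasks ranks each take one extra row */
    int rows = params.ny/ntasks + (r < params.ny%ntasks ? 1 : 0);
    sendcounts[r] = rows * rowBytes;
    displs[r]     = offset;
    offset       += rows * rowBytes;
}

int myrows = params.ny/ntasks + (myrank < params.ny%ntasks ? 1 : 0);
MPI_Scatterv(cells, sendcounts, displs, MPI_BYTE,
             recvCells, myrows * rowBytes, MPI_BYTE,
             0, MPI_COMM_WORLD);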

answered 2012-08-26T14:01:38.743