0

我是 OpenMP 的新手,我正在尝试并行化这个循环:

/* Collision step, shown as a flat fragment: for every cell that is not
** flagged in obstacles[], read the NSPEEDS densities from tmp_cells,
** derive the local density and average velocity, build the equilibrium
** densities d_equ[], and write the relaxed values into cells[] using
** params.omega.  The grid is addressed row-major as ii*params.nx + jj,
** with ii running over params.ny rows and jj over params.nx columns.
** NOTE(review): the fragment indexes speeds[0..8], u[1..8] and
** d_equ[0..8], so it presumably expects NSPEEDS == 9 -- confirm. */
int ii,jj,kk;                 /* generic counters */
const double c_sq = 1.0/3.0;  /* square of speed of sound */
const double w0 = 4.0/9.0;    /* weighting factor */
const double w1 = 1.0/9.0;    /* weighting factor */
const double w2 = 1.0/36.0;   /* weighting factor */
double u_x,u_y;               /* av. velocities in x and y directions */
double u[NSPEEDS];            /* directional velocities */
double d_equ[NSPEEDS];        /* equilibrium densities */
double u_sq;                  /* squared velocity */
double local_density;         /* sum of densities in a particular cell */

/* loop over the cells in the grid
** NB the collision step is called after
** the propagate step and so values of interest
** are in the scratch-space grid */
/* NOTE(review): the disabled pragma below lists only ii, jj, kk and d_equ
** as private.  local_density, u_x, u_y, u_sq and u[] are declared outside
** the loop and rewritten for every cell, so under OpenMP's default
** data-sharing rules they would be shared between threads if the pragma
** were enabled as written -- a data race on the per-cell scratch values. */
//#pragma omp parallel for private (ii, jj, kk, d_equ) shared (cells, tmp_cells)
for(ii=0;ii<params.ny;ii++) {
 for(jj=0;jj<params.nx;jj++) {
  /* don't consider occupied cells */
  if(!obstacles[ii*params.nx + jj]) {
    /* compute local density total */
    local_density = 0.0;
    for(kk=0;kk<NSPEEDS;kk++) {
      local_density += tmp_cells[ii*params.nx + jj].speeds[kk];
    }
    /* compute x velocity component:
    ** (speeds 1,5,8) minus (speeds 3,6,7), divided by the density.
    ** NOTE(review): no guard against local_density == 0 is visible here. */
    u_x = (tmp_cells[ii*params.nx + jj].speeds[1] +
           tmp_cells[ii*params.nx + jj].speeds[5] +
           tmp_cells[ii*params.nx + jj].speeds[8]
           - (tmp_cells[ii*params.nx + jj].speeds[3] +
              tmp_cells[ii*params.nx + jj].speeds[6] +
              tmp_cells[ii*params.nx + jj].speeds[7]))
      / local_density;
    /* compute y velocity component:
    ** (speeds 2,5,6) minus (speeds 4,7,8), divided by the density */
    u_y = (tmp_cells[ii*params.nx + jj].speeds[2] +
           tmp_cells[ii*params.nx + jj].speeds[5] +
           tmp_cells[ii*params.nx + jj].speeds[6]
           - (tmp_cells[ii*params.nx + jj].speeds[4] +
              tmp_cells[ii*params.nx + jj].speeds[7] +
              tmp_cells[ii*params.nx + jj].speeds[8]))
      / local_density;
    /* velocity squared */
    u_sq = u_x * u_x + u_y * u_y;
    /* directional velocity components
    ** (u[0] is never assigned and never read in this fragment) */
    u[1] =   u_x;        /* east */
    u[2] =         u_y;  /* north */
    u[3] = - u_x;        /* west */
    u[4] =       - u_y;  /* south */
    u[5] =   u_x + u_y;  /* north-east */
    u[6] = - u_x + u_y;  /* north-west */
    u[7] = - u_x - u_y;  /* south-west */
    u[8] =   u_x - u_y;  /* south-east */
    /* equilibrium densities */
    /* zero velocity density: weight w0 */
    d_equ[0] = w0 * local_density * (1.0 - u_sq / (2.0 * c_sq));
    /* axis speeds: weight w1 */
    d_equ[1] = w1 * local_density * (1.0 + u[1] / c_sq
                                     + (u[1] * u[1]) / (2.0 * c_sq * c_sq)
                                     - u_sq / (2.0 * c_sq));
    d_equ[2] = w1 * local_density * (1.0 + u[2] / c_sq
                                     + (u[2] * u[2]) / (2.0 * c_sq * c_sq)
                                     - u_sq / (2.0 * c_sq));
    d_equ[3] = w1 * local_density * (1.0 + u[3] / c_sq
                                     + (u[3] * u[3]) / (2.0 * c_sq * c_sq)
                                     - u_sq / (2.0 * c_sq));
    d_equ[4] = w1 * local_density * (1.0 + u[4] / c_sq
                                     + (u[4] * u[4]) / (2.0 * c_sq * c_sq)
                                     - u_sq / (2.0 * c_sq));
    /* diagonal speeds: weight w2 */
    d_equ[5] = w2 * local_density * (1.0 + u[5] / c_sq
                                     + (u[5] * u[5]) / (2.0 * c_sq * c_sq)
                                     - u_sq / (2.0 * c_sq));
    d_equ[6] = w2 * local_density * (1.0 + u[6] / c_sq
                                     + (u[6] * u[6]) / (2.0 * c_sq * c_sq)
                                     - u_sq / (2.0 * c_sq));
    d_equ[7] = w2 * local_density * (1.0 + u[7] / c_sq
                                     + (u[7] * u[7]) / (2.0 * c_sq * c_sq)
                                     - u_sq / (2.0 * c_sq));
    d_equ[8] = w2 * local_density * (1.0 + u[8] / c_sq
                                     + (u[8] * u[8]) / (2.0 * c_sq * c_sq)
                                     - u_sq / (2.0 * c_sq));
    /* relaxation step: move each speed towards its equilibrium value by
    ** the factor params.omega, reading tmp_cells and writing cells */
    for(kk=0;kk<NSPEEDS;kk++) {
      cells[ii*params.nx + jj].speeds[kk] = (tmp_cells[ii*params.nx + jj].speeds[kk]
                                             + params.omega *
                                             (d_equ[kk] - tmp_cells[ii*params.nx + jj].speeds[kk]));
      }
    }
  }
}

对于 300x200 的 d2q9 LB 网格,params.nx = 300、params.ny = 200……我注释掉的那条 pragma 语句不但没有带来加速,反而使性能下降,还让雷诺数的结果出错……我尝试把两个 for 循环合并成一个,以避免可能的伪共享(false sharing),改成了下面这样:

c=params.nx*params.ny;
#pragma omp for private (ii,jj,kk,d_equ) shared (cells, tmp_cells)
for(ii=0;ii<c;ii++) {
  /* don't consider occupied cells */
   if(obstacles[ii]) {
    /* called after propagate, so taking values from scratch space
    ** mirroring, and writing into main grid */
    cells[ii].speeds[1] = tmp_cells[ii].speeds[3];
    cells[ii].speeds[2] = tmp_cells[ii].speeds[4];
    ......
    ....
  }

这条编译指示仍然没能带来加速……不过这次得到的结果是正确的……我从上周六起就一直在折腾这个问题,却始终没有达到想要的效果……在网上也找不到太多有用的资料。非常感谢任何帮助……

4

1 回答 1

0

这是我的疯狂猜测:

  • 您的 #pragma 指令(declaration)可能缺少 parallel 关键字,导致循环没有被并行化。
  • 您在代码开头声明的大多数变量都没有定义为私有的,因此它们被隐式定义为共享的。这使得它们在第一种情况下容易受到竞争条件的影响(但在第二种情况下不会,因为您的代码是按顺序运行的)。您应该将它们定义为私有或(甚至更好)在 for 循环中声明它们,从而将它们设为私有:
for(i = 0; i < params.nx * params.ny ; i++) {
    double u[NSPEEDS];
    double d_equ[NSPEEDS];
    ...
    int kk;
}
于 2013-10-18T01:56:29.317 回答