代码:
/* Return a pseudo-random x coordinate in [0, 1]: one rand() draw scaled by RAND_MAX. */
double x(){
    const double draw = (double)rand();
    const double span = (double)RAND_MAX;
    return draw / span;
}
/* Return a pseudo-random y coordinate in [0, 1]: one rand() draw scaled by RAND_MAX. */
double y(){
    const double draw = (double)rand();
    const double span = (double)RAND_MAX;
    return draw / span;
}
/* Return a pseudo-random z coordinate in [0, 1]: one rand() draw scaled by RAND_MAX. */
double z(){
    const double draw = (double)rand();
    const double span = (double)RAND_MAX;
    return draw / span;
}
/*
 * Indicator function of the integration region.
 *
 * Returns 1 when the point (x, y, z) satisfies x^2 + y^2 < 1 (strictly)
 * and 0 <= z <= 1 (inclusive), and 0 otherwise.
 *
 * The squares are computed with plain multiplication instead of pow(x, 2):
 * this function runs once per Monte Carlo sample, and a multiplication
 * avoids a library call in that hot path.
 */
int d(double x, double y, double z){
    const int inside_disc = (x * x + y * y) < 1.0;
    const int inside_slab = (z >= 0.0) && (z <= 1.0);
    return inside_disc && inside_slab;
}
/*
 * Integrand evaluated at (x, y, z).
 * It is the constant 1 here, so the arguments are deliberately ignored.
 */
double f(double x, double y, double z){
    (void)x;
    (void)y;
    (void)z;
    return 1.0;
}
/*
 * Monte Carlo sampling loop: each thread draws points, accumulates f() over
 * the points accepted by d() into thread-private partial sums, and then
 * merges its partial sums into the shared totals.
 */
#pragma omp parallel default(none) private(id,numt,j,local_sum,local_good_dots,local_coi,x_,y_,z_) shared(total_sum,good_dots,count_of_iterations)
{
    /* Variables listed in private() start with indeterminate values inside
       the region, so the accumulators must be zeroed before they are used. */
    local_sum = 0;
    local_good_dots = 0;
    local_coi = count_of_iterations;
    id = omp_get_thread_num() + 1;   /* 1-based thread number; not read in this region */
    numt = omp_get_num_threads();    /* team size; not read in this region */
    #pragma omp for
    for (j = 1; j <= local_coi; j++){
        /* x(), y() and z() draw from rand(), which keeps hidden state shared
           by all threads; the named critical section serialises the draws so
           that state is never updated concurrently. */
        #pragma omp critical(prng)
        {
            x_=x();
            y_=y();
            z_=z();
        }
        if (d(x_,y_,z_) == 1){
            local_sum += f(x_,y_,z_);
            local_good_dots += 1;
        }
    }
    /* Merge this thread's partial results into the shared totals, one thread
       at a time. */
    #pragma omp critical
    {
        total_sum = total_sum + local_sum;
        good_dots = good_dots + local_good_dots;
    }
}
注释:此代码是蒙特卡洛方法的实现,用于计算函数 f() 在区域 d() 上的三维积分。
我希望这段代码在多线程模式(openmp)下运行得更快。
但是出了点问题。
经过几个小时的修改(在 OpenMP pragma 中使用 reduction、简化 if 条件(如 f(x_,y_,z_) * d(x_,y_,z_))),我仍然不明白,为什么这个简单的循环在线程更多时反而变得更慢。
但是,当我在循环之前为每个坐标生成一个 3 维数组并将其放入 shared 子句之后,我的程序变得更快了。
所以,问题:
应该如何修改这段代码?并行块中允许使用哪些函数(操作)?
PS:据我所知,rand 函数是不允许在并行块中使用的(或者我错了?)
感谢帮助!
修改(在@HristoIliev 的帮助下)
/* Return a pseudo-random x coordinate in [0, 1]: one rand() draw scaled by RAND_MAX. */
double x(){
    const double draw = (double)rand();
    const double span = (double)RAND_MAX;
    return draw / span;
}
/* Return a pseudo-random y coordinate in [0, 1]: one rand() draw scaled by RAND_MAX. */
double y(){
    const double draw = (double)rand();
    const double span = (double)RAND_MAX;
    return draw / span;
}
/* Return a pseudo-random z coordinate in [0, 1]: one rand() draw scaled by RAND_MAX. */
double z(){
    const double draw = (double)rand();
    const double span = (double)RAND_MAX;
    return draw / span;
}
/*
 * Indicator function of the integration region.
 *
 * Returns 1 when the point (x, y, z) satisfies x^2 + y^2 < 1 (strictly)
 * and 0 <= z <= 1 (inclusive), and 0 otherwise.
 *
 * The squares are computed with plain multiplication instead of pow(x, 2):
 * this function runs once per Monte Carlo sample, and a multiplication
 * avoids a library call in that hot path.
 */
int d(double x, double y, double z){
    const int inside_disc = (x * x + y * y) < 1.0;
    const int inside_slab = (z >= 0.0) && (z <= 1.0);
    return inside_disc && inside_slab;
}
/*
 * Integrand evaluated at (x, y, z).
 * It is the constant 1 here, so the arguments are deliberately ignored.
 */
double f(double x, double y, double z){
    (void)x;
    (void)y;
    (void)z;
    return 1.0;
}
/*
 * Monte Carlo sampling loop using an OpenMP reduction: each thread
 * accumulates into its own copy of total_sum and good_dots, and the copies
 * are summed into the original variables when the parallel region ends.
 */
#pragma omp parallel default(none) private(j,local_coi,x_,y_,z_) shared(count_of_iterations) reduction(+:total_sum,good_dots)
{
    local_coi = count_of_iterations;
    /* The worksharing-loop directive takes no name; only `critical` does. */
    #pragma omp for
    for (j = 1; j <= local_coi; j++){
        /* x(), y() and z() draw from rand(), which keeps hidden state shared
           by all threads; the named critical section serialises the draws.
           NOTE(review): this serialisation likely limits the parallel
           speed-up; a per-thread generator would avoid it. */
        #pragma omp critical(prng)
        {
            x_=x();
            y_=y();
            z_=z();
        }
        if (d(x_,y_,z_) == 1){
            total_sum += f(x_,y_,z_);
            good_dots += 1;
        }
    }
}