请看下面这个程序，它用多个线程把计算分摊到向量的各个元素上（我以前从未使用过 std::thread）：
// vectorop.cpp
// compilation: g++ -O3 -std=c++0x vectorop.cpp -o vectorop -lpthread
// execution: time ./vectorop 100 50000000
// (100: number of threads, 50000000: vector size)
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <functional>
#include <iomanip>
#include <iostream>
#include <limits>
#include <numeric>
#include <system_error>
#include <thread>
#include <vector>
// Some calculation that takes some time.
//
// Replaces, in place, every element x of v whose index lies in the half-open
// range [first, last) by sin(x) + exp(cos(x)) / exp(sin(x)).
// Nothing happens when first >= last. Elements are accessed with operator[],
// so there is no bounds checking: the caller must guarantee last <= v.size().
template<typename T>
void f(std::vector<T>& v, unsigned int first, unsigned int last) {
    unsigned int idx = first;
    while (idx < last) {
        T& x = v[idx];
        // Both values are computed from the element before it is overwritten.
        const auto sine = std::sin(x);
        const auto cosine = std::cos(x);
        x = sine + std::exp(cosine) / std::exp(sine);
        ++idx;
    }
}
// Main
//
// Usage: vectorop [nthreads] [n]
//   nthreads  number of worker threads requested (default 1, must be >= 1)
//   n         number of vector elements (default 100000000, must be >= 0)
//
// Fills a vector with 0, 1, ..., n-1, applies f to it with one worker thread
// per chunk, prints the [first, last) range handed to each worker and finally
// the sum of the transformed elements. Returns EXIT_FAILURE when an argument
// is out of range or a worker thread cannot be started.
int main(int argc, char* argv[]) {
    // Variables
    // Parse into long first so that out-of-range values (including the 0 that
    // strtol yields for non-numeric text) can be rejected before narrowing.
    const long requested_threads = (argc > 1) ? std::strtol(argv[1], nullptr, 10) : 1L;
    const long requested_size = (argc > 2) ? std::strtol(argv[2], nullptr, 10) : 100000000L;
    if (requested_threads < 1 || requested_size < 0 ||
        requested_size > std::numeric_limits<int>::max()) {
        std::cerr << "usage: " << ((argc > 0) ? argv[0] : "vectorop")
                  << " [nthreads >= 1] [0 <= n <= "
                  << std::numeric_limits<int>::max() << "]" << std::endl;
        return EXIT_FAILURE;
    }
    const unsigned int n = static_cast<unsigned int>(requested_size);
    // Never start more workers than there are elements to process.
    const unsigned int nchunks =
        static_cast<unsigned int>(std::min<long>(requested_threads, requested_size));

    std::vector<std::thread> t;
    t.reserve(nchunks);
    std::vector<double> v(n);

    // Initialization
    std::iota(v.begin(), v.end(), 0);

    // question 1:
    // how to compute the first/last indexes attributed to each thread
    // with a more "elegant" formula ?
    // -> chunk k covers [k*n/nchunks, (k+1)*n/nchunks). Consecutive chunks
    //    share a boundary, the last one ends exactly at n, chunk sizes differ
    //    by at most one, and exactly nchunks threads are started. The product
    //    is formed in 64 bits so that k*n cannot overflow.
    const auto boundary = [n, nchunks](unsigned int k) {
        return static_cast<unsigned int>(static_cast<unsigned long long>(k) * n / nchunks);
    };

    // Start threads
    // Each worker receives a disjoint index range of v, so the workers need
    // no synchronization among themselves.
    try {
        for (unsigned int k = 0; k < nchunks; ++k) {
            const unsigned int first = boundary(k);
            const unsigned int last = boundary(k + 1);
            std::cout << first << " " << last << '\n';
            t.emplace_back(f<double>, std::ref(v), first, last);
        }
    } catch (const std::system_error& e) {
        // Destroying a joinable std::thread calls std::terminate, so wait for
        // the workers that did start before reporting the failure.
        for (std::thread& worker : t) {
            worker.join();
        }
        std::cerr << "failed to start worker thread: " << e.what() << std::endl;
        return EXIT_FAILURE;
    }

    // Finish threads
    // question 2:
    // how to be sure that all threads are finished here ?
    // how to "wait" for the end of all threads ?
    // -> join() blocks until the corresponding thread has completed, so once
    //    this loop is over every worker has finished writing to v.
    for (std::thread& worker : t) {
        worker.join();
    }

    // Finalization
    // Left-to-right summation, same order as an explicit index loop.
    const double x = std::accumulate(v.begin(), v.end(), 0.0);
    std::cout << std::setprecision(15) << x << '\n';
    return 0;
}
代码中已经嵌入了两个问题。
第三个问题是：这段代码是否完全没有问题？或者说，能否用 std::thread 以更优雅的方式来编写？我并不了解使用 std::thread 的“良好实践”……