我有一个巨大的vector<vector<int>>
(18M x 128)。我经常想取 2 行这个向量并通过这个函数比较它们:
int getDiff(int indx1, int indx2) {
int result = 0;
int pplus, pminus, tmp;
for (int k = 0; k < 128; k += 2) {
pplus = nodeL[indx2][k] - nodeL[indx1][k];
pminus = nodeL[indx1][k + 1] - nodeL[indx2][k + 1];
tmp = max(pplus, pminus);
if (tmp > result) {
result = tmp;
}
}
return result;
}
如您所见,函数循环通过两个行向量进行一些减法,最后返回最大值。这个函数将被使用一百万次,所以我想知道它是否可以通过 SSE 指令加速。我使用 Ubuntu 12.04 和 gcc。
当然这是微优化,但如果你能提供一些帮助会很有帮助,因为我对 SSE 一无所知。提前致谢
基准:
int nofTestCases = 10000000;
vector<int> nodeIds(nofTestCases);
vector<int> goalNodeIds(nofTestCases);
vector<int> results(nofTestCases);
for (int l = 0; l < nofTestCases; l++) {
nodeIds[l] = randomNodeID(18000000);
goalNodeIds[l] = randomNodeID(18000000);
}
double time, result;
time = timestamp();
for (int l = 0; l < nofTestCases; l++) {
results[l] = getDiff2(nodeIds[l], goalNodeIds[l]);
}
result = timestamp() - time;
cout << result / nofTestCases << "s" << endl;
time = timestamp();
for (int l = 0; l < nofTestCases; l++) {
results[l] = getDiff(nodeIds[l], goalNodeIds[l]);
}
result = timestamp() - time;
cout << result / nofTestCases << "s" << endl;
在哪里
int randomNodeID(int n) {
return (int) (rand() / (double) (RAND_MAX + 1.0) * n);
}
/** Returns a timestamp ('now') in seconds (incl. a fractional part). */
inline double timestamp() {
struct timeval tp;
gettimeofday(&tp, NULL);
return double(tp.tv_sec) + tp.tv_usec / 1000000.;
}