通过 const 引用传递参数以开始:
matrix mult_std(matrix const& a, matrix const& b) {
为了向您提供更多详细信息,我们需要了解使用的其他方法的详细信息。
要回答为什么原始方法快 4 倍,我们需要查看原始方法。
这个问题无疑是你的问题,因为这个问题之前已经解决了一百万次。
此外,在提出此类问题时,请始终提供带有适当输入的可编译源,以便我们实际构建和运行代码并查看发生了什么。
没有代码,我们只是猜测。
编辑
修复原始 C 代码中的主要错误后(缓冲区溢出)
我已经更新了代码以公平比较并排运行测试:
// INCLUDES -------------------------------------------------------------------
#include <stdlib.h>
#include <stdio.h>
#include <sys/time.h>
#include <time.h>
// DEFINES -------------------------------------------------------------------
// The original problem was here. The MAXDIM was 500. But we were using arrays
// that had a size of 512 in each dimension. This caused a buffer overrun that
// the dim variable and caused it to be reset to 0. The result of this was causing
// the multiplication loop to fall out before it had finished (as the loop was
// controlled by this global variable.
//
// Everything now uses the MAXDIM variable directly.
// This of course gives the C code an advantage as the compiler can optimize the
// loop explicitly for the fixed size arrays and thus unroll loops more efficiently.
#define MAXDIM 512
#define RUNS 10
// MATRIX FUNCTIONS ----------------------------------------------------------
class matrix
{
public:
matrix(int dim)
: dim_(dim)
{
data_ = new int[dim_ * dim_];
}
inline int dim() const {
return dim_;
}
inline int& operator()(unsigned row, unsigned col) {
return data_[dim_*row + col];
}
inline int operator()(unsigned row, unsigned col) const {
return data_[dim_*row + col];
}
private:
int dim_;
int* data_;
};
// ---------------------------------------------------
void random_matrix(int (&matrix)[MAXDIM][MAXDIM]) {
for (int r = 0; r < MAXDIM; r++)
for (int c = 0; c < MAXDIM; c++)
matrix[r][c] = rand() % 100;
}
void random_matrix_class(matrix& matrix) {
for (int r = 0; r < matrix.dim(); r++)
for (int c = 0; c < matrix.dim(); c++)
matrix(r, c) = rand() % 100;
}
template<typename T, typename M>
float run(T f, M const& a, M const& b, M& c)
{
float time = 0;
for (int i = 0; i < RUNS; i++) {
struct timeval start, end;
gettimeofday(&start, NULL);
f(a,b,c);
gettimeofday(&end, NULL);
long s = start.tv_sec * 1000 + start.tv_usec / 1000;
long e = end.tv_sec * 1000 + end.tv_usec / 1000;
time += e - s;
}
return time / RUNS;
}
// SEQ MULTIPLICATION ----------------------------------------------------------
int* mult_seq(int const(&a)[MAXDIM][MAXDIM], int const(&b)[MAXDIM][MAXDIM], int (&z)[MAXDIM][MAXDIM]) {
for (int r = 0; r < MAXDIM; r++) {
for (int c = 0; c < MAXDIM; c++) {
z[r][c] = 0;
for (int i = 0; i < MAXDIM; i++)
z[r][c] += a[r][i] * b[i][c];
}
}
}
void mult_std(matrix const& a, matrix const& b, matrix& z) {
for (int r = 0; r < a.dim(); r++) {
for (int c = 0; c < a.dim(); c++) {
z(r,c) = 0;
for (int i = 0; i < a.dim(); i++)
z(r,c) += a(r,i) * b(i,c);
}
}
}
// MAIN ------------------------------------------------------------------------
using namespace std;
int main(int argc, char* argv[]) {
srand(time(NULL));
int matrix_a[MAXDIM][MAXDIM];
int matrix_b[MAXDIM][MAXDIM];
int matrix_c[MAXDIM][MAXDIM];
random_matrix(matrix_a);
random_matrix(matrix_b);
printf("%d ", MAXDIM);
printf("%f \n", run(mult_seq, matrix_a, matrix_b, matrix_c));
matrix a(MAXDIM);
matrix b(MAXDIM);
matrix c(MAXDIM);
random_matrix_class(a);
random_matrix_class(b);
printf("%d ", MAXDIM);
printf("%f \n", run(mult_std, a, b, c));
return 0;
}
现在的结果:
$ g++ t1.cpp
$ ./a.exe
512 1270.900000
512 3308.800000
$ g++ -O3 t1.cpp
$ ./a.exe
512 284.900000
512 622.000000
由此我们可以看到,在完全优化后,C 代码的速度大约是 C++ 代码的两倍。我在代码中看不到原因。