我正在尝试编写一个相当高效的文件读取例程。我的数据文件是一个包含若干“帧”的文本文件。每帧有 2 个标题行和若干数据项,格式如下:
<int "nitems">
<float> <float> <float>
<string1> <float> <float> <float>
<string2> <float> <float> <float>
...
<string-nitems> <float> <float> <float>
我当前的实现使用 fstream 来读取数字,但似乎非常慢。我的测试文件包含大约 200 帧,每帧 10,000 行(约 75 MB),需要 2.5 秒才能读完!
// Reads one frame from _file: the item count into _nat, three values into
// _cell, then _nat records of "<type> <x> <y> <z>" into _types and _pos
// (three consecutive _pos slots per record).
// Returns 0 on success, -1 as soon as any extraction fails.
// NOTE(review): assumes _types already holds at least _nat elements and _pos
// at least 3*_nat; the sizing is not visible in this snippet, so confirm it.
int loadframe() {
    if (!(_file >> _nat)) return -1;
    if (!(_file >> _cell[0] >> _cell[1] >> _cell[2])) return -1;
    for (int i = 0; i < _nat; ++i) {
        // Index explicitly: several k++ inside one chained >> expression are
        // not reliably ordered before C++17.
        const int k = 3 * i;
        if (!(_file >> _types[i] >> _pos[k] >> _pos[k + 1] >> _pos[k + 2])) return -1;
    }
    return 0;
}
_file 是 ifstream(在别处打开),_types 是字符串向量,_cell 和 _pos 是双精度向量。
有没有人建议如何加快速度?
谢谢。
更新 1
用 fscanf 重写这部分后,时间从约 2.5 秒减少到约 1.8 秒:大约 30% 的提升,还不错。
_f 现在是 FILE* 类型的对象:FILE* _f = fopen(filename,"r")。
fscanf 之后的那些赋值行用于类型转换(如有需要),但并不占用多少时间,把它们注释掉即可看出这一点。
int loadxyz() {
char c[16];
float x0,x1,x2;
fscanf(_f,"%d",&_nat);
fscanf(_f,"%f%f%f",&x0,&x1,&x2;
_cell[0]=x1; _cell[1]=x2; _cell[2]=x3;
for(int i=0, k=0;i<_nat;i++,k+=3) {
fscanf(_f,"%s%f%f%f",&c,&x0,&x1,&x2);
_types[i]=c; _pos[k]=x0; _pos[k+1]=x1; _pos[k+2]=x2;
}
return 0;
}
更新 2
根据下面的建议,我编写了一个小型基准测试程序,结果表明 Nim 的解决方案显然最快。在我的情况下,编译器优化没有任何显著影响。对于任何想尝试的人,我在下面附上了源代码。需要较新的编译器:g++ -std=c++11 readtest.cpp -o readtest
谢谢!如果有人还有其他建议,我将非常乐意添加/对它们进行基准测试。
结果(测试文件约为 32Mb)
$ ./readtest
write : took 1.97 seconds
check = 549755289600.00
read1 (ifstream) : took 1.10 seconds
check = 549755289600.00
read2 (fscanf) : took 0.64 seconds
check = 549755289600.00
read3 (stream+strtod) : took 0.41 seconds
下面是源代码 readtest.cpp:
#include <stdio.h> // printf, fopen, fclose, fprintf,
#include <stdlib.h> // strtod
#include <fstream> // ifstream
#include <string> // string
#include <ctime> // clock
#define N 1048576 // 1024*1024 number of lines
using namespace std;
void write(string name) {
FILE* f = fopen(name.c_str(),"w");
for(float i=0;i<N;i++)
fprintf(f,"%s %.2f %.2f %.2f\n","x",i,i,i); // write some formatted data
fclose(f);
}
void read1(string name) {
double num,check=0;
string s;
ifstream f(name);
for(int i=0;i<N;i++) {
f >> s;
f >> num;
f >> num;
f >> num;
check+=num;
}
printf("check = %.2f\n",check);
f.close();
}
// Benchmark variant 2: parses records "<label> <a> <b> <c>" from the file
// `name` with fscanf until a record can no longer be fully converted, sums the
// third numeric column and prints the sum as "check = ...".
// If the file cannot be opened, reports the error on stderr and returns.
void read2(std::string name) {
    FILE* f = fopen(name.c_str(), "r");
    if (f == nullptr) {
        perror(name.c_str());
        return;
    }
    char label[16];
    std::string s;
    double num = 0, check = 0;
    // Require all four conversions: comparing against EOF alone would spin
    // forever on a malformed record. %15s keeps the label inside the buffer.
    while (fscanf(f, "%15s%lf%lf%lf", label, &num, &num, &num) == 4) {
        s = label; // copy into a std::string, as the stream-based variants do
        check += num;
    }
    printf("check = %.2f\n", check);
    fclose(f);
}
// Benchmark variant 3: reads the file `name` line by line with getline, takes
// the first whitespace-delimited token as the label and parses the following
// three numbers with strtod. Sums the third number of every line and prints
// the sum as "check = ...". Blank lines are skipped.
void read3(std::string name) {
    std::ifstream f(name);
    std::string line, s;
    double num = 0, check = 0;
    while (std::getline(f, line)) {
        const size_t start = line.find_first_not_of(" \t");
        if (start == std::string::npos) continue; // empty or whitespace-only line
        size_t stop = line.find_first_of(" \t", start);
        if (stop == std::string::npos) stop = line.size(); // label only, no numbers
        s = line.substr(start, stop - start);
        // strtod skips leading whitespace itself, so parsing can start right
        // after the label; c_str() guarantees a terminating NUL.
        char* cursor = nullptr;
        num = strtod(line.c_str() + stop, &cursor);
        num = strtod(cursor, &cursor);
        num = strtod(cursor, &cursor);
        check += num;
    }
    printf("check = %.2f\n", check);
    f.close();
}
// Benchmark driver: generates the test file, then runs each reader on it and
// prints the CPU time (as measured by clock()) spent in every step.
int main() {
    const std::string name("testfile.dat");

    // Runs step(name) and returns the clock() time it consumed, in seconds.
    auto seconds_for = [&name](void (*step)(std::string)) {
        const clock_t begin = clock();
        step(name);
        const clock_t finish = clock();
        return double(finish - begin) / CLOCKS_PER_SEC;
    };

    double elapsed = seconds_for(write);
    printf("write : took %.2f seconds\n", elapsed);

    elapsed = seconds_for(read1);
    printf("read1 (ifstream) : took %.2f seconds\n", elapsed);

    elapsed = seconds_for(read2);
    printf("read2 (fscanf) : took %.2f seconds\n", elapsed);

    elapsed = seconds_for(read3);
    printf("read3 (stream+strtod) : took %.2f seconds\n", elapsed);
}