我有一个信息检索与存储课程的项目,第一部分要求我找出从硬盘读取大文件时的最佳缓冲区大小。我们的助教(TA)说,随着缓冲区大小增加到某个点(通常是 4 个字节),读取速度会提高,但超过这个点之后速度反而会降低。但是使用下面的代码,无论缓冲区大小或文件大小如何,读取速度都只会提高(我已经在 100 MB 的文件上测试过)。据我所知,缓冲只在并行的异步处理(如线程)中才有意义,并且只有当文件经过碎片整理和/或查找文件目录和地址的成本(对于磁盘)足够显著时,才会出现存在最佳缓冲区大小的现象。那么,问题是出在我的代码上,还是与 ifstream 处理事情的方式有关,或者是这些条件在这里并不成立?
// Open the benchmark input in binary mode so the bytes are read untranslated.
// NOTE(review): the path has no backslash after the drive letter, which makes it
// relative to the current directory of drive D: - confirm an absolute path
// (drive letter followed by a backslash) was not intended.
ifstream in("D:ISR\\Articles.dat", std::ifstream::binary);
// Only run the benchmark when the stream is in a good state after opening.
if(in)
{
// Measure the file by seeking to its end; tellg() reports -1 if that fails.
in.seekg(0, in.end);
const std::streamoff fileSize = in.tellg();
// Benchmark at most 100 MB, but never more than the file really contains.
// Unconditionally forcing the length to 100 MB made the read loop run past
// EOF on smaller files, after which every read fails and the timings are
// meaningless.
const int maxLength = 100 * 1024 * 1024;
int length = 0;
if(fileSize > 0)
    length = fileSize < maxLength ? static_cast<int>(fileSize) : maxLength;
// Size of each individual read request; doubled after every timed pass.
int bufferSize = 2;
int blockSize = 1024;//1kB
// Number of blocks needed to cover `length`, rounding a partial block up.
int numberOfBlocks = length / blockSize;
if(length % blockSize > 0) numberOfBlocks++;
// Scratch variables for the clock() based timing of each pass.
clock_t t;
double time;
for(int i = 0; i < 5; i++)
{
in.seekg(0, in.beg);
int position = 0;
int bufferPosition;
char* streamBuffer = new char[bufferSize];
in.rdbuf()->pubsetbuf(streamBuffer, bufferSize);
t = clock();
for(int i = 0; i < numberOfBlocks; i++)
{
char* buffer = new char[blockSize];
bufferPosition = 0;
while(bufferPosition < blockSize && position < length)
{
in.read(buffer + bufferPosition, bufferSize);
position += bufferSize;
bufferPosition += bufferSize;
}
delete[] buffer;
}
t = clock() - t;
time = double(t) / CLOCKS_PER_SEC;
cout << "Buffer size : " << bufferSize << " -> Total time in seconds : " << time << "\n";
bufferSize *= 2;
}