我在我的一个应用程序中使用了一个滑动窗口(实际上,几层滑动窗口相互叠加,但这超出了本讨论的范围)。该窗口通过CreateFileMapping()
and使用内存映射文件视图MapViewOfFile()
,然后我在其之上有一个抽象层。我向抽象层询问我需要的任何字节范围,它确保相应地调整文件映射和文件视图,以便这些字节在内存中。每次请求新的字节范围时,仅在需要时调整文件视图。
文件视图在页面边界上定位和调整大小,甚至是系统粒度的倍数,如GetSystemInfo()
. 仅仅因为扫描到达给定字节范围的末尾并不一定意味着它已经到达页面边界的末尾,因此下一次扫描可能根本不需要改变文件视图,下一个字节已经在内存中。如果范围的第一个请求字节超出映射页面的右侧边界,则文件视图的左边缘将调整为请求页面的左侧边界,并且左侧的任何页面都将被取消映射。如果范围中最后请求的字节超出最右侧映射页面的右侧边界,则映射新页面并将其添加到文件视图中。
一旦你开始编码,这听起来比实际实现要复杂得多:
在文件中创建视图
听起来您正在扫描固定大小的块中的字节,因此这种方法非常快速且非常有效。基于这种技术,我可以相当快地从头到尾依次扫描多个GIGBYTE文件,在我最慢的机器上通常一分钟或更短时间。如果您的文件比系统粒度小,甚至只有几兆字节,您几乎不会注意到任何时间流逝(除非您的扫描本身很慢)。
更新:这是我使用的简化变体:
class FileView
{
private:
DWORD m_AllocGran;
DWORD m_PageSize;
HANDLE m_File;
unsigned __int64 m_FileSize;
HANDLE m_Map;
unsigned __int64 m_MapSize;
LPBYTE m_View;
unsigned __int64 m_ViewOffset;
DWORD m_ViewSize;
void CloseMap()
{
CloseView();
if (m_Map != NULL)
{
CloseHandle(m_Map);
m_Map = NULL;
}
m_MapSize = 0;
}
void CloseView()
{
if (m_View != NULL)
{
UnmapViewOfFile(m_View);
m_View = NULL;
}
m_ViewOffset = 0;
m_ViewSize = 0;
}
bool EnsureMap(unsigned __int64 Size)
{
// do not exceed EOF or else the file on disk will grow!
Size = min(Size, m_FileSize);
if ((m_Map == NULL) ||
(m_MapSize != Size))
{
// a new map is needed...
CloseMap();
ULARGE_INTEGER ul;
ul.QuadPart = Size;
m_Map = CreateFileMapping(m_File, NULL, PAGE_READONLY, ul.HighPart, ul.LowPart, NULL);
if (m_Map == NULL)
return false;
m_MapSize = Size;
}
return true;
}
bool EnsureView(unsigned __int64 Offset, DWORD Size)
{
if ((m_View == NULL) ||
(Offset < m_ViewOffset) ||
((Offset + Size) > (m_ViewOffset + m_ViewSize)))
{
// the requested range is not already in view...
// round down the offset to the nearest allocation boundary
unsigned __int64 ulNewOffset = ((Offset / m_AllocGran) * m_AllocGran);
// round up the size to the next page boundary
DWORD dwNewSize = ((((Offset - ulNewOffset) + Size) + (m_PageSize-1)) & ~(m_PageSize-1));
// if the new view will exceed EOF, truncate it
unsigned __int64 ulOffsetInFile = (ulNewOffset + dwNewSize);
if (ulOffsetInFile > m_FileSize)
dwNewViewSize -= (ulOffsetInFile - m_FileSize);
if ((m_View == NULL) ||
(m_ViewOffset != ulNewOffset) ||
(m_ViewSize != ulNewSize))
{
// a new view is needed...
CloseView();
// make sure the memory map is large enough to contain the entire view
if (!EnsureMap(ulNewOffset + dwNewSize))
return false;
ULARGE_INTEGER ul;
ul.QuadPart = ulNewOffset;
m_View = (LPBYTE) MapViewOfFile(m_Map, FILE_MAP_READ, ul.HighPart, ul.LowPart, dwNewSize);
if (m_View == NULL)
return false;
m_ViewOffset = ulNewOffset;
m_ViewSize = dwNewSize;
}
}
return true;
}
public:
FileView() :
m_AllocGran(0),
m_PageSize(0),
m_File(INVALID_HANDLE_VALUE),
m_FileSize(0),
m_Map(NULL),
m_MapSize(0),
m_View(NULL),
m_ViewOffset(0),
m_ViewSize(0)
{
// map views need to be positioned on even multiples
// of the system allocation granularity. let's size
// them on even multiples of the system page size...
SYSTEM_INFO si = {0};
if (GetSystemInfo(&si))
{
m_AllocGran = si.dwAllocationGranularity;
m_PageSize = si.dwPageSize;
}
}
~FileView()
{
CloseFile();
}
bool OpenFile(LPTSTR FileName)
{
CloseFile();
if ((m_AllocGran == 0) || (m_PageSize == 0))
return false;
HANDLE hFile = CreateFile(FileName, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, 0, NULL);
if (hFile == INVALID_HANDLE_VALUE)
return false;
ULARGE_INTEGER ul;
ul.LowPart = GetFileSize(hFile, &ul.HighPart);
if ((ul.LowPart == INVALID_FILE_SIZE) && (GetLastError() != 0))
{
CloseHandle(hFile);
return false;
}
m_File = hFile;
m_FileSize = ul.QuadPart;
return true;
}
void CloseFile()
{
CloseMap();
if (m_File != INVALID_HANDLE_VALUE)
{
CloseHandle(m_File);
m_File = INVALID_HANDLE_VALUE;
}
m_FileSize = 0;
}
bool AccessBytes(unsigned __int64 Offset, DWORD Size, LPBYTE *Bytes, DWORD *Available)
{
if (Bytes) *Bytes = NULL;
if (Available) *Available = 0;
if ((m_FileSize != 0) && (offset < m_FileSize))
{
// make sure the requested range is in view
if (!EnsureView(Offset, Size))
return false;
// near EOF, the available bytes may be less than requested
DWORD dwOffsetInView = (Offset - m_ViewOffset);
if (Bytes) *Bytes = &m_View[dwOffsetInView];
if (Available) *Available = min(m_ViewSize - dwOffsetInView, Size);
}
return true;
}
};
.
FileView fv;
if (fv.OpenFile(TEXT("C:\\path\\file.ext")))
{
LPBYTE data;
DWORD len;
unsigned __int64 offset = 0, filesize = fv.FileSize();
while (offset < filesize)
{
if (!fv.AccessBytes(offset, some size here, &data, &len))
break; // error
if (len == 0)
break; // unexpected EOF
// use data up to len bytes as needed...
offset += len;
}
fv.CloseFile();
}
此代码旨在允许以任何数据大小在文件中的任何位置随机跳转。由于您是按顺序读取字节,因此可以根据需要简化一些逻辑。