如果可以,您可能会发现mmap
ing 文件将是最简单的。mmap
将文件的(一部分)映射到内存中,因此整个文件基本上可以作为字节数组进行访问。在您的情况下,您可能无法一次映射整个文件,它看起来像:
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/mman.h>
/* ... */
struct stat stat_buf;
long pagesz = sysconf(_SC_PAGESIZE);
int fd = fileno(stream);
off_t line_start = 0;
char *file_chunk = NULL;
char *input_line;
off_t cur_off = 0;
off_t map_offset = 0;
/* map 16M plus pagesize to ensure any record <= 16M will always fit in the mapped area */
size_t map_size = 16*1024*1024+pagesz;
if (map_offset + map_size > stat_buf.st_size) {
map_size = stat_buf.st_size - map_offset;
}
fstat(fd, &stat_buf);
/* map the first chunk of the file */
file_chunk = mmap(NULL, map_size, PROT_READ, MAP_SHARED, fd, map_offset);
// until we reach the end of the file
while (cur_off < stat_buf.st_size) {
/* check if we're about to read outside the current chunk */
if (!(cur_off-map_offset < map_size)) {
// destroy the previous mapping
munmap(file_chunk, map_size);
// round down to the page before line_start
map_offset = (line_start/pagesz)*pagesz;
// limit mapped region to size of file
if (map_offset + map_size > stat_buf.st_size) {
map_size = stat_buf.st_size - map_offset;
}
// map the next chunk
file_chunk = mmap(NULL, map_size, PROT_READ, MAP_SHARED, fd, map_offset);
// adjust the line start for the new mapping
input_line = &file_chunk[line_start-map_offset];
}
if (file_chunk[cur_off-map_offset] == '\n') {
// found a new line, process the current line
process_line(input_line, cur_off-line_start);
// set up for the next one
line_start = cur_off+1;
input_line = &file_chunk[line_start-map_offset];
}
cur_off++;
}
最复杂的是避免制作太大的映射。您也许可以使用映射整个文件
char *file_data = mmap(NULL, stat_buf.st_size, PROT_READ, MAP_SHARED, fd, 0);