更新:下面是 vartec 的答案中所述循环缓冲区技术的一个实现(它基于我的原始答案;原始答案保留在下方,供感兴趣的读者参考):
from cStringIO import StringIO
class FifoFileBuffer(object):
    """First-in, first-out buffer built as a ring over a StringIO object.

    The file position of ``buf`` acts as the read pointer, ``write_fp`` is
    the write pointer, ``size`` is the current capacity of the ring and
    ``available`` counts the unread characters.  When a write does not fit,
    the unread data is copied into a larger buffer.
    """

    def __init__(self):
        self.buf = StringIO()
        self.available = 0  # Number of unread characters
        self.size = 0  # Capacity of the ring (0 until the first write)
        self.write_fp = 0  # Offset in ``buf`` where the next write starts

    def read(self, size=None):
        """Remove and return up to ``size`` characters from the buffer.

        ``size=None`` (or a value larger than what is stored) returns all
        unread data; a negative ``size`` returns an empty string.
        """
        if size is None:
            wanted = self.available
        else:
            wanted = min(size, self.available)
        wanted = max(wanted, 0)

        chunk = self.buf.read(wanted)
        self.available -= wanted

        # A short read means the physical end of the ring was reached:
        # continue from offset 0 for the remainder.
        missing = wanted - len(chunk)
        if missing > 0:
            self.buf.seek(0)
            chunk += self.buf.read(missing)
        return chunk

    def write(self, data):
        """Append ``data`` to the buffer, growing the ring if necessary."""
        incoming = len(data)

        if self.available + incoming > self.size:
            self._grow(incoming)
            resume_at = 0  # Unread data now starts at offset 0
        else:
            resume_at = self.buf.tell()  # Remember the read pointer

        self.buf.seek(self.write_fp)
        room = self.size - self.write_fp  # Space left before the ring's end
        self.buf.write(data[:room])
        self.write_fp += incoming
        self.available += incoming

        if room < incoming:
            # The data did not fit before the end: wrap to the start.
            self.write_fp -= self.size
            self.buf.seek(0)
            self.buf.write(data[room:])

        # Restore the read pointer that the writes above displaced.
        self.buf.seek(resume_at)

    def _grow(self, incoming):
        """Move unread data into a new, larger ring that fits ``incoming``."""
        grown = StringIO()
        grown.write(self.read())  # Unread data, unwrapped, at offset 0
        self.write_fp = self.available = grown.tell()

        capacity = self.size
        while capacity <= self.available + incoming:
            capacity = max(capacity, 1024) * 2

        # Pad to full capacity so later seeks/writes stay inside the buffer.
        grown.write('0' * (capacity - self.write_fp))
        self.size = capacity
        self.buf = grown
原始答案(由上述答案取代):
您可以使用一个缓冲区并跟踪其起始索引(即读取文件指针);当该索引变得过大时,偶尔对缓冲区进行一次压缩,丢弃已读取的数据(这样应该能获得相当好的均摊性能)。
例如,像这样包装一个 StringIO 对象:
from cStringIO import StringIO
class FifoBuffer(object):
    """First-in, first-out buffer wrapping a StringIO object.

    New data is always appended at the end of ``buf``, while the file
    position of ``buf`` serves as the read pointer.  Data that has already
    been read stays in ``buf`` until the read pointer exceeds
    ``compact_threshold``; the next write then copies the unread remainder
    into a fresh buffer.
    """

    # Default read-pointer offset beyond which a write compacts the buffer.
    compact_threshold = 10 * 1024 * 1024

    def __init__(self, compact_threshold=None):
        """Create an empty buffer.

        ``compact_threshold`` optionally overrides the class default
        (10 MB) for the read-pointer offset that triggers compaction.
        """
        self.buf = StringIO()
        if compact_threshold is not None:
            self.compact_threshold = compact_threshold

    def read(self, *args, **kwargs):
        """Read from the buffer and return the data.

        Arguments are forwarded unchanged to the underlying ``read``.
        """
        # The result must be returned; otherwise callers always get None.
        return self.buf.read(*args, **kwargs)

    def write(self, *args, **kwargs):
        """Append data to the end of the buffer.

        Arguments are forwarded unchanged to the underlying ``write``.
        The read pointer is preserved across the call.
        """
        current_read_fp = self.buf.tell()
        if current_read_fp > self.compact_threshold:
            # Too much already-read data is being held: keep only the
            # unread remainder in a new buffer.
            new_buf = StringIO()
            new_buf.write(self.buf.read())
            self.buf = new_buf
            current_read_fp = 0

        self.buf.seek(0, 2)  # Seek to end so the data is appended
        self.buf.write(*args, **kwargs)
        self.buf.seek(current_read_fp)  # Restore the read pointer