我知道你想找的是一个现成的库,但读到这个问题时,我想不如自己写一个。代码如下:
import os
class View:
    """Seekable, read-only window onto a slice of an already-open file.

    Several views may share one underlying file object: every read
    repositions the file first, so interleaved reads through different
    views do not disturb each other.

    Attributes:
        f: the underlying file object (must support ``seek`` and ``read``).
        f_offset: position in ``f`` where the view starts.
        offset: current position relative to the start of the view.
        length: number of units (bytes/characters) exposed by the view.
    """

    def __init__(self, f, offset, length):
        """Create a view of ``length`` units of ``f`` starting at ``offset``."""
        self.f = f
        self.f_offset = offset
        self.offset = 0
        self.length = length

    def seek(self, offset, whence=0):
        """Move the view position and return the new view-relative position.

        ``whence`` follows the usual convention: ``os.SEEK_SET`` (start of
        the view), ``os.SEEK_CUR`` (current position) or ``os.SEEK_END``
        (end of the view). Any other value is handed to the underlying
        file, which is expected to reject it.

        Raises:
            IOError: if the resulting position would lie before the start
                of the view.
        """
        if whence == os.SEEK_SET:
            target = offset
        elif whence == os.SEEK_CUR:
            target = self.offset + offset
        elif whence == os.SEEK_END:
            target = self.length + offset
        else:
            # Unknown whence: defer to the underlying file so that it raises
            # whatever error it normally raises for an invalid value.
            return self.f.seek(offset, whence)
        if target < 0:
            # A negative position would let read() return data located
            # before the start of the view.
            raise IOError("negative seek position %r" % (target,))
        self.f.seek(self.f_offset + target, os.SEEK_SET)
        # Only commit the new position once the underlying seek succeeded.
        self.offset = target
        return target

    def tell(self):
        """Return the current position relative to the start of the view."""
        return self.offset

    def read(self, size=-1):
        """Read up to ``size`` units, never going past the end of the view.

        A negative ``size`` or ``None`` reads everything up to the end of
        the view. Returns whatever the underlying file's ``read`` returns.
        """
        remaining = max(0, self.length - self.offset)
        if size is None or size < 0:
            size = remaining
        else:
            size = min(size, remaining)
        # The file may have been moved by another view since the last call.
        self.seek(self.offset)
        data = self.f.read(size)
        # Advance by what was actually read; the file may be shorter than
        # the declared view length.
        self.offset += len(data)
        return data
if __name__ == "__main__":
    # Self-test: reading each view in one go must give the same text as
    # reading all views in interleaved 3-character chunks.
    import collections

    # Records start every 11 characters: a one-character marker, a one-digit
    # payload length at +1, and the payload itself from +2.
    with open('test.txt', 'r') as f:
        views = []
        for o in [i * 11 for i in range(10)]:
            f.seek(o + 1)
            length = int(f.read(1))
            views.append(View(f, o + 2, length))
        f.seek(0)

        # Reference result: each view read completely, then rewound.
        completes = {}
        for v in views:
            completes[v.f_offset] = v.read()
            v.seek(0)

        # Interleaved result: three rounds of 3 characters per view, which
        # covers the largest length a single digit can encode (9).
        strs = collections.defaultdict(str)
        for _ in range(3):
            for v in views:
                strs[v.f_offset] += v.read(3)
        strs = dict(strs)  # We want it to raise KeyErrors after that.

        for offset, complete in completes.items():
            # %-formatting prints the same text on Python 2 and Python 3.
            print("%s %s %s" % (offset, strs[offset], complete))
            assert strs[offset] == complete, "Something went wrong!"
我编写了另一个脚本来生成“test.txt”文件:
import string, random
# Write ten records of the form '.<length><payload>' to 'test.txt', where the
# payload is 9 distinct random ASCII letters. The with-block guarantees the
# data is flushed and the file closed as soon as writing is done.
with open('test.txt', 'w') as f:
    for _ in range(10):
        rand_str = "".join(random.sample(string.ascii_letters, 9))
        f.write(".%d%s" % (len(rand_str), rand_str))
它在我这里运行正常。我测试用的文件不像你的那样是二进制文件,也没有你的那么大,但我希望这段代码对你有用。如果没用,也要谢谢你,这是一个很好的挑战 :D
另外,我想知道,如果这些实际上是多个文件,为什么不使用某种存档文件格式,并使用它们的库来读取它们呢?
希望能帮助到你。