有没有办法从文件描述符(不是类似 IO 的对象)直接读取到bytearray?
现在我使用一个临时FileIO
对象进行调解,例如:
def fd_readinto(fd, ba):
fio = io.FileIO(fd, closefd = False)
return fio.readinto(ba)
有没有办法从文件描述符(不是类似 IO 的对象)直接读取到bytearray?
现在我使用一个临时FileIO
对象进行调解,例如:
def fd_readinto(fd, ba):
fio = io.FileIO(fd, closefd = False)
return fio.readinto(ba)
没有功能可以做到这一点,您的方法已经是最快的方法。
我打算建议bytearray(mmap)
,,array.fromfile
甚至是使用 and 的自制程序os.read()
,bytearray
但是memoryview
尖叫得很快。(这是有道理的,因为它只执行一个系统调用。)FileIO.readinto
import os
import mmap, io, array
import timeit
fn = 'path-to-largeish-file'
def fd_readinto_mmap(fd, ba):
m = mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
ba.extend(m)
m.close()
def fd_readinto_fio(fd, ba):
sz = os.fstat(fd).st_size
ba2 = bytearray(sz)
with io.FileIO(fd, closefd = False) as fio:
fio.readinto(ba2)
ba.extend(ba2)
def fd_readinto_array(fd, ba):
ar = array.array('c')
sz = os.fstat(fd).st_size
fp = os.fdopen(fd, 'rb')
ar.fromfile(fp, sz)
ba.extend(ar)
def fd_readinto_mv(fd, ba):
stat = os.fstat(fd)
blksize = getattr(stat, 'st_blksize', 4096)
bufsize = stat.st_size
buf = bytearray(bufsize)
m = memoryview(buf)
while True:
b = os.read(fd, blksize)
s = len(b)
if not s: break
m[:s], m = b, m[s:]
writtenbytes = buffer(buf, 0, bufsize-len(m))
ba.extend(writtenbytes)
setup = """
from __main__ import fn, fd_readinto_mmap, fd_readinto_fio, fd_readinto_array, fd_readinto_mv
import os
openfd = lambda : os.open(fn, os.O_RDONLY)
closefd = lambda fd: os.close(fd)
"""
reps = 2
tests = {
'fio' : "fd=openfd(); fd_readinto_fio(fd, bytearray()); closefd(fd)",
'mmap': "fd=openfd(); fd_readinto_mmap(fd, bytearray()); closefd(fd)",
'array': "fd=openfd(); fd_readinto_array(fd, bytearray());",
'mv' : "fd=openfd(); fd_readinto_mv(fd, bytearray()); closefd(fd)",
}
width = max(map(len, tests))
for n,t in tests.iteritems():
time = timeit.timeit(t, setup, number=reps)
print ("{:%s} {}" % width).format(n, time)
在我的系统(OS X 10.14.6,Python 2.7.10)上,FileIO
是最快的选择:
mmap 7.19839119911
array 5.72453403473
mv 0.49933886528
fio 0.299485206604