我实施了 garnaat 接受的答案的评论中暗示的解决方案:
import cStringIO
import gzip
def sendFileGz(bucket, key, fileName, suffix='.gz'):
key += suffix
mpu = bucket.initiate_multipart_upload(key)
stream = cStringIO.StringIO()
compressor = gzip.GzipFile(fileobj=stream, mode='w')
def uploadPart(partCount=[0]):
partCount[0] += 1
stream.seek(0)
mpu.upload_part_from_file(stream, partCount[0])
stream.seek(0)
stream.truncate()
with file(fileName) as inputFile:
while True: # until EOF
chunk = inputFile.read(8192)
if not chunk: # EOF?
compressor.close()
uploadPart()
mpu.complete_upload()
break
compressor.write(chunk)
if stream.tell() > 10<<20: # min size for multipart upload is 5242880
uploadPart()
它似乎没有问题。毕竟,流媒体在大多数情况下只是数据的分块。在这种情况下,块大约有 10MB 大,但谁在乎呢?只要我们不是在谈论几个 GB 块,我就可以了。
Python 3 的更新:
from io import BytesIO
import gzip
def sendFileGz(bucket, key, fileName, suffix='.gz'):
key += suffix
mpu = bucket.initiate_multipart_upload(key)
stream = BytesIO()
compressor = gzip.GzipFile(fileobj=stream, mode='w')
def uploadPart(partCount=[0]):
partCount[0] += 1
stream.seek(0)
mpu.upload_part_from_file(stream, partCount[0])
stream.seek(0)
stream.truncate()
with open(fileName, "rb") as inputFile:
while True: # until EOF
chunk = inputFile.read(8192)
if not chunk: # EOF?
compressor.close()
uploadPart()
mpu.complete_upload()
break
compressor.write(chunk)
if stream.tell() > 10<<20: # min size for multipart upload is 5242880
uploadPart()