我有一个 S3 服务器,每个存储桶下都有数百万个文件。我想从某个存储桶下载文件,但只下载满足特定条件的文件。目前的做法是先列出整个存储桶中的所有文件,再在遍历时逐个检查条件,有没有比这更好的方法?当前代码如下:
import os
# Import the SDK
import boto
from boto.s3.connection import OrdinaryCallingFormat
# Download every object in an S3 bucket whose key matches a filter, skipping
# objects that already exist locally.

# Local directory that matching objects are written into.
LOCAL_PATH = 'W:/RD/Fancy/s3_opportunities/'
bucket_name = '/recording'  # /sampledResponseLogger'

# Server-side filter: the listing request only returns keys that start with
# this prefix. S3 listings can be narrowed by key prefix only, not by an
# arbitrary substring, so set this to the longest leading part shared by the
# wanted keys to avoid enumerating millions of irrelevant objects.
# The empty string lists the whole bucket.
KEY_PREFIX = ''
# Client-side filter: only keys containing this text are downloaded.
KEY_SUBSTRING = '2015-08'
# Number of leading key characters dropped to build the local file name.
KEY_NAME_OFFSET = 56

# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print('Connecting...')
# NOTE(review): OrdinaryCallingFormat was chosen over the default
# boto.connect_s3() -- presumably for path-style bucket addressing; confirm.
conn = boto.connect_s3(calling_format=OrdinaryCallingFormat())
print('Getting bucket...')
bucket = conn.get_bucket(bucket_name)
print('Going through the list of files...')
bucket_list = bucket.list(prefix=KEY_PREFIX)
for key in bucket_list:
    keyString = str(key.key)
    if KEY_SUBSTRING not in keyString:
        continue

    local_name = keyString[KEY_NAME_OFFSET:]
    # A key that is too short, or that ends with '/', has no file name to
    # write to; skip it instead of failing on a directory path.
    if not local_name or local_name.endswith('/'):
        continue

    # check if file exists locally, if not: download it
    filename = LOCAL_PATH + local_name
    if os.path.exists(filename):
        continue

    # Keys may contain '/' separators; make sure the parent folder exists
    # before the download tries to open the target file.
    target_dir = os.path.dirname(filename)
    if target_dir and not os.path.isdir(target_dir):
        os.makedirs(target_dir)

    print('Downloading file: ' + keyString + '...')
    # Download the object that the key represents
    key.get_contents_to_filename(filename)