我在 s3 中有一个存储桶,它具有深层目录结构。我希望我可以一次下载它们。我的文件如下所示:
foo/bar/1. .
foo/bar/100 . .
有什么方法可以在 python 中使用 boto lib 从 s3 存储桶递归下载这些文件?
提前致谢。
我在 s3 中有一个存储桶,它具有深层目录结构。我希望我可以一次下载它们。我的文件如下所示:
foo/bar/1. .
foo/bar/100 . .
有什么方法可以在 python 中使用 boto lib 从 s3 存储桶递归下载这些文件?
提前致谢。
您可以像这样下载存储桶中的所有文件(未经测试):
"""Download every object in a bucket with the legacy boto2 API."""
import logging
import os

from boto.s3.connection import S3Connection

conn = S3Connection('your-access-key', 'your-secret-key')
bucket = conn.get_bucket('bucket')

for key in bucket.list():
    # "Folder" placeholder keys end with '/': nothing to download.
    if key.name.endswith('/'):
        continue
    # Make sure the key's local parent directory exists, otherwise
    # get_contents_to_filename raises for nested keys like foo/bar/1.
    local_dir = os.path.dirname(key.name)
    if local_dir and not os.path.exists(local_dir):
        os.makedirs(local_dir)
    try:
        key.get_contents_to_filename(key.name)
    # Narrow exceptions instead of the original bare `except:`; `logging`
    # was used but never imported in the original snippet.
    except (IOError, OSError):
        logging.info("%s:FAILED", key.name)
请记住,S3 中的“文件夹”只是对象键名(key name)的一种写法,只有客户端才会将其显示为文件夹。
#!/usr/bin/env python
"""Back up every object of an S3 bucket into ~/s3-backup/ (legacy boto2)."""
import logging
import sys, os

import boto
from boto.s3.key import Key
from boto.exception import S3ResponseError

DOWNLOAD_LOCATION_PATH = os.path.expanduser("~") + "/s3-backup/"

if not os.path.exists(DOWNLOAD_LOCATION_PATH):
    print("Making download directory")
    os.mkdir(DOWNLOAD_LOCATION_PATH)


def backup_s3_folder():
    """Download every key of BUCKET_NAME under DOWNLOAD_LOCATION_PATH,
    recreating the key's "directory" structure locally.

    Credentials are read from the environment (AWS_KEY_ID / AWS_ACCESS_KEY)
    so they are not hard-coded in the script.
    """
    BUCKET_NAME = "your-bucket-name"
    AWS_ACCESS_KEY_ID = os.getenv("AWS_KEY_ID")  # set AWS_KEY_ID in your environment
    AWS_ACCESS_SECRET_KEY = os.getenv("AWS_ACCESS_KEY")  # set AWS_ACCESS_KEY in your environment

    conn = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_ACCESS_SECRET_KEY)
    bucket = conn.get_bucket(BUCKET_NAME)

    # Go through the (lazily paginated) list of keys in the bucket.
    for key in bucket.list():
        key_string = str(key.key)
        s3_path = DOWNLOAD_LOCATION_PATH + key_string

        # Folder placeholder keys end in '/': there is no file content.
        if key_string.endswith("/"):
            continue

        # Bug fix: the original created a *directory* named s3_path after a
        # failed download (so the object could never be saved).  Instead,
        # ensure the parent directory exists up front, then download.
        parent_dir = os.path.dirname(s3_path)
        if parent_dir and not os.path.exists(parent_dir):
            try:
                os.makedirs(parent_dir)
            except OSError as exc:
                # Guard against race conditions with concurrent makedirs.
                import errno
                if exc.errno != errno.EEXIST:
                    raise

        print("Current File is", s3_path)
        try:
            key.get_contents_to_filename(s3_path)
        except (OSError, S3ResponseError) as e:
            # Best-effort backup: log the failure instead of silently passing.
            logging.warning("%s: download failed: %s", key_string, e)


if __name__ == '__main__':
    backup_s3_folder()
"""Mirror a bucket into LOCAL_PATH, creating directories as needed (boto2)."""
import boto, os

LOCAL_PATH = 'tmp/'
AWS_ACCESS_KEY_ID = 'YOUUR_AWS_ACCESS_KEY_ID'
AWS_SECRET_ACCESS_KEY = 'YOUR_AWS_SECRET_ACCESS_KEY'
bucket_name = 'your_bucket_name'

# Connect to the bucket.
conn = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
bucket = conn.get_bucket(bucket_name)

# Go through the list of keys in the bucket.
for key in bucket.list():
    key_string = str(key.key)
    dest = LOCAL_PATH + key_string

    # Folder placeholder keys end with '/': just create the directory.
    if key_string.endswith('/'):
        if not os.path.exists(dest):
            os.makedirs(dest)  # creates directories recursively
        continue

    # Bug fix: the original caught OSError and then created a directory at
    # the *file* path `dest` — so the object was never downloaded.  Create
    # the parent directory first, then fetch the object.
    parent = os.path.dirname(dest)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)
    key.get_contents_to_filename(dest)
只是在 @j0nes 的代码基础上补充了目录创建的部分
"""Download a bucket into the current directory, creating folders on demand."""
from boto.s3.connection import S3Connection
import os

conn = S3Connection('your-access-key', 'your-secret-key')
bucket = conn.get_bucket('bucket')

for key in bucket.list():
    print(key.name)  # py3-compatible print (original used a py2 print statement)
    if key.name.endswith('/'):
        # Folder placeholder key: just make the directory.
        if not os.path.exists('./' + key.name):
            os.makedirs('./' + key.name)
    else:
        # Bug fix: S3 does not guarantee that a "folder" key is listed before
        # the files under it, so ensure the parent directory exists before
        # downloading instead of relying on a prior '/'-key iteration.
        parent = os.path.dirname('./' + key.name)
        if parent and not os.path.exists(parent):
            os.makedirs(parent)
        res = key.get_contents_to_filename('./' + key.name)
这会将文件下载到当前目录并在需要时创建目录。
如果文件夹中有超过 1000 个文件,则需要使用分页器来遍历它们
"""Download every object under a prefix with boto3, paginating past 1000 keys."""
import boto3
import os

# Create the client object.
client = boto3.client(
    's3',
    aws_access_key_id=S3_ACCESS_KEY,
    aws_secret_access_key=S3_SECRET_KEY
)

# Bucket and prefix ("folder") to download.
bucket = 'bucket-name'
data_key = 'key/to/data/'

paginator = client.get_paginator("list_objects_v2")
# Bug fixes versus the original snippet:
#  * paginate() referenced an undefined name `data_dir` (should be `data_key`)
#  * the if/else SKIPPED the download whenever the directory had to be
#    created — now the directory is created AND the file is downloaded
#  * the local path was missing the '/' between directory and file name
#  * page['Contents'] is absent on empty pages — use .get() with a default
for page in paginator.paginate(Bucket=bucket, Prefix=data_key):
    for obj in page.get('Contents', []):
        key = obj['Key']
        if key.endswith('/'):
            continue  # folder placeholder object, no file content
        tmp_dir = '/'.join(key.split('/')[0:-1])
        if tmp_dir and not os.path.exists(tmp_dir):
            os.makedirs(tmp_dir)
        client.download_file(bucket, key, os.path.join(tmp_dir, key.split('/')[-1]))
"""Download the data/ and nlu_data/ prefixes of a bucket into model/ (boto2)."""
import boto
from boto.s3.key import Key

keyId = 'YOUR_AWS_ACCESS_KEY_ID'
# Bug fix: the original placeholder repeated the *access* key id here,
# which would lead users to paste the wrong credential.
sKeyId = 'YOUR_AWS_SECRET_ACCESS_KEY'
bucketName = 'your_bucket_name'

conn = boto.connect_s3(keyId, sKeyId)
bucket = conn.get_bucket(bucketName)

# The original duplicated identical copy logic per prefix (and ended in a
# syntax error: `k.get_contents_to_filename(destFileName` was never closed).
# Map each S3 prefix to its local destination folder instead.
DEST_BY_PREFIX = {
    "data": "model/data/",
    "nlu_data": "model/nlu_data/",
}

for key in bucket.list():
    print(">>>>>" + key.name)
    pathV = key.name.split('/')
    destRoot = DEST_BY_PREFIX.get(pathV[0])
    # Skip other prefixes and bare folder keys such as "data/".
    if destRoot is None or len(pathV) < 2 or pathV[1] == "":
        continue
    k = Key(bucket, key.name)
    k.get_contents_to_filename(destRoot + pathV[1])