I need to write code in Python that deletes the required file from an Amazon S3 bucket. I am able to connect to the Amazon S3 bucket, and also to save files, but how can I delete a file?
19 Answers
Using boto3 (currently at version 1.4.4), use S3.Object.delete().
import boto3
s3 = boto3.resource('s3')
s3.Object('your-bucket', 'your-key').delete()
Using the Python boto3 SDK (and assuming credentials are set up for AWS), the following will delete a specified object in a bucket:
import boto3
client = boto3.client('s3')
client.delete_object(Bucket='mybucketname', Key='myfile.whatever')
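Note that S3 deletes are idempotent: delete_object reports success (HTTP 204) even when the key never existed. If you need to know whether the object was actually there, a minimal sketch (bucket and key names are placeholders) is to check with head_object first:

import boto3
from botocore.exceptions import ClientError

client = boto3.client('s3')

def delete_if_exists(bucket, key):
    # head_object raises a ClientError with code '404' when the key is missing
    try:
        client.head_object(Bucket=bucket, Key=key)
    except ClientError as e:
        if e.response['Error']['Code'] == '404':
            return False
        raise
    client.delete_object(Bucket=bucket, Key=key)
    return True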
Found another way to do it using boto:
from boto.s3.connection import S3Connection, Bucket, Key
conn = S3Connection(AWS_ACCESS_KEY, AWS_SECRET_KEY)
b = Bucket(conn, S3_BUCKET_NAME)
k = Key(b)
k.key = 'images/my-images/'+filename
b.delete_key(k)
Welcome to 2020, here is the answer, in Python/Django:
from django.conf import settings
import boto3
s3 = boto3.client('s3')
s3.delete_object(Bucket=settings.AWS_STORAGE_BUCKET_NAME, Key=f"media/{item.file.name}")
It took me far too long to find the answer, and it was as simple as this.
Please try this code:
import boto3
s3 = boto3.client('s3')
s3.delete_object(Bucket="s3bucketname", Key="s3filepath")
Try to look for an updated method, since Boto3 changes from time to time. I used my_bucket.delete_objects():
import boto3
from boto3.session import Session
session = Session(aws_access_key_id='your_key_id',
                  aws_secret_access_key='your_secret_key')
# s3_client = session.client('s3')
s3_resource = session.resource('s3')
my_bucket = s3_resource.Bucket("your_bucket_name")
response = my_bucket.delete_objects(
    Delete={
        'Objects': [
            {
                'Key': "your_file_name_key"  # the name of your file
            }
        ]
    }
)
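delete_objects accepts up to 1000 keys per request, so the same call can clear out many files at once, far faster than looping over single deletes. A sketch under that assumption, where keys_to_delete is a hypothetical list of key strings:

keys_to_delete = ['a.txt', 'b.txt', 'c.txt']  # hypothetical keys
for i in range(0, len(keys_to_delete), 1000):  # S3 caps each request at 1000 keys
    my_bucket.delete_objects(
        Delete={'Objects': [{'Key': k} for k in keys_to_delete[i:i + 1000]]}
    )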
I'm surprised there isn't this simple way: key.delete():
from boto.s3.connection import S3Connection, Bucket, Key
conn = S3Connection(AWS_ACCESS_KEY, AWS_SECRET_KEY)
bucket = Bucket(conn, S3_BUCKET_NAME)
k = Key(bucket=bucket, name=path_to_file)
k.delete()
Via which interface? Using the REST interface, you just send a DELETE request:
DELETE /ObjectName HTTP/1.1
Host: BucketName.s3.amazonaws.com
Date: date
Content-Length: length
Authorization: signatureValue
Via the SOAP interface:
<DeleteObject xmlns="http://doc.s3.amazonaws.com/2006-03-01">
<Bucket>quotes</Bucket>
<Key>Nelson</Key>
<AWSAccessKeyId>1D9FVRAYCP1VJEXAMPLE=</AWSAccessKeyId>
<Timestamp>2006-03-01T12:00:00.183Z</Timestamp>
<Signature>Iuyz3d3P0aTou39dzbqaEXAMPLE=</Signature>
</DeleteObject>
If you're using a Python library like boto, it should expose a "delete" function, such as delete_key().
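If you want to issue that REST DELETE from Python without hand-rolling the signature, one option is to let boto3 presign the request and send it with any HTTP client. A sketch, assuming configured credentials and the requests package (bucket and key are placeholders):

import boto3
import requests

client = boto3.client('s3')
url = client.generate_presigned_url(
    'delete_object',
    Params={'Bucket': 'your-bucket', 'Key': 'your-key'},
    ExpiresIn=60,
)
response = requests.delete(url)  # plain HTTP DELETE; the signature lives in the URL
print(response.status_code)      # 204 on success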
Below is a code snippet you can use to delete an object from a bucket:
import boto3
s3 = boto3.resource("s3", aws_access_key_id='Your-Access-Key', aws_secret_access_key='Your-Secret-Key')
s3.Object('Bucket-Name', 'your-file-name').delete()  # the second argument is the object key
The simplest way to do it is:
import boto3
s3 = boto3.resource("s3")
bucket_source = {
    'Bucket': "my-bucket",
    'Key': "file_path_in_bucket"
}
s3.meta.client.delete_object(**bucket_source)
For now, I have solved the problem by using the Linux utility s3cmd. I used it like this in Python:
delFile = 's3cmd -c /home/project/.s3cfg del s3://images/anon-images/small/' + filename
os.system(delFile)
You can do it using the aws cli ( https://aws.amazon.com/cli/ ) and some unix commands.
This aws cli command should work:
aws s3 rm s3://<your_bucket_name> --exclude "*" --include "<your_regex>"
If you want to include sub-folders, you should add the flag --recursive.
Or with unix commands:
aws s3 ls s3://<your_bucket_name>/ | awk '{print $4}' | xargs -I% <your_os_shell> -c 'aws s3 rm s3://<your_bucket_name>/%'
Explanation:
- list all files in the bucket --pipe-->
- get the 4th parameter (it's the file name) --pipe--> // you can replace this with a linux command to match your pattern
- run the delete script using the aws cli
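The same pattern-matched delete can also be done in pure Python, which avoids the shell round-trip. A sketch with a list_objects_v2 paginator and the re module (bucket name and regex are placeholders):

import re
import boto3

client = boto3.client('s3')
pattern = re.compile(r'your_regex')

paginator = client.get_paginator('list_objects_v2')
for page in paginator.paginate(Bucket='your_bucket_name'):
    for obj in page.get('Contents', []):
        if pattern.search(obj['Key']):
            client.delete_object(Bucket='your_bucket_name', Key=obj['Key'])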
Use the S3FileSystem.rm function in s3fs. You can delete a single file or several files at once:
import s3fs
file_system = s3fs.S3FileSystem()
file_system.rm('s3://my-bucket/foo.txt') # single file
files = ['s3://my-bucket/bar.txt', 's3://my-bucket/baz.txt']
file_system.rm(files) # several files
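rm also takes a recursive flag if you want to remove everything under a prefix, e.g. file_system.rm('s3://my-bucket/some-prefix', recursive=True).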
2021 update - I struggled with this, but it turned out to be as simple as the following.
def delete_object(self, request):
    s3 = boto3.resource('s3',
        aws_access_key_id=AWS_UPLOAD_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_UPLOAD_SECRET_KEY,
    )
    s3.Object('your-bucket', 'your-key').delete()
Make sure to add the credentials in your boto3 resource.
If you are trying to delete a file from your own localhost console, then you can try running this python script, assuming that you have already set your access ID and secret key on the system:
import boto3

# my custom session
aws_m = boto3.session.Session(profile_name="your-profile-name-on-local-host")
client = aws_m.client('s3')

# list bucket objects before deleting
response = client.list_objects(
    Bucket='your-bucket-name'
)
for x in response.get("Contents", []):
    print(x.get("Key"))

# delete a bucket object
response = client.delete_object(
    Bucket='your-bucket-name',
    Key='mydocs.txt'
)

# list bucket objects after deleting
response = client.list_objects(
    Bucket='your-bucket-name'
)
for x in response.get("Contents", []):
    print(x.get("Key"))
It worked for me, try it out.
import boto
import sys
from boto.s3.key import Key
import boto.s3.connection

AWS_ACCESS_KEY_ID = '<access_key>'
AWS_SECRET_ACCESS_KEY = '<secret_access_key>'
Bucketname = 'bucket_name'

conn = boto.s3.connect_to_region('us-east-2',
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    is_secure=True,
    calling_format=boto.s3.connection.OrdinaryCallingFormat(),
)
bucket = conn.get_bucket(Bucketname)

k = Key(bucket)
k.key = 'filename to delete'
bucket.delete_key(k)
If you want to delete all files from an s3 bucket in the simplest way, with just a few lines of code, use this:
import boto3
s3 = boto3.resource('s3', aws_access_key_id='XXX', aws_secret_access_key= 'XXX')
bucket = s3.Bucket('your_bucket_name')
bucket.objects.delete()
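One caveat: on a bucket with versioning enabled, objects.delete() only adds delete markers. To remove every stored version as well, the resource API exposes an object_versions collection:

bucket.object_versions.delete()  # deletes all versions and delete markers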
Here is how I did it:
"""
This is module which contains all classes related to aws S3
"""
"""
awshelper.py
-------
This module contains the AWS class
"""
try:
import re
import os
import json
import boto3
import datetime
import uuid
import math
from boto3.s3.transfer import TransferConfig
import threading
import sys
from tqdm import tqdm
except Exception as e:
print("Error : {} ".format(e))
DATA = {
"AWS_ACCESS_KEY": "XXXXXXXXXXXX",
"AWS_SECRET_KEY": "XXXXXXXXXXXXX",
"AWS_REGION_NAME": "us-east-1",
"BUCKET": "XXXXXXXXXXXXXXXXXXXX",
}
for key, value in DATA.items():os.environ[key] = str(value)
class Size:
@staticmethod
def convert_size(size_bytes):
if size_bytes == 0:
return "0B"
size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
i = int(math.floor(math.log(size_bytes, 1024)))
p = math.pow(1024, i)
s = round(size_bytes / p, 2)
return "%s %s" % (s, size_name[i])
class ProgressPercentage(object):
def __init__(self, filename, filesize):
self._filename = filename
self._size = filesize
self._seen_so_far = 0
self._lock = threading.Lock()
def __call__(self, bytes_amount):
def convertSize(size):
if (size == 0):
return '0B'
size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
i = int(math.floor(math.log(size,1024)))
p = math.pow(1024,i)
s = round(size/p,2)
return '%.2f %s' % (s,size_name[i])
# To simplify, assume this is hooked up to a single filename
with self._lock:
self._seen_so_far += bytes_amount
percentage = (self._seen_so_far / self._size) * 100
sys.stdout.write(
"\r%s %s / %s (%.2f%%) " % (
self._filename, convertSize(self._seen_so_far), convertSize(self._size),
percentage))
sys.stdout.flush()
class ProgressPercentageUpload(object):
def __init__(self, filename):
self._filename = filename
self._size = float(os.path.getsize(filename))
self._seen_so_far = 0
self._lock = threading.Lock()
def __call__(self, bytes_amount):
# To simplify, assume this is hooked up to a single filename
with self._lock:
self._seen_so_far += bytes_amount
percentage = (self._seen_so_far / self._size) * 100
sys.stdout.write(
"\r%s %s / %s (%.2f%%)" % (
self._filename, self._seen_so_far, self._size,
percentage))
sys.stdout.flush()
class AWSS3(object):
"""Helper class to which add functionality on top of boto3 """
def __init__(self, bucket, aws_access_key_id, aws_secret_access_key, region_name):
self.BucketName = bucket
self.client = boto3.client(
"s3",
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
region_name=region_name,
)
def get_length(self, Key):
response = self.client.head_object(Bucket=self.BucketName, Key=Key)
size = response["ContentLength"]
return {"bytes": size, "size": Size.convert_size(size)}
def put_files(self, Response=None, Key=None):
"""
Put the File on S3
:return: Bool
"""
try:
response = self.client.put_object(
ACL="private", Body=Response, Bucket=self.BucketName, Key=Key
)
return "ok"
except Exception as e:
print("Error : {} ".format(e))
return "error"
def item_exists(self, Key):
"""Given key check if the items exists on AWS S3 """
try:
response_new = self.client.get_object(Bucket=self.BucketName, Key=str(Key))
return True
except Exception as e:
return False
def get_item(self, Key):
"""Gets the Bytes Data from AWS S3 """
try:
response_new = self.client.get_object(Bucket=self.BucketName, Key=str(Key))
return response_new["Body"].read()
except Exception as e:
print("Error :{}".format(e))
return False
def find_one_update(self, data=None, key=None):
"""
This checks if Key is on S3 if it is return the data from s3
else store on s3 and return it
"""
flag = self.item_exists(Key=key)
if flag:
data = self.get_item(Key=key)
return data
else:
self.put_files(Key=key, Response=data)
return data
def delete_object(self, Key):
response = self.client.delete_object(Bucket=self.BucketName, Key=Key,)
return response
def get_all_keys(self, Prefix="", max_page_number=100):
"""
:param Prefix: Prefix string
:return: Keys List
"""
try:
paginator = self.client.get_paginator("list_objects_v2")
pages = paginator.paginate(Bucket=self.BucketName, Prefix=Prefix)
tmp = []
for page_no, page in enumerate(pages):
if page_no >max_page_number:break
print("page_no : {}".format(page_no))
for obj in page["Contents"]:
tmp.append(obj["Key"])
return tmp
except Exception as e:
return []
def print_tree(self):
keys = self.get_all_keys()
for key in keys:
print(key)
return None
def find_one_similar_key(self, searchTerm=""):
keys = self.get_all_keys()
return [key for key in keys if re.search(searchTerm, key)]
def __repr__(self):
return "AWS S3 Helper class "
def download_file_locally(self, key, filename):
try:
response = self.client.download_file(
Bucket=self.BucketName,
Filename=filename,
Key=key,
Callback=ProgressPercentage(filename,
(self.client.head_object(Bucket=self.BucketName,
Key=key))["ContentLength"]),
Config=TransferConfig(
max_concurrency=10,
use_threads=True,
)
)
return True
except Exception as e:
print("Error Download file : {}".format(e))
return False
def upload_files_from_local(self, file_name, key):
try:
response = self.client.upload_file(
Filename=file_name,
Bucket=self.BucketName ,
Key = key,
Callback=ProgressPercentageUpload(file_name),
Config=TransferConfig(
max_concurrency=10,
use_threads=True,
))
return True
except Exception as e:
print("Error upload : {} ".format(e))
return False
def batch_objects_delete_threadded(batch_size=50, max_page_size=100):
helper_qa = AWSS3(
aws_access_key_id=os.getenv("AWS_ACCESS_KEY"),
aws_secret_access_key=os.getenv("XXXXXXXXXXXXX"),
region_name=os.getenv("AWS_REGION_NAME"),
bucket=os.getenv("BUCKET"),
)
keys = helper_qa.get_all_keys(Prefix="database=XXXXXXXXXXX/", max_page_number=max_page_size)
MainThreads = [threading.Thread(target=helper_qa.delete_object, args=(key, )) for key in keys]
print("Length: keys : {} ".format(len(keys)))
for thread in tqdm(range(0, len(MainThreads), batch_size)):
for t in MainThreads[thread: thread + batch_size]:t.start()
for t in MainThreads[thread: thread + batch_size] : t.join()
# ==========================================
start = datetime.datetime.now()
batch_objects_delete_threadded()
end = datetime.datetime.now()
print("Execution Time : {} ".format(end-start))
# ==========================================
The following worked for me (based on an example for a Django model, but you can pretty much use the code of the delete method on its own).
import boto3
from boto3.session import Session
from django.conf import settings
from django.db import models
from django.utils import timezone
from taggit.managers import TaggableManager  # from django-taggit

class Video(models.Model):
    title = models.CharField(max_length=500)
    description = models.TextField(default="")
    creation_date = models.DateTimeField(default=timezone.now)
    videofile = models.FileField(upload_to='videos/', null=True, verbose_name="")
    tags = TaggableManager()

    actions = ['delete']

    def __str__(self):
        return self.title + ": " + str(self.videofile)

    def delete(self, *args, **kwargs):
        session = Session(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY)
        s3_resource = session.resource('s3')
        s3_bucket = s3_resource.Bucket(settings.AWS_STORAGE_BUCKET_NAME)
        file_path = "media/" + str(self.videofile)
        response = s3_bucket.delete_objects(
            Delete={
                'Objects': [
                    {
                        'Key': file_path
                    }
                ]
            })
        super(Video, self).delete(*args, **kwargs)