我正在尝试使用 Boto3 moto 的 mocks3 实用程序来测试我连接到 s3 的代码,该函数基本上列出了所有具有分区日期的文件夹并返回最新的文件夹。我在模拟 s3 存储桶时没有发现异常,但是测试代码似乎没有找到那个存储桶
我的测试规范
import os
import unittest
from botocore.client import ClientError
from moto import mock_s3
from src.utils.aws_utils import *
import logging
log = logging.getLogger("my-logger")
MY_BUCKET = "mock_s3_bucket"
MY_PREFIX = "mock_folder"
@mock_s3
class TestPysparkUtils(unittest.TestCase):
def setUp(self):
s3 = boto3.resource(
"s3",
region_name="us-east-1",
aws_access_key_id="fake_access_key",
aws_secret_access_key="fake_secret_key",
)
s3.create_bucket(Bucket="{}".format(MY_BUCKET))
s3.Bucket(MY_BUCKET).put_object(Key='{}/{}/partition_date=20201223/file_20201223.txt'
.format(MY_BUCKET, MY_PREFIX), Body='def')
s3.Bucket(MY_BUCKET).put_object(Key='{}/{}/partition_date=20201222/file_20201222.txt'
.format(MY_BUCKET, MY_PREFIX), Body='abc')
def tearDown(self):
s3 = boto3.resource(
"s3",
region_name="us-east-1",
aws_access_key_id="fake_access_key",
aws_secret_access_key="fake_secret_key",
)
bucket = s3.Bucket(MY_BUCKET)
for key in bucket.objects.all():
key.delete()
bucket.delete()
def test_get_latest_file_path_inter(self):
print('{}/{}'.format(MY_BUCKET, MY_PREFIX))
s3 = boto3.resource(
"s3",
region_name="us-east-1",
aws_access_key_id="fake_access_key",
aws_secret_access_key="fake_secret_key",
)
try:
s3.meta.client.head_bucket(Bucket=MY_BUCKET)
print("Bucket Exists!")
except ClientError:
log.info('The bucket does not exist or you have no access.')
result = get_latest_file_path_inter(log, s3, 's3://{}/{}/'.format(MY_BUCKET, MY_PREFIX), 'partition_date')
print('------------------------')
print(result)
desired_result = ["foo.json", "bar.json"]
self.assertCountEqual(result, desired_result)
if __name__ == "__main__":
unittest.main()
测试功能
def get_latest_file_path_inter(logger, s3_client, base_path, partition):
"""
Returns full s3 path of latest partition assuming partition date is of format yyyyMMdd
:type (object, str, str) -> (str)
:parameter
:param logger Logger object
:param s3_client boto3 s3 client object
:param base_path Base s3 path
:param partition column name
"""
print("Inside get_latest_file_path_inter() : Given: {} {}".format(base_path, partition))
start = base_path.find("//") + 2
end = base_path.find("/", start)
bucket_in = base_path[start:end]
prefix_in = base_path[base_path.find(bucket_in) + len(bucket_in) + 1:]
print(
"bucket: {} | prefix: {} | partition: {} | path: s3://{}/{}".format(bucket_in, prefix_in, partition,
bucket_in, prefix_in))
objects = list(s3_client.Bucket(bucket_in).objects.filter(Prefix=prefix_in))
print("total objects found: {}".format(len(objects)))
dict_out = {}
if len(objects) == 0:
logger.info("Error. no files found")
return
for i in range(0, len(objects)):
file_str = objects[i].key
start = file_str.find(partition) + len(partition)
end = file_str.find("/", start)
part_found = file_str[start:end]
partial_path = file_str[:file_str.find(partition) + len(partition) + 8]
dict_out[part_found] = partial_path
dict_sort = collections.OrderedDict(sorted(dict_out.items()))
last = list(dict_sort.keys())[len(dict_sort) - 1]
path_final = "s3://{}/{}/".format(bucket_in, dict_sort.get(last))
print("path_final: {} for base_path: {} and partition: {} and last: {} and dict_sort: {}".format(
path_final, base_path, partition, last, dict_sort))
return path_final
输出
mock_s3_bucket/mock_folder
Inside get_latest_file_path_inter() : Given: s3://mock_s3_bucket/mock_folder/ partition_date
bucket: mock_s3_bucket | prefix: mock_folder/ | partition: partition_date | path: s3://mock_s3_bucket/mock_folder/
s3.Bucket(name='mock_s3_bucket')
total objects found: 0
------------------------
None