我正在尝试将 pandas-profiling 的 html 输出从我的胶水作业加载到 s3,但得到的是空文件。将内存中的文件保存到 s3 是一个问题。我尝试了以下解决方案,但没有运气将内存中的 HTML 保存到 S3 AWS Python Boto3
import pandas as pd
import boto3
import io
from pandas_profiling import ProfileReport
from io import StringIO
#Pull all file names/keys from S3
s3 = boto3.client('s3')
def get_matching_s3_keys(bucket, prefix='', suffix=''):
"""
Generate the keys in an S3 bucket.
:param bucket: Name of the S3 bucket.
:param prefix: Only fetch keys that start with this prefix (optional).
:param suffix: Only fetch keys that end with this suffix (optional).
"""
kwargs = {'Bucket': bucket, 'Prefix': prefix}
while True:
resp = s3.list_objects_v2(**kwargs)
for obj in resp['Contents']:
key = obj['Key']
if key.endswith(suffix):
yield key
try:
kwargs['ContinuationToken'] = resp['NextContinuationToken']
except KeyError:
break
#Pull all file paths and append to list
tables_list = []
for key in get_matching_s3_keys('mybucketname', 'processed/', '.csv'):
print(key)
tables_list.append(key)
for i in tables_list:
obj = s3.get_object(Bucket='mybucketname', Key=i)
df = pd.read_csv(obj['Body'])
profile = ProfileReport(df, title = 'My Data Profile', html ={"style": {'full_width':True}}, minimal=True)
profile.to_file(i.lstrip("processed/").rstrip(".csv")+".html")
str_obj = StringIO()
profile.to_file(str_obj, 'html')
buf = str_obj.getvalue().encode()
# Upload as bytes
s3.put_object(
Bucket='mybucketname',
Key=i.lstrip("processed/").rstrip(".csv")+".html",
Body=buf
)
关于我需要用我的代码调整什么的任何想法?