我正在使用 Java 应用程序将 gzip 文件上传到 S3 存储桶,其中的数据将在 Athena 中使用。上传过程中 gzip 文件被损坏:Athena 无法读取该 gzip 文件中的数据,并且在下载该文件并手动尝试解压缩时,会提示“它不是 gzip 文件”。
private void getAndProcessFilesGenReports(String parUrl, String custCode, long size, String queryDate) {
try (CloseableHttpClient httpclient = HttpClientBuilder.create().setDefaultCredentialsProvider(getCredentialsProvider()).build();) {
CloseableHttpResponse response;
HttpGet httpget = new HttpGet(BASE_URI.concat(parUrl));
response = httpclient.execute(httpget);
httpget.setConfig(config);
response.getStatusLine().getStatusCode(), response.getStatusLine().getReasonPhrase());
if (response.getStatusLine().getStatusCode() != 200) {
log.error("getAndProcessFilesGenReports partUrl could not get response for custCode---> {}", custCode);
}
if (response.getStatusLine().getStatusCode() == 200) {
GZIPInputStream gzis = new GZIPInputStream(response.getEntity().getContent());
String bucketName = bucketForDetailedBilling(GEN_REPORT_TYPE, custCode, queryDate);
uploadGzipFileToS3(gzis, size, bucketName);
}
} catch (Exception e) {
log.error("error in getAndProcessFilesGenReports()--->", e);
}
}
private void uploadGzipFileToS3(InputStream gzis, long size, String bucketName) {
log.info("uploadGzipFileToS3 size{} --- bucketName {}--->", size, bucketName);
ClientConfiguration clientConfiguration = new ClientConfiguration();
clientConfiguration.setConnectionMaxIdleMillis(600000);
clientConfiguration.setConnectionTimeout(600000);
clientConfiguration.setClientExecutionTimeout(600000);
clientConfiguration.setUseGzip(true);
clientConfiguration.setConnectionTTL(1000 * 60 * 60);
AmazonS3Client amazonS3Client = new AmazonS3Client(clientConfiguration);
TransferManager transferManager = new TransferManager(amazonS3Client);
try {
ObjectMetadata objectMetadata = new ObjectMetadata();
objectMetadata.setContentLength(size);
transferManager.getConfiguration().setMultipartUploadThreshold(1024 * 5);
PutObjectRequest request = new PutObjectRequest(bucketName, DBR_NAME + DBR_EXT, gzis, objectMetadata);
request.getRequestClientOptions().setReadLimit(1024 * 5 + 1);
request.setSdkClientExecutionTimeout(10000 * 60 * 60);
Upload upload = transferManager.upload(request);
upload.waitForCompletion();
}`