
I am uploading gzip files to an S3 bucket from a Java application; the data is meant to be queried with Athena. The gzip file gets corrupted during the upload. Athena cannot see any data in the gzip file, and when I download the file and try to decompress it manually, it says "Not in GZIP format".

private void getAndProcessFilesGenReports(String parUrl, String custCode, long size, String queryDate) {
    try (CloseableHttpClient httpclient = HttpClientBuilder.create()
        .setDefaultCredentialsProvider(getCredentialsProvider()).build()) {
      HttpGet httpget = new HttpGet(BASE_URI.concat(parUrl));
      httpget.setConfig(config);
      CloseableHttpResponse response = httpclient.execute(httpget);

      log.info("getAndProcessFilesGenReports response ---> {} {}",
          response.getStatusLine().getStatusCode(), response.getStatusLine().getReasonPhrase());

      if (response.getStatusLine().getStatusCode() != 200) {
        log.error("getAndProcessFilesGenReports parUrl could not get response for custCode---> {}", custCode);
      }

      if (response.getStatusLine().getStatusCode() == 200) {
        GZIPInputStream gzis = new GZIPInputStream(response.getEntity().getContent());
        String bucketName = bucketForDetailedBilling(GEN_REPORT_TYPE, custCode, queryDate);
        uploadGzipFileToS3(gzis, size, bucketName);
      }
    } catch (Exception e) {
      log.error("error in getAndProcessFilesGenReports()--->", e);
    }
  }
private void uploadGzipFileToS3(InputStream gzis, long size, String bucketName) {
    log.info("uploadGzipFileToS3 size{} --- bucketName {}--->", size, bucketName);
    ClientConfiguration clientConfiguration = new ClientConfiguration();
    clientConfiguration.setConnectionMaxIdleMillis(600000);
    clientConfiguration.setConnectionTimeout(600000);
    clientConfiguration.setClientExecutionTimeout(600000);
    clientConfiguration.setUseGzip(true);
    clientConfiguration.setConnectionTTL(1000 * 60 * 60);
    AmazonS3Client amazonS3Client = new AmazonS3Client(clientConfiguration);
    TransferManager transferManager = new TransferManager(amazonS3Client);
    try {
      ObjectMetadata objectMetadata = new ObjectMetadata();
      objectMetadata.setContentLength(size);

      transferManager.getConfiguration().setMultipartUploadThreshold(1024 * 5);

      PutObjectRequest request = new PutObjectRequest(bucketName, DBR_NAME + DBR_EXT, gzis, objectMetadata);
      request.getRequestClientOptions().setReadLimit(1024 * 5 + 1);
      request.setSdkClientExecutionTimeout(10000 * 60 * 60);

      Upload upload = transferManager.upload(request);

      upload.waitForCompletion();
    } catch (Exception e) {
      log.error("error in uploadGzipFileToS3()--->", e);
    }
  }
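For reference, GZIPInputStream decompresses the response as it is read, so what uploadGzipFileToS3 streams to S3 is the uncompressed payload, while the key keeps the .gz name and Content-Length is set to the size of the compressed file, so the two no longer match. setUseGzip(true) on the ClientConfiguration is a transport-level setting on the SDK's HTTP client and does not compress the uploaded object. Below is a minimal sketch, not the original code, of an upload path that passes the downloaded bytes through unchanged; the class name, method name, and key are illustrative placeholders.

import java.io.InputStream;

import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;

import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.services.s3.transfer.TransferManager;
import com.amazonaws.services.s3.transfer.Upload;

public class RawGzipUploadSketch {

  // Downloads a .gz report and uploads the compressed bytes to S3 unchanged.
  public void downloadAndUpload(String url, String bucketName, String key) throws Exception {
    try (CloseableHttpClient httpclient = HttpClientBuilder.create().build();
         CloseableHttpResponse response = httpclient.execute(new HttpGet(url))) {

      if (response.getStatusLine().getStatusCode() != 200) {
        throw new IllegalStateException(
            "Unexpected HTTP status: " + response.getStatusLine().getStatusCode());
      }

      // No GZIPInputStream here: the stream stays compressed all the way to S3.
      InputStream compressed = response.getEntity().getContent();

      ObjectMetadata metadata = new ObjectMetadata();
      long contentLength = response.getEntity().getContentLength();
      if (contentLength > 0) {
        // Length of the compressed payload as reported by the server, if known.
        metadata.setContentLength(contentLength);
      }
      metadata.setContentType("application/gzip");

      TransferManager transferManager = new TransferManager(new AmazonS3Client());
      try {
        Upload upload = transferManager.upload(
            new PutObjectRequest(bucketName, key, compressed, metadata));
        upload.waitForCompletion();
      } finally {
        // Releases the TransferManager threads and the underlying client.
        transferManager.shutdownNow();
      }
    }
  }
}

If a Content-Length is declared, it has to match the compressed bytes actually streamed; otherwise the stored object is truncated or the upload fails before completion.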
