0

我在 Amazon S3 中有一个文本文件,我想在我的 Hive UDF 中读取文件的内容。

尝试了以下代码,但不起作用。

UDF 代码:

package jbr.hiveudf;

import java.io.BufferedReader;
import java.io.InputStreamReader;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.UDF;

public class ReadS3FileContent extends UDF {
  String output = "";
  FileSystem _fileSystem;

  public String evaluate(String s3File) {

    try {
      BufferedReader br = new BufferedReader(new InputStreamReader(_fileSystem.open(new Path(s3File.toString()))));
      String line;
      while ((line = br.readLine()) != null) {
        output = output + line;
      }
    } catch (Exception e) {
      System.out.println(e.getMessage());
    }

    return output;
  }
}

Hive 查询:

-- Make the jar that contains the UDF class available to the Hive session.
ADD JAR s3://ranjith/myhive/lib/MyHiveUDF-0.1.jar;
-- Bind the session-scoped function name myhiveudf to the UDF class.
CREATE TEMPORARY FUNCTION myhiveudf AS 'jbr.hiveudf.ReadS3FileContent';
-- Call the UDF with the S3 file URI while selecting from mydb.mytable.
SELECT myhiveudf('s3n://ranjith/myhive/hive-data.txt') FROM mydb.mytable;

有人能帮忙看看问题出在哪里吗?

4

1 回答 1

1

找到了解决方案,这是我的示例程序。

将以下依赖项添加到 pom.xml

<!-- AWS SDK for Java: supplies the com.amazonaws classes imported by the UDF below. -->
<dependency>
    <groupId>com.amazonaws</groupId>
    <artifactId>aws-java-sdk</artifactId>
    <version>1.10.40</version>
</dependency>

UDF Java 程序:

package jbr.hiveudf;

import java.io.BufferedReader;
import java.io.InputStreamReader;

import org.apache.hadoop.hive.ql.exec.UDF;

import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.S3Object;

/**
 * Hive UDF that downloads a text object from Amazon S3 with the AWS SDK and
 * returns its content.
 */
public class ReadAwsS3FileContent extends UDF {

  /**
   * Reads the S3 object {@code fileKey} from {@code bucketName} as UTF-8 text.
   *
   * <p>NOTE(review): the credentials are passed as plain query arguments and may
   * therefore end up in query logs; consider a credentials provider instead.
   *
   * @param accessKey - access key of AWS.
   * @param secretKey - secret key of AWS.
   * @param bucketName - name of the bucket (e.g. mybucket)
   * @param fileKey - key of the object inside the bucket, without the bucket
   *          name (e.g. myfolder1/myfolder2/myfile1.txt)
   * @return the content of the object, with its lines joined by {@code '\n'}
   * @throws Exception if the object cannot be fetched or read
   */
  public String evaluate(String accessKey, String secretKey, String bucketName, String fileKey) throws Exception {
    AmazonS3 amazonS3 = new AmazonS3Client(new BasicAWSCredentials(accessKey, secretKey));
    StringBuilder content = new StringBuilder();

    // The S3 object holds an open HTTP connection until its content stream is
    // closed, so both the object and the reader are managed by try-with-resources.
    try (S3Object s3Object = amazonS3.getObject(new GetObjectRequest(bucketName, fileKey));
        BufferedReader reader = new BufferedReader(
            new InputStreamReader(s3Object.getObjectContent(), java.nio.charset.StandardCharsets.UTF_8))) {
      boolean firstLine = true;
      String line;
      while ((line = reader.readLine()) != null) {
        if (!firstLine) {
          content.append('\n');
        }
        content.append(line);
        firstLine = false;
      }
    }

    // Return the content to the query instead of only printing it to stdout.
    return content.toString();
  }
}

Hive 查询:

-- Make the jar that contains the UDF class available to the Hive session.
ADD JAR s3://ranjith/myhive/lib/MyHiveUDF-0.1.jar;
-- Bind the function name to the four-argument UDF class ReadAwsS3FileContent.
CREATE TEMPORARY FUNCTION ReadS3FileContent AS 'jbr.hiveudf.ReadAwsS3FileContent';
-- Arguments: access key, secret key, bucket name, object key (path without the bucket name).
SELECT ReadS3FileContent('aws-access-key','aws-secrect-key','bucket-name','file-name-full-path-without-bucket-name') FROM mydb.mytable;
于 2016-02-23T08:11:57.357 回答