I am trying to write a GenericUDF for Hive. When I add the JAR and try to create a temporary function pointing at the class, I get an error and the function is not created. Could someone provide a Java code template for a GenericUDF that takes a single String argument and returns a Map object? There is a lot of confusion around the ObjectInspector classes, so a very basic template showing the input and return types for this function would be a big help.
Here is my Java code:
package test;

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaIntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;

public class CustomUDF extends GenericUDF {

    Map<Character, Integer> myMap;
    StringObjectInspector elementOI;

    @Override
    public String getDisplayString(String[] arg0) {
        return "myfunction()";
    }

    @Override
    public ObjectInspector initialize(ObjectInspector[] arg) throws UDFArgumentException {
        if (arg.length != 1) {
            throw new UDFArgumentLengthException("myfunction() only takes 1 argument: String");
        }

        // 1. Check we received the right object types.
        ObjectInspector a = arg[0];
        if (!(a instanceof StringObjectInspector)) {
            throw new UDFArgumentException("Argument must be a string");
        }

        // The custom function is going to return an object of type Map<Character, Integer>.
        myMap = new HashMap<Character, Integer>();
        this.elementOI = (StringObjectInspector) a;

        // I could not find a way to create an ObjectInspector for Character,
        // so I am creating an ObjectInspector for String instead.
        return ObjectInspectorFactory.getStandardMapObjectInspector(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector,
                PrimitiveObjectInspectorFactory.javaIntObjectInspector);
    }

    @Override
    public Object evaluate(DeferredObject[] arg) throws HiveException {
        myMap.clear();
        String str = elementOI.getPrimitiveJavaObject(arg[0].get());

        // Check for nulls.
        if (str == null) {
            return null;
        }

        for (Character ch : str.toCharArray()) {
            myMap.put(ch, 1);
        }
        return myMap;
    }
}
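Side note on the key type, in case it matters: initialize() declares javaStringObjectInspector for the map keys, but the Java map is built with Character keys, so I assume the keys should really be one-character Strings to match the declared inspector. A minimal sketch of that variant (only the parts of the class above that would change; everything else stays the same):

// Inside the same CustomUDF class as above; only these two parts change.
Map<String, Integer> myMap;   // was Map<Character, Integer>

@Override
public Object evaluate(DeferredObject[] arg) throws HiveException {
    myMap.clear();
    String str = elementOI.getPrimitiveJavaObject(arg[0].get());
    if (str == null) {
        return null;
    }
    for (char ch : str.toCharArray()) {
        // One-character String keys to match the javaStringObjectInspector returned by initialize().
        myMap.put(String.valueOf(ch), 1);
    }
    return myMap;
}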
This is what happens when I try to create a function in Hive against the JAR:
hive> add jar test.jar;
Added [test.jar] to class path
Added resources: [test.jar]
hive> create temporary function myfunction as 'test.CustomUDF';
java.lang.UnsupportedClassVersionError: test/CustomUDF : Unsupported major.minor version 52.0
at java.lang.ClassLoader.defineClass1(Native Method)
at java.lang.ClassLoader.defineClass(ClassLoader.java:803)
at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
at java.net.URLClassLoader.defineClass(URLClassLoader.java:449)
at java.net.URLClassLoader.access$100(URLClassLoader.java:71)
at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:278)
at org.apache.hadoop.hive.ql.exec.FunctionTask.getUdfClass(FunctionTask.java:307)
at org.apache.hadoop.hive.ql.exec.FunctionTask.createTemporaryFunction(FunctionTask.java:174)
at org.apache.hadoop.hive.ql.exec.FunctionTask.execute(FunctionTask.java:74)
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:160)
at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:86)
at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1631)
at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1390)
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1197)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1024)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1014)
at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:250)
at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:202)
at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:413)
at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:786)
at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:680)
at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:619)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
FAILED: Execution Error, return code -101 from org.apache.hadoop.hive.ql.exec.FunctionTask. test/CustomUDF : Unsupported major.minor version 52.0
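For what it's worth, my understanding is that "Unsupported major.minor version 52.0" means the class file was compiled for Java 8 bytecode, while the JVM that Hive is running on here is older (Java 7 or earlier), so this looks like a compile-target mismatch rather than a problem in the UDF code itself. A sketch of how I would recompile targeting Java 7 and rebuild the JAR (the classpath entries and paths are just placeholders for my setup):

javac -source 1.7 -target 1.7 -cp "$(hadoop classpath):$HIVE_HOME/lib/*" test/CustomUDF.java
jar cf test.jar test/CustomUDF.class

After that I would re-run the same add jar and create temporary function statements as above.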