我无法在 Hive 中使用 Elephant Bird 4.14 反序列化包含 repeated string(重复字符串)字段的 protobuf 数据。这似乎是因为 repeated string 功能仅适用于 Protobuf 2.6 而不是 Protobuf 2.5。在 AWS EMR 集群中运行我的 Hive 查询时,它使用的是与 AWS Hive 捆绑的 Protobuf 2.5。即使显式添加了 Protobuf 2.6 的 jar,这个错误依然存在。我想知道如何让 Hive 使用我显式添加的 Protobuf 2.6 jar。
以下是使用的 Hive 查询:
-- Register the jars this Hive session needs: the protobuf runtime, the GAM
-- model/access-layer jars, the RODB storage handler, Elephant Bird, and the
-- custom protobuf deserializer. Each jar is registered exactly once.
-- NOTE(review): registering protobuf-java 2.6.1 here presumably does not
-- replace a protobuf-java copy that Hive itself has already loaded; confirm
-- the cluster's class-loading order if an older runtime keeps being picked up.
ADD JAR s3://gam.test/hive-jars/protobuf-java-2.6.1.jar;
ADD JAR s3://gam.test/hive-jars/GAMDataModel-1.0.jar;
ADD JAR s3://gam.test/hive-jars/GAMCoreModel-1.0.jar;
ADD JAR s3://gam.test/hive-jars/GAMAccessLayer-1.1.jar;
ADD JAR s3://gam.test/hive-jars/RodbHiveStorageHandler-0.12.0-jarjar-final.jar;
ADD JAR s3://gam.test/hive-jars/elephant-bird-core-4.14.jar;
ADD JAR s3://gam.test/hive-jars/elephant-bird-hive-4.14.jar;
ADD JAR s3://gam.test/hive-jars/elephant-bird-hadoop-compat-4.14.jar;
ADD JAR s3://gam.test/hive-jars/GamProtoBufHiveDeserializer-1.0-jarjar.jar;

-- Recreate the table from scratch; IF EXISTS keeps the script re-runnable
-- when the table has not been created yet.
DROP TABLE IF EXISTS GamRelationRodb;

-- No column list is declared here; the table is defined only by the SerDe,
-- its "serialization.class" property, and the storage handler.
CREATE EXTERNAL TABLE GamRelationRodb
ROW FORMAT SERDE "com.amazon.hive.serde.GamProtobufDeserializer"
WITH SERDEPROPERTIES (
    "serialization.class" = "com.amazon.gam.rodb.model.RepeatedRelationshipWrapperProto$RepeatedRelationshipWrapper"
)
STORED BY 'com.amazon.rodb.hadoop.hive.RodbHiveStorageHandler'
TBLPROPERTIES (
    "file.name" = 'GAM_Relationship',
    "file.path" = 's3://pathtofile/'
);

-- Smoke test: fetch a few rows to exercise deserialization.
SELECT * FROM GamRelationRodb LIMIT 10;
下面是 Protobuf 文件的格式:
// Wrapper message carrying a list of Relationship entries in field 1.
// The element type is referenced as `relationship.Relationship`
// (presumably the package that declares Relationship; confirm).
message RepeatedRelationshipWrapper {
  repeated relationship.Relationship relationships = 1;
}
// A single relationship: a mandatory type plus a list of string ids.
message Relationship {
  // Kind of relationship; must always be set (required field).
  required RelationshipType type = 1;
  // Zero or more string identifiers attached to this relationship.
  repeated string ids = 2;
}
// Relationship kinds used by Relationship.type.
enum RelationshipType {
  // NOTE(review): "UKNOWN" looks like a misspelling of "UNKNOWN"; the name is
  // kept as-is because renaming it would change the generated identifier.
  UKNOWN_RELATIONSHIP_TYPE = 0;
  PARENT = 1;
  CHILD = 2;
}
下面是运行查询时抛出的运行时异常:
Exception in thread "main" java.lang.NoSuchMethodError: com.google.protobuf.LazyStringList.getUnmodifiableView()Lcom/google/protobuf/LazyStringList;
at com.amazon.gam.model.RelationshipProto$Relationship.<init>(RelationshipProto.java:215)
at com.amazon.gam.model.RelationshipProto$Relationship.<init>(RelationshipProto.java:137)
at com.amazon.gam.model.RelationshipProto$Relationship$1.parsePartialFrom(RelationshipProto.java:239)
at com.amazon.gam.model.RelationshipProto$Relationship$1.parsePartialFrom(RelationshipProto.java:234)
at com.google.protobuf.CodedInputStream.readMessage(CodedInputStream.java:309)
at com.amazon.gam.rodb.model.RepeatedRelationshipWrapperProto$RepeatedRelationshipWrapper.<init>(RepeatedRelationshipWrapperProto.java:126)
at com.amazon.gam.rodb.model.RepeatedRelationshipWrapperProto$RepeatedRelationshipWrapper.<init>(RepeatedRelationshipWrapperProto.java:72)
at com.amazon.gam.rodb.model.RepeatedRelationshipWrapperProto$RepeatedRelationshipWrapper$1.parsePartialFrom(RepeatedRelationshipWrapperProto.java:162)
at com.amazon.gam.rodb.model.RepeatedRelationshipWrapperProto$RepeatedRelationshipWrapper$1.parsePartialFrom(RepeatedRelationshipWrapperProto.java:157)
at com.amazon.gam.rodb.model.RepeatedRelationshipWrapperProto$RepeatedRelationshipWrapper$Builder.mergeFrom(RepeatedRelationshipWrapperProto.java:495)
at com.amazon.gam.rodb.model.RepeatedRelationshipWrapperProto$RepeatedRelationshipWrapper$Builder.mergeFrom(RepeatedRelationshipWrapperProto.java:355)
at com.google.protobuf.AbstractMessage$Builder.mergeFrom(AbstractMessage.java:337)
at com.google.protobuf.AbstractMessage$Builder.mergeFrom(AbstractMessage.java:267)
at com.google.protobuf.AbstractMessageLite$Builder.mergeFrom(AbstractMessageLite.java:170)
at com.google.protobuf.AbstractMessage$Builder.mergeFrom(AbstractMessage.java:882)
at com.google.protobuf.AbstractMessage$Builder.mergeFrom(AbstractMessage.java:267)
at com.twitter.elephantbird.mapreduce.io.ProtobufConverter.fromBytes(ProtobufConverter.java:66)
at com.twitter.elephantbird.hive.serde.ProtobufDeserializer.deserialize(ProtobufDeserializer.java:59)
at com.amazon.hive.serde.GamProtobufDeserializer.deserialize(GamProtobufDeserializer.java:63)
at org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:502)
at org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:428)
at org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:146)
at org.apache.hadoop.hive.ql.Driver.getResults(Driver.java:2098)
at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:252)
at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183)
at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399)
at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:776)
at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714)
at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
at org.apache.hadoop.util.RunJar.main(RunJar.java:136)