我正在尝试从 hive 创建表(CTAS)并希望以 BSON 格式编写文件,以便将其导入 MongoDb。这是我的查询:
create table if not exists rank_locn
ROW FORMAT SERDE "com.mongodb.hadoop.hive.BSONSerde"
STORED AS INPUTFORMAT "com.mongodb.hadoop.BSONFileInputFormat"
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
as
select RGN_OVRHD_NBR,DM_OVRHD_NBR,LOCN_NBR,Derived,
rank() OVER (ORDER BY DERIVED DESC) as NationalRnk,
rank() OVER (PARTITION BY RGN_OVRHD_NBR ORDER BY DERIVED DESC) as RegionRnk,
rank() OVER (PARTITION BY DM_OVRHD_NBR ORDER BY DERIVED DESC) as DistrictRnk
from Locn_Dim_Values
where Derived between -999999 and 999999;
启动了三个工作。最后一个reduce作业失败了。错误日志如下:
java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row (tag=0) {"key":{"reducesinkkey0":78133,"reducesinkkey1":143.82632293080053},"value":{"_col0":1,"_col1":12,"_col2":79233,"_col3":78133,"_col4":1634,"_col5":143.82632293080053},"alias":0}
at org.apache.hadoop.hive.ql.exec.ExecReducer.reduce(ExecReducer.java:274)
at org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:522)
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:421)
at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190)
at org.apache.hadoop.mapred.Child.main(Child.java:249)
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row (tag=0) {"key":{"reducesinkkey0":78133,"reducesinkkey1":143.82632293080053},"value":{"_col0":1,"_col1":12,"_col2":79233,"_col3":78133,"_col4":1634,"_col5":143.82632293080053},"alias":0}
at org.apache.hadoop.hive.ql.exec.ExecReducer.reduce(ExecReducer.java:262)
... 7 more
Caused by: java.lang.NullPointerException
at org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat$1.write(HiveIgnoreKeyTextOutputFormat.java:91)
at org.apache.hadoop.hive.ql.exec.FileSinkOperator.processOp(FileSinkOperator.java:637)
at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:502)
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:832)
at org.apache.hadoop.hive.ql.exec.SelectOperator.processOp(SelectOperator.java:84)
at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:502)
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:832)
at org.apache.hadoop.hive.ql.exec.PTFOperator.executeWindowExprs(PTFOperator.java:341)
at org.apache.hadoop.hive.ql.exec.PTFOperator.processInputPartition(PTFOperator.java:198)
at org.apache.hadoop.hive.ql.exec.PTFOperator.processOp(PTFOperator.java:130)
at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:502)
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:832)
at org.apache.hadoop.hive.ql.exec.ExtractOperator.processOp(ExtractOperator.java:45)
at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:502)
at org.apache.hadoop.hive.ql.exec.ExecReducer.reduce(ExecReducer.java:253)
... 7 more
请帮我解决问题。