0

当我运行以下查询时,我收到以下错误。

insert overwrite table mybug select row_number() over (order by clickstream_key) as key, clickstream_key as data from mytest;

它正在启动大约 2 个 mapper 任务和 240 个 reducer 任务,并且这项工作进展顺利且快速,多达 239 个任务,需要 3 小时来完成 4 次任务尝试,然后工作就失败了。表mytest包含 20 亿条记录,其中有一列“clickstream_key”。

2016-07-24 09:54:37,918 [ERROR] [TezChild] |tez.TezProcessor|: java.lang.OutOfMemoryError: Java heap space
at java.util.Arrays.copyOf(Arrays.java:3181)
at java.util.ArrayList.grow(ArrayList.java:261)
at java.util.ArrayList.ensureExplicitCapacity(ArrayList.java:235)
at java.util.ArrayList.ensureCapacityInternal(ArrayList.java:227)
at java.util.ArrayList.add(ArrayList.java:458)
at org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRowNumber$RowNumberBuffer.incr(GenericUDAFRowNumber.java:73)
at org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRowNumber$GenericUDAFRowNumberEvaluator.iterate(GenericUDAFRowNumber.java:102)
at org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.aggregate(GenericUDAFEvaluator.java:185)
at org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.evaluateWindowFunction(WindowingTableFunction.java:164)
at org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.iterator(WindowingTableFunction.java:580)
at org.apache.hadoop.hive.ql.exec.PTFOperator$PTFInvocation.finishPartition(PTFOperator.java:340)
at org.apache.hadoop.hive.ql.exec.PTFOperator.closeOp(PTFOperator.java:95)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:617)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:631)
at org.apache.hadoop.hive.ql.exec.tez.ReduceRecordProcessor.close(ReduceRecordProcessor.java:290)
at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:164)
at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:139)
at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:344)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:181)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:172)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1709)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:172)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:168)
at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
2016-07-24 09:54:37,920 [INFO] [TezChild] |task.TezTaskRunner|: Encounted an error while executing task: attempt_1469365079244_0001_1_01_000000_0
java.lang.RuntimeException: java.lang.OutOfMemoryError: Java heap space
at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:173)
at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:139)
at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:344)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:181)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:172)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1709)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:172)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:168)
at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.OutOfMemoryError: Java heap space
at java.util.Arrays.copyOf(Arrays.java:3181)
at java.util.ArrayList.grow(ArrayList.java:261)
at java.util.ArrayList.ensureExplicitCapacity(ArrayList.java:235)
at java.util.ArrayList.ensureCapacityInternal(ArrayList.java:227)
at java.util.ArrayList.add(ArrayList.java:458)
at org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRowNumber$RowNumberBuffer.incr(GenericUDAFRowNumber.java:73)

我尝试增加数据节点堆、reducer heap、mapper heap、hive.tez.java.opts 但它们都不起作用。对此的任何线索将不胜感激。错误说java堆空间,它是指哪个java堆空间?

4

0 回答 0