1

我有一个 Pentaho MapReduce 作业(本质上是 Java 作业),它以 HBase 数据作为映射(map)输入。该工作流在少量数据(例如 100 行)上运行正常,但在处理几十万条记录时会失败。有两个映射器任务被提交到集群,它们只做简单的数据聚合(大约 400 000 行,分布在两个 HBase 区域中)。似乎是任务未能在 600 秒内报告其状态,而该超时行为由 mapred-site.xml 中的 mapred.task.timeout 设置控制。我不确定如何在 Cloudera 4.1.4 发行版的 Hadoop 中更改此设置?另外,在以下错误日志中,您还可以看到一些其他错误:

Meta VERSION="1" .
Job JOBID="job_201309201413_0003" JOBNAME="hotel reviews agg HBase" USER="root" SUBMIT_TIME="1379684231747" JOBCONF="hdfs://HDP-MASTER\.PI\.LOCAL:8020/user/root/\.staging/job_201309201413_0003/job\.xml" VIEW_JOB="*" MODIFY_JOB="*" JOB_QUEUE="default" .
Job JOBID="job_201309201413_0003" JOB_PRIORITY="NORMAL" .
Job JOBID="job_201309201413_0003" LAUNCH_TIME="1379684231998" TOTAL_MAPS="2" TOTAL_REDUCES="1" JOB_STATUS="PREP" .
Task TASKID="task_201309201413_0003_m_000003" TASK_TYPE="SETUP" START_TIME="1379684232152" SPLITS="" .
MapAttempt TASK_TYPE="SETUP" TASKID="task_201309201413_0003_m_000003" TASK_ATTEMPT_ID="attempt_201309201413_0003_m_000003_0" START_TIME="1379684232617" TRACKER_NAME="tracker_HDP-MASTER\.PI\.LOCAL:localhost/127\.0\.0\.1:47578" HTTP_PORT="50060" .
MapAttempt TASK_TYPE="SETUP" TASKID="task_201309201413_0003_m_000003" TASK_ATTEMPT_ID="attempt_201309201413_0003_m_000003_0" TASK_STATUS="SUCCESS" FINISH_TIME="1379684234634" HOSTNAME="/default/HDP-MASTER\.PI\.LOCAL" STATE_STRING="setup" COUNTERS="{(org\.apache\.hadoop\.mapreduce\.FileSystemCounter)(File System Counters)[(FILE_BYTES_READ)(FILE: Number of bytes read)(0)][(FILE_BYTES_WRITTEN)(FILE: Number of bytes written)(285100)][(FILE_READ_OPS)(FILE: Number of read operations)(0)][(FILE_LARGE_READ_OPS)(FILE: Number of large read operations)(0)][(FILE_WRITE_OPS)(FILE: Number of write operations)(0)][(HDFS_BYTES_READ)(HDFS: Number of bytes read)(0)][(HDFS_BYTES_WRITTEN)(HDFS: Number of bytes written)(0)][(HDFS_READ_OPS)(HDFS: Number of read operations)(0)][(HDFS_LARGE_READ_OPS)(HDFS: Number of large read operations)(0)][(HDFS_WRITE_OPS)(HDFS: Number of write operations)(1)]}{(org\.apache\.hadoop\.mapreduce\.TaskCounter)(Map-Reduce Framework)[(SPILLED_RECORDS)(Spilled Records)(0)][(CPU_MILLISECONDS)(CPU time spent \\(ms\\))(180)][(PHYSICAL_MEMORY_BYTES)(Physical memory \\(bytes\\) snapshot)(143708160)][(VIRTUAL_MEMORY_BYTES)(Virtual memory \\(bytes\\) snapshot)(739024896)][(COMMITTED_HEAP_BYTES)(Total committed heap usage \\(bytes\\))(200998912)]}nullnullnullnullnullnullnullnullnullnullnullnullnull" .
Task TASKID="task_201309201413_0003_m_000003" TASK_TYPE="SETUP" TASK_STATUS="SUCCESS" FINISH_TIME="1379684234879" COUNTERS="{(org\.apache\.hadoop\.mapreduce\.FileSystemCounter)(File System Counters)[(FILE_BYTES_READ)(FILE: Number of bytes read)(0)][(FILE_BYTES_WRITTEN)(FILE: Number of bytes written)(285100)][(FILE_READ_OPS)(FILE: Number of read operations)(0)][(FILE_LARGE_READ_OPS)(FILE: Number of large read operations)(0)][(FILE_WRITE_OPS)(FILE: Number of write operations)(0)][(HDFS_BYTES_READ)(HDFS: Number of bytes read)(0)][(HDFS_BYTES_WRITTEN)(HDFS: Number of bytes written)(0)][(HDFS_READ_OPS)(HDFS: Number of read operations)(0)][(HDFS_LARGE_READ_OPS)(HDFS: Number of large read operations)(0)][(HDFS_WRITE_OPS)(HDFS: Number of write operations)(1)]}{(org\.apache\.hadoop\.mapreduce\.TaskCounter)(Map-Reduce Framework)[(SPILLED_RECORDS)(Spilled Records)(0)][(CPU_MILLISECONDS)(CPU time spent \\(ms\\))(180)][(PHYSICAL_MEMORY_BYTES)(Physical memory \\(bytes\\) snapshot)(143708160)][(VIRTUAL_MEMORY_BYTES)(Virtual memory \\(bytes\\) snapshot)(739024896)][(COMMITTED_HEAP_BYTES)(Total committed heap usage \\(bytes\\))(200998912)]}nullnullnullnullnullnullnullnullnullnullnullnullnull" .
Job JOBID="job_201309201413_0003" JOB_STATUS="RUNNING" .
Task TASKID="task_201309201413_0003_m_000000" TASK_TYPE="MAP" START_TIME="1379684235791" SPLITS="/default/HDP-SLAVE1\.PI\.LOCAL" .
Task TASKID="task_201309201413_0003_m_000001" TASK_TYPE="MAP" START_TIME="1379684235792" SPLITS="/default/HDP-SLAVE2\.PI\.LOCAL" .
MapAttempt TASK_TYPE="MAP" TASKID="task_201309201413_0003_m_000000" TASK_ATTEMPT_ID="attempt_201309201413_0003_m_000000_0" START_TIME="1379684235797" TRACKER_NAME="tracker_HDP-MASTER\.PI\.LOCAL:localhost/127\.0\.0\.1:47578" HTTP_PORT="50060" .
MapAttempt TASK_TYPE="MAP" TASKID="task_201309201413_0003_m_000000" TASK_ATTEMPT_ID="attempt_201309201413_0003_m_000000_0" TASK_STATUS="FAILED" FINISH_TIME="1379684420508" HOSTNAME="HDP-MASTER\.PI\.LOCAL" ERROR="java\.lang\.Throwable: Child Error
    at org\.apache\.hadoop\.mapred\.TaskRunner\.run(TaskRunner\.java:250)
Caused by: java\.io\.IOException: Task process exit with nonzero status of 65\.
    at org\.apache\.hadoop\.mapred\.TaskRunner\.run(TaskRunner\.java:237)
" .
MapAttempt TASK_TYPE="MAP" TASKID="task_201309201413_0003_m_000001" TASK_ATTEMPT_ID="attempt_201309201413_0003_m_000001_0" START_TIME="1379684235798" TRACKER_NAME="tracker_HDP-MASTER\.PI\.LOCAL:localhost/127\.0\.0\.1:47578" HTTP_PORT="50060" .
MapAttempt TASK_TYPE="MAP" TASKID="task_201309201413_0003_m_000001" TASK_ATTEMPT_ID="attempt_201309201413_0003_m_000001_0" TASK_STATUS="FAILED" FINISH_TIME="1379684918509" HOSTNAME="HDP-MASTER\.PI\.LOCAL" ERROR="Task attempt_201309201413_0003_m_000001_0 failed to report status for 600 seconds\. Killing!" .
MapAttempt TASK_TYPE="MAP" TASKID="task_201309201413_0003_m_000000" TASK_ATTEMPT_ID="attempt_201309201413_0003_m_000000_1" START_TIME="1379684420613" TRACKER_NAME="tracker_HDP-MASTER\.PI\.LOCAL:localhost/127\.0\.0\.1:47578" HTTP_PORT="50060" .
MapAttempt TASK_TYPE="MAP" TASKID="task_201309201413_0003_m_000000" TASK_ATTEMPT_ID="attempt_201309201413_0003_m_000000_1" TASK_STATUS="FAILED" FINISH_TIME="1379685147302" HOSTNAME="HDP-MASTER\.PI\.LOCAL" ERROR="Task attempt_201309201413_0003_m_000000_1 failed to report status for 600 seconds\. Killing!" .
MapAttempt TASK_TYPE="MAP" TASKID="task_201309201413_0003_m_000000" TASK_ATTEMPT_ID="attempt_201309201413_0003_m_000000_2" START_TIME="1379685147358" TRACKER_NAME="tracker_HDP-MASTER\.PI\.LOCAL:localhost/127\.0\.0\.1:47578" HTTP_PORT="50060" .
MapAttempt TASK_TYPE="MAP" TASKID="task_201309201413_0003_m_000000" TASK_ATTEMPT_ID="attempt_201309201413_0003_m_000000_2" TASK_STATUS="FAILED" FINISH_TIME="1379685300867" HOSTNAME="HDP-MASTER\.PI\.LOCAL" ERROR="org\.apache\.hadoop\.io\.SecureIOUtils$AlreadyExistsException: EEXIST: File exists
    at org\.apache\.hadoop\.io\.SecureIOUtils\.createForWrite(SecureIOUtils\.java:178)
    at org\.apache\.hadoop\.mapred\.TaskLog\.writeToIndexFile(TaskLog\.java:303)
    at org\.apache\.hadoop\.mapred\.TaskLog\.syncLogs(TaskLog\.java:376)
    at org\.apache\.hadoop\.mapred\.Child$4\.run(Child\.java:270)
    at java\.security\.AccessController\.doPrivileged(Native Method)
    at javax\.security\.auth\.Subject\.doAs(Subject\.java:396)
    at org\.apache\.hadoop\.security\.UserGroupInformation\.doAs(UserGroupInformation\.java:1332)
    at org\.apache\.hadoop\.mapred\.Child\.main(Child\.java:262)
Caused by: EEXIST: File exists
    at org\.apache\.hadoop\.io\.nativeio\.NativeIO\.open(Native Method)
    at org\.apache\.hadoop\.io\.SecureIOUtils\.createForWrite(SecureIOUtils\.java:172)
    \.\.\. 7 more
" .
MapAttempt TASK_TYPE="MAP" TASKID="task_201309201413_0003_m_000000" TASK_ATTEMPT_ID="attempt_201309201413_0003_m_000000_3" START_TIME="1379685300874" TRACKER_NAME="tracker_HDP-MASTER\.PI\.LOCAL:localhost/127\.0\.0\.1:47578" HTTP_PORT="50060" .
MapAttempt TASK_TYPE="MAP" TASKID="task_201309201413_0003_m_000000" TASK_ATTEMPT_ID="attempt_201309201413_0003_m_000000_3" TASK_STATUS="FAILED" FINISH_TIME="1379685525375" HOSTNAME="HDP-MASTER\.PI\.LOCAL" ERROR="org\.apache\.hadoop\.io\.SecureIOUtils$AlreadyExistsException: EEXIST: File exists
    at org\.apache\.hadoop\.io\.SecureIOUtils\.createForWrite(SecureIOUtils\.java:178)
    at org\.apache\.hadoop\.mapred\.TaskLog\.writeToIndexFile(TaskLog\.java:303)
    at org\.apache\.hadoop\.mapred\.TaskLog\.syncLogs(TaskLog\.java:376)
    at org\.apache\.hadoop\.mapred\.Child$4\.run(Child\.java:270)
    at java\.security\.AccessController\.doPrivileged(Native Method)
    at javax\.security\.auth\.Subject\.doAs(Subject\.java:396)
    at org\.apache\.hadoop\.security\.UserGroupInformation\.doAs(UserGroupInformation\.java:1332)
    at org\.apache\.hadoop\.mapred\.Child\.main(Child\.java:262)
Caused by: EEXIST: File exists
    at org\.apache\.hadoop\.io\.nativeio\.NativeIO\.open(Native Method)
    at org\.apache\.hadoop\.io\.SecureIOUtils\.createForWrite(SecureIOUtils\.java:172)
    \.\.\. 7 more
" .
Task TASKID="task_201309201413_0003_m_000000" TASK_TYPE="MAP" TASK_STATUS="FAILED" FINISH_TIME="1379685525375" ERROR="org\.apache\.hadoop\.io\.SecureIOUtils$AlreadyExistsException: EEXIST: File exists
    at org\.apache\.hadoop\.io\.SecureIOUtils\.createForWrite(SecureIOUtils\.java:178)
    at org\.apache\.hadoop\.mapred\.TaskLog\.writeToIndexFile(TaskLog\.java:303)
    at org\.apache\.hadoop\.mapred\.TaskLog\.syncLogs(TaskLog\.java:376)
    at org\.apache\.hadoop\.mapred\.Child$4\.run(Child\.java:270)
    at java\.security\.AccessController\.doPrivileged(Native Method)
    at javax\.security\.auth\.Subject\.doAs(Subject\.java:396)
    at org\.apache\.hadoop\.security\.UserGroupInformation\.doAs(UserGroupInformation\.java:1332)
    at org\.apache\.hadoop\.mapred\.Child\.main(Child\.java:262)
Caused by: EEXIST: File exists
    at org\.apache\.hadoop\.io\.nativeio\.NativeIO\.open(Native Method)
    at org\.apache\.hadoop\.io\.SecureIOUtils\.createForWrite(SecureIOUtils\.java:172)
    \.\.\. 7 more
" TASK_ATTEMPT_ID="" .
Task TASKID="task_201309201413_0003_m_000002" TASK_TYPE="CLEANUP" START_TIME="1379685525619" SPLITS="" .
MapAttempt TASK_TYPE="MAP" TASKID="task_201309201413_0003_m_000001" TASK_ATTEMPT_ID="attempt_201309201413_0003_m_000001_1" START_TIME="1379684918586" TRACKER_NAME="tracker_HDP-MASTER\.PI\.LOCAL:localhost/127\.0\.0\.1:47578" HTTP_PORT="50060" .
MapAttempt TASK_TYPE="MAP" TASKID="task_201309201413_0003_m_000001" TASK_ATTEMPT_ID="attempt_201309201413_0003_m_000001_1" TASK_STATUS="KILLED" FINISH_TIME="1379685525643" HOSTNAME="HDP-MASTER\.PI\.LOCAL" ERROR="" .
MapAttempt TASK_TYPE="CLEANUP" TASKID="task_201309201413_0003_m_000002" TASK_ATTEMPT_ID="attempt_201309201413_0003_m_000002_0" START_TIME="1379685525630" TRACKER_NAME="tracker_HDP-MASTER\.PI\.LOCAL:localhost/127\.0\.0\.1:47578" HTTP_PORT="50060" .
MapAttempt TASK_TYPE="CLEANUP" TASKID="task_201309201413_0003_m_000002" TASK_ATTEMPT_ID="attempt_201309201413_0003_m_000002_0" TASK_STATUS="SUCCESS" FINISH_TIME="1379685529379" HOSTNAME="/default/HDP-MASTER\.PI\.LOCAL" STATE_STRING="cleanup" COUNTERS="{(org\.apache\.hadoop\.mapreduce\.FileSystemCounter)(File System Counters)[(FILE_BYTES_READ)(FILE: Number of bytes read)(0)][(FILE_BYTES_WRITTEN)(FILE: Number of bytes written)(285100)][(FILE_READ_OPS)(FILE: Number of read operations)(0)][(FILE_LARGE_READ_OPS)(FILE: Number of large read operations)(0)][(FILE_WRITE_OPS)(FILE: Number of write operations)(0)][(HDFS_BYTES_READ)(HDFS: Number of bytes read)(0)][(HDFS_BYTES_WRITTEN)(HDFS: Number of bytes written)(0)][(HDFS_READ_OPS)(HDFS: Number of read operations)(1)][(HDFS_LARGE_READ_OPS)(HDFS: Number of large read operations)(0)][(HDFS_WRITE_OPS)(HDFS: Number of write operations)(2)]}{(org\.apache\.hadoop\.mapreduce\.TaskCounter)(Map-Reduce Framework)[(SPILLED_RECORDS)(Spilled Records)(0)][(CPU_MILLISECONDS)(CPU time spent \\(ms\\))(190)][(PHYSICAL_MEMORY_BYTES)(Physical memory \\(bytes\\) snapshot)(135462912)][(VIRTUAL_MEMORY_BYTES)(Virtual memory \\(bytes\\) snapshot)(739024896)][(COMMITTED_HEAP_BYTES)(Total committed heap usage \\(bytes\\))(200998912)]}nullnullnullnullnullnullnullnullnullnullnullnullnull" .
Task TASKID="task_201309201413_0003_m_000002" TASK_TYPE="CLEANUP" TASK_STATUS="SUCCESS" FINISH_TIME="1379685529548" COUNTERS="{(org\.apache\.hadoop\.mapreduce\.FileSystemCounter)(File System Counters)[(FILE_BYTES_READ)(FILE: Number of bytes read)(0)][(FILE_BYTES_WRITTEN)(FILE: Number of bytes written)(285100)][(FILE_READ_OPS)(FILE: Number of read operations)(0)][(FILE_LARGE_READ_OPS)(FILE: Number of large read operations)(0)][(FILE_WRITE_OPS)(FILE: Number of write operations)(0)][(HDFS_BYTES_READ)(HDFS: Number of bytes read)(0)][(HDFS_BYTES_WRITTEN)(HDFS: Number of bytes written)(0)][(HDFS_READ_OPS)(HDFS: Number of read operations)(1)][(HDFS_LARGE_READ_OPS)(HDFS: Number of large read operations)(0)][(HDFS_WRITE_OPS)(HDFS: Number of write operations)(2)]}{(org\.apache\.hadoop\.mapreduce\.TaskCounter)(Map-Reduce Framework)[(SPILLED_RECORDS)(Spilled Records)(0)][(CPU_MILLISECONDS)(CPU time spent \\(ms\\))(190)][(PHYSICAL_MEMORY_BYTES)(Physical memory \\(bytes\\) snapshot)(135462912)][(VIRTUAL_MEMORY_BYTES)(Virtual memory \\(bytes\\) snapshot)(739024896)][(COMMITTED_HEAP_BYTES)(Total committed heap usage \\(bytes\\))(200998912)]}nullnullnullnullnullnullnullnullnullnullnullnullnull" .
Job JOBID="job_201309201413_0003" FINISH_TIME="1379685529549" JOB_STATUS="FAILED" FINISHED_MAPS="0" FINISHED_REDUCES="0" .
4

1 回答 1

0

似乎工作跟踪器无法与任务跟踪器通信。

可能是任务太重了。

于 2013-11-27T02:22:11.853 回答