0

我正在尝试对此处显示的输入数据执行 k 均值算法的聚类: https ://cwiki.apache.org/MAHOUT/clustering-of-synthetic-control-data.html 但是,当地图缩减作业即将进行时发生我得到错误

11/10/16 21:05:57 INFO mapred.JobClient: Task Id : attempt_201110161920_0008_m_000000_0,     Status : FAILED
Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
at java.net.URLClassLoader$1.run(URLClassLoader.java:202)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:190)
at java.lang.ClassLoader.loadClass(ClassLoader.java:306)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:301)
at java.lang.ClassLoader.loadClass(ClassLoader.java:247)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:247)
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:762)
at org.apache.hadoop.io.WritableName.getClass(WritableName.java:71)
at org.apache.hadoop.io.SequenceFile$Reader.getValueClass(SequenceFile.java:1613)
at org.apache.hadoop.io.SequenceFile$Reader.init(SequenceFile.java:1555)
at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1428)
at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1417)
at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1412)
at org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader.initialize(SequenceFileRecordReader.java:50)
at org.apache.hadoop.mapred.MapTask$NewTrackingRecordReader.initialize(MapTask.java:418)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:620)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:305)
at org.apache.hadoop.mapred.Child.main(Child.java:170)

11/10/16 21:06:03 INFO mapred.JobClient: Task Id : attempt_201110161920_0008_m_000000_1, Status : FAILED
Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
at java.net.URLClassLoader$1.run(URLClassLoader.java:202)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:190)
at java.lang.ClassLoader.loadClass(ClassLoader.java:306)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:301)
at java.lang.ClassLoader.loadClass(ClassLoader.java:247)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:247)
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:762)
at org.apache.hadoop.io.WritableName.getClass(WritableName.java:71)
at org.apache.hadoop.io.SequenceFile$Reader.getValueClass(SequenceFile.java:1613)
at org.apache.hadoop.io.SequenceFile$Reader.init(SequenceFile.java:1555)
at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1428)
at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1417)
at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1412)
at   org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader.initialize(SequenceFileRecordReader.java:50)
at    org.apache.hadoop.mapred.MapTask$NewTrackingRecordReader.initialize(MapTask.java:418)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:620)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:305)
at org.apache.hadoop.mapred.Child.main(Child.java:170)

11/10/16 21:06:09 INFO mapred.JobClient: Task Id : attempt_201110161920_0008_m_000000_2,     Status : FAILED
    Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
at java.net.URLClassLoader$1.run(URLClassLoader.java:202)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:190)
at java.lang.ClassLoader.loadClass(ClassLoader.java:306)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:301)
at java.lang.ClassLoader.loadClass(ClassLoader.java:247)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:247)
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:762)
at org.apache.hadoop.io.WritableName.getClass(WritableName.java:71)
at org.apache.hadoop.io.SequenceFile$Reader.getValueClass(SequenceFile.java:1613)
at org.apache.hadoop.io.SequenceFile$Reader.init(SequenceFile.java:1555)
at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1428)
at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1417)
at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1412)
at org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader.initialize(SequenceFileRecordReader.java:50)
at org.apache.hadoop.mapred.MapTask$NewTrackingRecordReader.initialize(MapTask.java:418)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:620)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:305)
at org.apache.hadoop.mapred.Child.main(Child.java:170)

11/10/16 21:06:18 INFO mapred.JobClient: Job complete: job_201110161920_0008
11/10/16 21:06:18 INFO mapred.JobClient: Counters: 3
11/10/16 21:06:18 INFO mapred.JobClient:   Job Counters 
11/10/16 21:06:18 INFO mapred.JobClient:     Launched map tasks=4
11/10/16 21:06:18 INFO mapred.JobClient:     Data-local map tasks=4
11/10/16 21:06:18 INFO mapred.JobClient:     Failed map tasks=1
Exception in thread "main" java.lang.InterruptedException: K-Means Iteration failed processing output/clusters-0/part-randomSeed
at org.apache.mahout.clustering.kmeans.KMeansDriver.runIteration(KMeansDriver.java:363)
at org.apache.mahout.clustering.kmeans.KMeansDriver.buildClustersMR(KMeansDriver.java:310)
at org.apache.mahout.clustering.kmeans.KMeansDriver.buildClusters(KMeansDriver.java:237)
at org.apache.mahout.clustering.kmeans.KMeansDriver.run(KMeansDriver.java:152)
at org.apache.mahout.clustering.syntheticcontrol.kmeans.Job.run(Job.java:149)
at org.apache.mahout.clustering.syntheticcontrol.kmeans.Job.main(Job.java:60)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at org.apache.hadoop.util.ProgramDriver$ProgramDescription.invoke(ProgramDriver.java:68)
at org.apache.hadoop.util.ProgramDriver.driver(ProgramDriver.java:139)
at org.apache.mahout.driver.MahoutDriver.main(MahoutDriver.java:187)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at org.apache.hadoop.util.RunJar.main(RunJar.java:156)

有人可以告诉我如何纠正这个问题。这真的意味着很多。谢谢你的时间。

4

2 回答 2

1

这意味着您没有提供将作业运行到 Hadoop 所需的所有类。您发送给它的 JAR 文件必须打包所有依赖项,包括来自其核心和数学模块的所有 Mahout 类。幸运的是,Mahout 为您做到了这一点。查看它在构建时创建的“作业”文件,该文件出现在target/.

于 2011-10-16T16:32:41.420 回答
0

您缺少org.apache.mahout.math包裹:

Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
于 2012-07-09T15:54:39.497 回答