
I am trying to get the DistributedCache working. I am using Apache Hadoop 1.2.1 on two nodes.

I referred to the Cloudera post on including third-party jars with -libjars, which several other posts extend with simple examples.
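
As I understand it, ToolRunner hands the full argument array to GenericOptionsParser, which should consume generic options such as -libjars before run() ever sees them. A minimal sketch of what I expect it to do (ParseCheck is just an illustrative name, not part of my job):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;

public class ParseCheck {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // consumes generic options: -libjars, -files, -archives, -D ...
        GenericOptionsParser parser = new GenericOptionsParser(conf, args);
        // whatever is left should be the program's own arguments
        String[] remaining = parser.getRemainingArgs();
        // -libjars entries end up in the configuration under "tmpjars"
        System.out.println("tmpjars = " + conf.get("tmpjars"));
        for (String arg : remaining) {
            System.out.println("program arg: " + arg);
        }
    }
}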

Notes:

I have not bundled any jar libraries inside my jar: neither the Hadoop core jar nor commons-lang.

Code:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class WordCounter extends Configured implements Tool {

    @Override
    public int run(String[] args) throws Exception {
        // getConf() returns the Configuration that ToolRunner populated
        Job job = new Job(getConf(), args[0]);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        job.setJarByClass(WordCounter.class);

        FileInputFormat.setInputPaths(job, new Path(args[1]));
        FileOutputFormat.setOutputPath(job, new Path(args[2]));

        job.setMapperClass(WCMapper.class);
        job.setReducerClass(WCReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        if (args == null || args.length < 3) {
            System.out.println("Three arguments are expected:");
            System.out.println("<job name> <hdfs path of the input file> <hdfs path of the output file>");
            return;
        }

        System.exit(ToolRunner.run(new Configuration(), new WordCounter(), args));
    }
}
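
For what it's worth, my understanding is that the same effect could be achieved programmatically instead of via -libjars; a minimal sketch of what I could add inside run() after creating the Job (the HDFS path below is made up for illustration, and the jar would have to be uploaded to HDFS first):

import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;

// inside run(): attach a jar that already sits on HDFS to the task classpath
DistributedCache.addFileToClassPath(
        new Path("/libs/commons-lang3-3.1.jar"), job.getConfiguration());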

The Mapper class is naive; it merely tries to use StringUtils from Apache Commons (not the Hadoop one):

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * @author 298790
 * 
 */
public class WCMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    private static final IntWritable one = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        StringTokenizer strTokenizer = new StringTokenizer(value.toString());
        Text token = new Text();

        while (strTokenizer.hasMoreTokens()) {
            token.set(strTokenizer.nextToken());
            context.write(token, one);
        }

        // this StringUtils call is what fails on the task nodes
        System.out.println("Converting " + value + " to upper case "
                + StringUtils.upperCase(value.toString()));
    }
}

The commands I used:

bigdata@slave3:~$ export HADOOP_CLASSPATH=dumphere/lib/commons-lang3-3.1.jar
bigdata@slave3:~$
bigdata@slave3:~$ echo $HADOOP_CLASSPATH
dumphere/lib/commons-lang3-3.1.jar
bigdata@slave3:~$
bigdata@slave3:~$ echo $LIBJARS
dumphere/lib/commons-lang3-3.1.jar
bigdata@slave3:~$ hadoop jar dumphere/code/jars/hdp_3rdparty.jar com.hadoop.basics.WordCounter "WordCount" "/input/dumphere/Childhood_days.txt" "/output/dumphere/wc" -libjars ${LIBJARS}

The exception I get:

Warning: $HADOOP_HOME is deprecated.

14/08/13 21:56:05 INFO input.FileInputFormat: Total input paths to process : 1
14/08/13 21:56:05 INFO util.NativeCodeLoader: Loaded the native-hadoop library
14/08/13 21:56:05 WARN snappy.LoadSnappy: Snappy native library not loaded
14/08/13 21:56:05 INFO mapred.JobClient: Running job: job_201408111719_0190
14/08/13 21:56:06 INFO mapred.JobClient:  map 0% reduce 0%
14/08/13 21:56:37 INFO mapred.JobClient: Task Id : attempt_201408111719_0190_m_000000_0, Status : FAILED
Error: java.lang.ClassNotFoundException: org.apache.commons.lang3.StringUtils
        at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
        at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
        at java.security.AccessController.doPrivileged(Native Method)
        at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
        at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
        at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
        at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
        at com.hadoop.basics.WCMapper.map(WCMapper.java:40)
        at com.hadoop.basics.WCMapper.map(WCMapper.java:1)
        at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:145)
        at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:764)
        at org.apache.hadoop.mapred.MapTask.run(MapTask.java:364)
        at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:415)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190)
        at org.apache.hadoop.mapred.Child.main(Child.java:249)

14/08/13 21:56:42 INFO mapred.JobClient: Task Id : attempt_201408111719_0190_m_000000_1, Status : FAILED
Error: java.lang.ClassNotFoundException: org.apache.commons.lang3.StringUtils
        at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
        at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
        at java.security.AccessController.doPrivileged(Native Method)
        at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
        at java.lang.ClassLoader.loadClass(ClassLoader.java:423)
        at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
        at java.lang.ClassLoader.loadClass(ClassLoader.java:356)
        at com.hadoop.basics.WCMapper.map(WCMapper.java:40)
        at com.hadoop.basics.WCMapper.map(WCMapper.java:1)
        at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:145)
        at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:764)
        at org.apache.hadoop.mapred.MapTask.run(MapTask.java:364)
        at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:415)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190)
        at org.apache.hadoop.mapred.Child.main(Child.java:249)

The Cloudera post mentions:

The jar will be placed in distributed cache and will be made available to all of the job’s task attempts. More specifically, you will find the JAR in one of the ${mapred.local.dir}/taskTracker/archive/${user.name}/distcache/… subdirectories on local nodes.

But under that path, I could not find commons-lang3-3.1.jar.
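
To narrow this down, I am considering adding a setup() override to WCMapper (hypothetical, not in the code above) so the task attempt logs show what the child JVM actually sees:

@Override
protected void setup(Context context) {
    // the classpath of the task's child JVM
    System.out.println("task classpath = " + System.getProperty("java.class.path"));
    // GenericOptionsParser records -libjars entries under "tmpjars"
    System.out.println("tmpjars = " + context.getConfiguration().get("tmpjars"));
}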

What am I missing?
