1

嗨,我正在尝试使用以下 map-reduce 代码解析我的 XML 文件并在输出中创建一个 CSV。

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class XMLParseMR {

   public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
      private final static IntWritable one = new IntWritable(1);
      public static String outputFile = null;
      private Text word = new Text();
      private JAXBC jax = new JAXBC();

      public void map(LongWritable key, Text value, Context context) throws 
          IOException, InterruptedException {

        String document = value.toString();
        System.out.println("XML : "+ document);
        try {
          ConnectHome ch = jax.convertJAXB(document);
      jax.convertCSV(ch, outputFile);
    } 
        catch (JAXBException e) {
          // TODO Auto-generated catch block
      e.printStackTrace();
    }
         } 
      }

 public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "wordcount");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(Map.class);
    conf.set("xmlinput.start", "<ConnectHome>");          
    conf.set("xmlinput.end", "</ConnectHome>");     
    job.setInputFormatClass(XMLInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    Map.outputFile = args[1];
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));


    job.waitForCompletion(true);
   }
  }

我还有一个名为 Connect_Home 的类,我在其中使用 JAXB 解析并提取数据。但是当我运行代码时,出现以下错误:

WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. 
        Applications should implement Tool for the same.
    WARN mapred.JobClient: No job jar file set.  User classes may not be found. See      
        JobConf(Class) or JobConf#setJar(String).
    INFO input.FileInputFormat: Total input paths to process : 1
    INFO util.NativeCodeLoader: Loaded the native-hadoop library
    WARN snappy.LoadSnappy: Snappy native library not loaded
    INFO mapred.JobClient: Running job: job_201303121556_0011
    mapred.JobClient:  map 0% reduce 0%
    INFO mapred.JobClient: Task Id : attempt_201303121556_0011_m_000000_0, Status : 
         FAILED
    java.lang.RuntimeException: java.lang.ClassNotFoundException: XMLParseMR$Map
            at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1004)
        org.apache.hadoop.mapreduce.JobContext.getMapperClass(JobContext.java:217)
            at javax.security.auth.Subject.doAs(Subject.java:396)
            at 

       org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1278)
            at org.apache.hadoop.mapred.Child.main(Child.java:260)
    Caused by: java.lang.ClassNotFoundException: XMLParseMR$Map
            at java.net.URLClassLoader$1.run(URLClassLoader.java:200)
            at sun.misc.Launcher$AppClassLoader.loadCl
    INFO mapred.JobClient: Task Id : attempt_201303121556_0011_m_000000_1, Status :  
       FAILED
        java.lang.RuntimeException: java.lang.ClassNotFoundException: 
       XMLParseMR$Map
            at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1004)
            at 
       org.apache.hadoop.mapreduce.JobContext.getMapperClass(JobContext.java:217)
            at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:602)
            at javax.security.auth.Subject.doAs(Subject.java:396)
            at 
      org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1278)
            at org.apache.hadoop.mapred.Child.main(Child.java:260)
    Caused by: java.lang.ClassNotFoundException: XMLParseMR$Map
            at java.net.URLClassLoader$1.run(URLClassLoader.java:200)
            at java.security.AccessController.doPrivileged(Native Method)
            at sun.misc.Launcher$AppClassLoader.loadCl
    INFO mapred.JobClient: Task Id : attempt_201303121556_0011_m_000000_2, Status : 
       FAILED
    java.lang.RuntimeException: java.lang.ClassNotFoundException: XMLParseMR$Map
            at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1004)
            at 
      org.apache.hadoop.mapreduce.JobContext.getMapperClass(JobContext.java:217)
            at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:602)
            at org.apache.hadoop.mapred.MapTask.run(MapTask.java:323)
            at org.apache.hadoop.mapred.Child$4.run(Child.java:266)
            at java.security.AccessController.doPrivileged(Native Method)
            at javax.security.auth.Subject.doAs(Subject.java:396)
            at 

       org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1278)
            at org.apache.hadoop.mapred.Child.main(Child.java:260)
    Caused by: java.lang.ClassNotFoundException: XMLParseMR$Map
            at java.net.URLClassLoader$1.run(URLClassLoader.java:200)
            at sun.misc.Launcher$AppClassLoader.loadCl
    INFO mapred.JobClient: Job complete: job_201303121556_0011
    INFO mapred.JobClient: Counters: 7
    INFO mapred.JobClient:   Job Counters
    INFO mapred.JobClient:     SLOTS_MILLIS_MAPS=20097
    INFO mapred.JobClient:     Total time spent by all reduces waiting after reserving 
      slots (ms)=0
    INFO mapred.JobClient:     Total time spent by all maps waiting after reserving 
      slots (ms)=0
    INFO mapred.JobClient:     Launched map tasks=4
    INFO mapred.JobClient:     Data-local map tasks=4
    INFO mapred.JobClient:     SLOTS_MILLIS_REDUCES=0
    INFO mapred.JobClient:     Failed map tasks=1
4

1 回答 1

1

错误信息:

WARN mapred.JobClient: No job jar file set.  User classes may not be found. See      
    JobConf(Class) or JobConf#setJar(String).

告诉您,作业没有正确设置。您可以在设置作业时调用 setJarByClass(),让 Hadoop 自行确定 JAR,而不是按名称设置 JAR:

Job job = new Job(conf, "wordcount");
job.setJarByClass(XMLParseMR.class);

它将根据您的类名设置作业的JAR。之后您的作业应该可以正常运行,并且上面提到的错误消息将消失。

于 2013-03-20T20:01:43.560 回答