1

1> This is my main driver class:

package dataAnalysis;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class Weather {

public static void main(String[] args) {
    JobConf conf=new JobConf();
    Job job;
    try {
        job = new Job(conf,"WeatherDataExtraction");
        job.setJobName("WeatherDataExtraction");
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job,new    Path("E:\\Nitin\\Programming\\DATA\\01001.dat\\01001.dat"));
        FileOutputFormat.setOutputPath(conf,new Path("E:\\Nitin\\output20.txt"));
        try {
            job.waitForCompletion(true);
        } catch (ClassNotFoundException | IOException | InterruptedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}
}

2> This is my Mapper class. The data file has the following format: if the first character of a line is '#', the line is a header holding the year for all records that follow, up to the next '#' line; otherwise the line contains the temperature data, which I extract with substring.

package dataAnalysis;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class Map extends Mapper<LongWritable,Text,Text,IntWritable> {
        private final int ERROR=9999;
        static String year;
        private float airtemp; 
    public void map(LongWritable key,Text value,Context context) throws IOException, InterruptedException
    {
        String line=value.toString();
        if(line.charAt(0)=='#') {
            year=line.substring(6,9);
        }
        else {
            if(line.substring(15,20)!="9999" { 
                airtemp=Float.parseFloat(line.substring(15,20));
                context.write(new Text(year),new IntWritable((int)airtemp));
            }
        }
    } 
}

3> This is my Reducer class, below:

package dataAnalysis;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class Reduce extends Reducer<Text,IntWritable,Text,IntWritable>{

public void reduce(Text key,Iterable<IntWritable> values,Context context) throws IOException, InterruptedException
{
    Integer max=new Integer(0);
    for(IntWritable val:values) {
          if (val.get()>max.intValue()) { max=val.get();}
    }
    context.write(key,new IntWritable(max.intValue()));
}    
} 

4> These are the errors I received:

Sep 29, 2013 1:24:51 AM org.apache.hadoop.util.NativeCodeLoader

WARNING: Unable to load native-hadoop library for your platform... using builtin-java      classes where applicable

Sep 29, 2013 1:24:51 AM org.apache.hadoop.mapred.JobClient copyAndConfigureFiles
WARNING: Use GenericOptionsParser for parsing the arguments. Applications should  implement Tool for the same.
Sep 29, 2013 1:24:51 AM org.apache.hadoop.mapred.JobClient copyAndConfigureFiles
WARNING: No job jar file set.  User classes may not be found. See JobConf(Class) or     JobConf#setJar(String).
Sep 29, 2013 1:24:51 AM org.apache.hadoop.mapred.JobClient$2 run
INFO: Cleaning up the staging area file:/tmp/hadoop-Nitin/mapred/staging/Nitin- 2062417840/.staging/job_local_0001
org.apache.hadoop.mapred.InvalidJobConfException: Output directory not set.
at      org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.checkOutputSpecs(FileOutputFormat.j ava:125)
at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:881)
at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:842)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Unknown Source)
at     org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1059)
    at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:842)
at org.apache.hadoop.mapreduce.Job.submit(Job.java:465)
at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:495)
at dataAnalysis.Weather.main(Weather.java:30)

5> Link to the format of the analyzed .dat file: http://www1.ncdc.noaa.gov/pub/data/igra/readme.txt

4

0 回答 0