1> This is my main method
package dataAnalysis;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
public class Weather {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        Job job;
        try {
            job = new Job(conf, "WeatherDataExtraction");
            job.setJobName("WeatherDataExtraction");
            job.setMapperClass(Map.class);
            job.setReducerClass(Reduce.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileInputFormat.addInputPath(job, new Path("E:\\Nitin\\Programming\\DATA\\01001.dat\\01001.dat"));
            FileOutputFormat.setOutputPath(conf, new Path("E:\\Nitin\\output20.txt"));
            try {
                job.waitForCompletion(true);
            } catch (ClassNotFoundException | IOException | InterruptedException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }
}
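For comparison, here is a minimal sketch of the same driver written only against the new org.apache.hadoop.mapreduce API (no JobConf, and FileOutputFormat taken from mapreduce.lib.output instead of the old mapred package), with the output path set on the Job itself rather than on a JobConf. The paths are the ones from the code above; the output path must not exist yet and is created as a directory by Hadoop:

package dataAnalysis;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Weather {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "WeatherDataExtraction");
        job.setJarByClass(Weather.class);   // lets the framework locate the user classes (see the "No job jar file set" warning)
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path("E:\\Nitin\\Programming\\DATA\\01001.dat\\01001.dat"));
        // Output path is set on the Job (new API); it is created as a directory and must not already exist.
        FileOutputFormat.setOutputPath(job, new Path("E:\\Nitin\\output20.txt"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}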
2> This is my Mapper class. The data file format is as follows: if the first character of a line is #, that line contains the year the following records belong to, and it applies until the next line starting with #; the other lines contain the temperature data, which I extract with substring.
package dataAnalysis;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class Map extends Mapper<LongWritable,Text,Text,IntWritable> {
    private final int ERROR = 9999;
    static String year;
    private float airtemp;

    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        if (line.charAt(0) == '#') {
            year = line.substring(6, 9);
        } else {
            if (line.substring(15, 20) != "9999") {
                airtemp = Float.parseFloat(line.substring(15, 20));
                context.write(new Text(year), new IntWritable((int) airtemp));
            }
        }
    }
}
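A side note on the mapper, separate from the error below: in Java, != on two String objects compares references rather than contents, so the 9999 sentinel check will almost always be true even for missing values. A minimal sketch of that comparison using .equals() (the .trim() is an assumption, in case the fixed-width field is space-padded):

String temp = line.substring(15, 20).trim();
if (!temp.equals("9999")) {                 // compare contents, not references
    airtemp = Float.parseFloat(temp);
    context.write(new Text(year), new IntWritable((int) airtemp));
}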
3> This is my Reducer class below
package dataAnalysis;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class Reduce extends Reducer<Text,IntWritable,Text,IntWritable> {
    public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int max = 0;
        for (IntWritable val : values) {
            if (val.get() > max) {
                max = val.get();
            }
        }
        context.write(key, new IntWritable(max));
    }
}
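One more side note, also unrelated to the error: max starts at 0, so a year whose temperatures are all negative would be reported as 0. If that matters for this data, a sketch of the usual fix is to start from the smallest possible value:

int max = Integer.MIN_VALUE;   // so an all-negative year still reports its true maximum
for (IntWritable val : values) {
    max = Math.max(max, val.get());
}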
4> These are the errors I received:
Sep 29, 2013 1:24:51 AM org.apache.hadoop.util.NativeCodeLoader
WARNING: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Sep 29, 2013 1:24:51 AM org.apache.hadoop.mapred.JobClient copyAndConfigureFiles
WARNING: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
Sep 29, 2013 1:24:51 AM org.apache.hadoop.mapred.JobClient copyAndConfigureFiles
WARNING: No job jar file set. User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
Sep 29, 2013 1:24:51 AM org.apache.hadoop.mapred.JobClient$2 run
INFO: Cleaning up the staging area file:/tmp/hadoop-Nitin/mapred/staging/Nitin-2062417840/.staging/job_local_0001
org.apache.hadoop.mapred.InvalidJobConfException: Output directory not set.
at org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.checkOutputSpecs(FileOutputFormat.java:125)
at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:881)
at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:842)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Unknown Source)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1059)
at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:842)
at org.apache.hadoop.mapreduce.Job.submit(Job.java:465)
at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:495)
at dataAnalysis.Weather.main(Weather.java:30)
5> Link to the format of the .dat file being analysed: http://www1.ncdc.noaa.gov/pub/data/igra/readme.txt