I am new to MapReduce applications. I simply want to find the lengths of the words in my dataset, classify them by length as tiny, small, medium, or big, and at the end see how many words in total fall into each of those categories. I am working in Java and my dataset is large, but I am having trouble implementing the reducer: when I execute the jar file on the Hadoop cluster, it does not return any results. I would be grateful for any help. This is the reducer code I tried to run, but I guess it has a lot of errors.
public class WordSizeReducer extends Reducer<IntWritable, IntWritable, Text, IntWritable> {
    private IntVariable result = new IntVariable();
    IntWritable tin, smal, mediu, bi;
    int t, s, m, b;
    int count;
    Text tiny, small, medium, big;

    public void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        for (IntWritable val : values) {
            if (val.get() == 1) {
                tin.set(t);
                t++;
            } else if (2 <= val.get() && val.get() <= 4) {
                smal.set(s);
                s++;
            } else if (5 <= val.get() && val.get() <= 9) {
                mediu.set(m);
                m++;
            } else if (10 <= val.get()) {
                bi.set(b);
                b++;
            }
        }
        context.write(tiny, tin);
        context.write(small, smal);
        context.write(medium, mediu);
        context.write(big, bi);
    }
}
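A likely reason the job produces no output: tin, smal, mediu, bi and the four Text labels are declared but never initialized, so the first set() or context.write() call throws a NullPointerException and every reduce task fails. In addition, IntVariable is not a Hadoop type (IntWritable was presumably intended, and the field is unused anyway); the classification tests val.get(), which is always the 1 emitted by the mapper, when the word length actually arrives in the key; and the four totals are written once per key instead of once at the end. Below is a minimal sketch of a reducer that addresses these points, keeping the original size boundaries (1 = tiny, 2-4 = small, 5-9 = medium, 10+ = big) and assuming the mapper shown afterwards:

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordSizeReducer extends Reducer<IntWritable, IntWritable, Text, IntWritable> {

    // Running totals for each size category, accumulated across all keys.
    private int tiny, small, medium, big;

    @Override
    public void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Sum the 1s emitted by the mapper: how many words have this length.
        int count = 0;
        for (IntWritable val : values) {
            count += val.get();
        }
        // Classify by the key (the word length), not by the values.
        int length = key.get();
        if (length == 1) {
            tiny += count;
        } else if (length <= 4) {
            small += count;
        } else if (length <= 9) {
            medium += count;
        } else {
            big += count;
        }
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        // Runs once per reduce task after all keys are processed: emit the four totals.
        context.write(new Text("tiny"), new IntWritable(tiny));
        context.write(new Text("small"), new IntWritable(small));
        context.write(new Text("medium"), new IntWritable(medium));
        context.write(new Text("big"), new IntWritable(big));
    }
}

This sketch assumes a single reduce task; with several, each task would emit its own partial totals (the driver sketch after the mapper sets the reducer count to 1 for this reason). The mapper from the question, shown next, already emits a (word length, 1) pair per token and needs no changes beyond its imports.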
import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordSizeMapper extends Mapper<LongWritable, Text, IntWritable, IntWritable> {
    private final static IntWritable one = new IntWritable(1);
    private IntWritable wordLength = new IntWritable();

    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        StringTokenizer tokenizer = new StringTokenizer(line);
        while (tokenizer.hasMoreTokens()) {
            // Emit (word length, 1) for every whitespace-separated token.
            wordLength.set(tokenizer.nextToken().length());
            context.write(wordLength, one);
        }
    }
}
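Nothing above shows a driver class, and a missing or miswired driver is another common reason a submitted jar appears to do nothing. The sketch below is an assumption about how the job could be configured (the class name WordSize and the two command-line arguments are illustrative, not from the original post). Note that the map output types (IntWritable, IntWritable) differ from the job's final output types (Text, IntWritable), so both pairs must be declared, and WordSizeReducer cannot double as a combiner for the same reason.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordSize {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "word size");
        job.setJarByClass(WordSize.class);
        job.setMapperClass(WordSizeMapper.class);
        job.setReducerClass(WordSizeReducer.class);
        // Map output types differ from the final output types, so set both pairs.
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // One reducer, so the four category totals end up in a single output file.
        job.setNumReduceTasks(1);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Submitted with something like hadoop jar wordsize.jar WordSize /input /output (paths illustrative). If the job still returns nothing, the task attempt logs usually show the actual exception, e.g. the NullPointerException described above.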