映射器类
import java.io.IOException;
import java.util.TreeSet;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class MapperTopNMR
extends Mapper<LongWritable, Text, NullWritable, Text>
{
//TreeSet<Text> fatcats = new TreeSet<Text>(new SelComp());
TreeSet<Text> fatcats = new TreeSet<Text>();
@Override
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException
{
fatcats.add(new Text(value));
if (fatcats.size() > 3)
{
fatcats.remove(fatcats.first());
}
}
@Override
protected void cleanup(Context context)
throws IOException, InterruptedException
{
for ( Text catname : fatcats )
{
context.write( NullWritable.get(),catname);
}
}
}
MAPPER 类的 MRUNIT
public class TopNMRTest
{
MapDriver<LongWritable, Text, NullWritable, Text> mapDriver;
@Before
public void setUp()
{
MapperTopNMR mapper = new MapperTopNMR();
mapDriver = new MapDriver<LongWritable, Text, NullWritable, Text>();
mapDriver.setMapper(mapper);
}
@Test
public void testMapper() throws IOException
{
mapDriver.withInput(new LongWritable(1), new Text("11"));
mapDriver.withInput(new LongWritable(1), new Text("15"));
mapDriver.withInput(new LongWritable(1), new Text("3"));
mapDriver.withInput(new LongWritable(1), new Text("3"));
mapDriver.withInput(new LongWritable(1), new Text("7"));
mapDriver.withOutput(NullWritable.get(), new Text("7"));
mapDriver.withOutput(NullWritable.get(), new Text("11"));
mapDriver.withOutput(NullWritable.get(), new Text("15"));
mapDriver.runTest();
}
我期待结果为
(空) 7
(空) 11
(空) 15
但是当我打印输出时,存储在 Treeset 中的值不在
顺序,它给出了在这个例子中插入的方式
树集包含:11 15 3 7 (another 3 is duplicate it was eliminated).
注意:TreeSet - 消除重复而不给出自然顺序。即使我尝试了 TreeSet 实例的比较器来颠倒顺序,它也会给出以下结果
7 3 15 11.
请帮助我摆脱这个问题。