由于MR中Reduce默认按Key由小到大排序(IntWritable类型的Key按数值大小比较,而不是按字典序),所以只要把数字作为Key输出即可得到升序结果。顺序排序代码如下
输入数据
23112
321
2
213
21
4
21
421
5
12
2313
25
21
21
5
25
215
2
5
输出数据
2
2
4
5
5
5
12
21
21
21
21
25
25
213
215
321
421
2313
23112
为了可以输出重复的数,我们这里在map输出中产生了一个随机数,也可以用1,只要能够统计reduce中values迭代器中有多少个数就行
import java.io.*;
import java.util.Random;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.io.*;
/**
 * MapReduce job that sorts a text file of integers (one per line) in ascending order.
 *
 * <p>Each integer is emitted as the map output key, so the framework's key sort
 * during the shuffle does the actual ordering. The reducer writes the key once
 * per received value, which keeps duplicate numbers in the output.
 *
 * <p>Usage: {@code sort <input path> <output path>}
 */
public class sort extends Configured implements Tool {

    /** Job counters. */
    enum Counter {
        /** Input lines skipped because they could not be parsed as an integer. */
        LineSkip,
    }

    /** Parses each input line as an int and emits it as the key. */
    public static class map extends Mapper<Object, Text, IntWritable, IntWritable> {

        /** Placeholder value; the reducer only counts values and never reads them. */
        private static final IntWritable ONE = new IntWritable(1);

        /** Reused output key; context.write serializes it before the next call. */
        private final IntWritable number = new IntWritable();

        /**
         * Emits {@code (number, 1)} for a line holding an integer.
         *
         * @param key     input key supplied by the input format (unused)
         * @param value   one line of input text
         * @param context task context used to emit output and update counters
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            // Tolerate surrounding whitespace (e.g. a trailing '\r' or spaces).
            String text = value.toString().trim();
            try {
                number.set(Integer.parseInt(text));
            } catch (NumberFormatException e) {
                // Blank or malformed line: count it and move on instead of failing the job.
                context.getCounter(Counter.LineSkip).increment(1);
                return;
            }
            context.write(number, ONE);
        }
    }

    /** Writes every key once per value so that duplicates survive the grouping. */
    public static class reduce extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {

        /**
         * Emits {@code key} once for each element of {@code value}.
         *
         * @param key     the number being output
         * @param value   one element per occurrence of {@code key} in the input
         * @param context task context used to emit output
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void reduce(IntWritable key, Iterable<IntWritable> value, Context context)
                throws IOException, InterruptedException {
            for (IntWritable ignored : value) {
                // A null value makes the text output contain only the key on each line.
                context.write(key, null);
            }
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @return 0 on success, 1 if the job failed, 2 if the arguments are missing
     * @throws Exception if job setup or submission fails
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: sort <input path> <output path>");
            return 2;
        }

        Configuration conf = getConf();
        Job job = new Job(conf, "sort");
        job.setJarByClass(sort.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setMapperClass(map.class);
        job.setReducerClass(reduce.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);

        // Keys are only sorted within one reducer's input; a single reducer
        // is what makes the output globally ordered.
        job.setNumReduceTasks(1);

        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Command-line entry point; exits with the status returned by {@link #run}.
     *
     * @param args generic Hadoop options followed by the input and output paths
     * @throws Exception if the tool fails to run
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new sort(), args);
        System.exit(res);
    }
}
为了输出
23112
2313
421
321
215
213
25
25
21
21
21
21
12
5
5
5
4
2
2
我们这里采用了比较器,Job提供了setSortComparatorClass接口,可以修改Reduce中对Key的默认排序
import java.io.*;
import java.util.Random;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.io.*;
/**
 * MapReduce job that sorts a text file of integers (one per line) in descending order.
 *
 * <p>Each integer is emitted as the map output key. The job installs
 * {@link intwritablecompare} as the sort comparator so that keys reach the
 * reducer from largest to smallest. The reducer writes the key once per
 * received value, which keeps duplicate numbers in the output.
 *
 * <p>Usage: {@code reversesort <input path> <output path>}
 */
public class reversesort extends Configured implements Tool {

    /** Job counters. */
    enum Counter {
        /** Input lines skipped because they could not be parsed as an integer. */
        LineSkip,
    }

    /** Parses each input line as an int and emits it as the key. */
    public static class map extends Mapper<Object, Text, IntWritable, IntWritable> {

        /** Placeholder value; the reducer only counts values and never reads them. */
        private static final IntWritable ONE = new IntWritable(1);

        /** Reused output key; context.write serializes it before the next call. */
        private final IntWritable number = new IntWritable();

        /**
         * Emits {@code (number, 1)} for a line holding an integer.
         *
         * @param key     input key supplied by the input format (unused)
         * @param value   one line of input text
         * @param context task context used to emit output and update counters
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            // Tolerate surrounding whitespace (e.g. a trailing '\r' or spaces).
            String text = value.toString().trim();
            try {
                number.set(Integer.parseInt(text));
            } catch (NumberFormatException e) {
                // Blank or malformed line: count it and move on instead of failing the job.
                context.getCounter(Counter.LineSkip).increment(1);
                return;
            }
            context.write(number, ONE);
        }
    }

    /** Writes every key once per value so that duplicates survive the grouping. */
    public static class reduce extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {

        /**
         * Emits {@code key} once for each element of {@code value}.
         *
         * @param key     the number being output
         * @param value   one element per occurrence of {@code key} in the input
         * @param context task context used to emit output
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void reduce(IntWritable key, Iterable<IntWritable> value, Context context)
                throws IOException, InterruptedException {
            for (IntWritable ignored : value) {
                // A null value makes the text output contain only the key on each line.
                context.write(key, null);
            }
        }
    }

    /** Raw-bytes comparator that inverts the ordering of {@link IntWritable.Comparator}. */
    public static class intwritablecompare extends IntWritable.Comparator {

        /**
         * Compares two serialized keys in reverse order by delegating to the
         * parent comparator with the two operands swapped.
         *
         * @param b1 buffer holding the first key
         * @param s1 start offset of the first key in {@code b1}
         * @param l1 length in bytes of the first key
         * @param b2 buffer holding the second key
         * @param s2 start offset of the second key in {@code b2}
         * @param l2 length in bytes of the second key
         * @return the parent's comparison result for (second, first)
         */
        @Override
        public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
            return super.compare(b2, s2, l2, b1, s1, l1);
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @return 0 on success, 1 if the job failed, 2 if the arguments are missing
     * @throws Exception if job setup or submission fails
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: reversesort <input path> <output path>");
            return 2;
        }

        Configuration conf = getConf();
        Job job = new Job(conf, "sort");
        job.setJarByClass(reversesort.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setMapperClass(map.class);
        job.setReducerClass(reduce.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);

        // Replace the default ascending key order with the descending comparator.
        job.setSortComparatorClass(intwritablecompare.class);

        // Keys are only sorted within one reducer's input; a single reducer
        // is what makes the output globally ordered.
        job.setNumReduceTasks(1);

        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Command-line entry point; exits with the status returned by {@link #run}.
     *
     * @param args generic Hadoop options followed by the input and output paths
     * @throws Exception if the tool fails to run
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new reversesort(), args);
        System.exit(res);
    }
}