MapReduce 在 shuffle 阶段默认按 key 升序(正序)排序;本文通过自定义 key 比较器 KeyCompartor 实现倒序排序。
另外可以发现:相同的 key 会被归并到同一次 reduce 调用中,只要在 reduce 中对每个 key 只输出一次,就可以对数据进行去重。
sortLaunch
/**
 * Driver for the descending-sort MapReduce job.
 *
 * <p>Wires {@code SortMapper}, {@code SortReduce} and {@code KeyCompartor} into a
 * single-reducer job that reads from and writes to fixed directories.
 */
public class SortLaunch {
    /**
     * Configures the job, runs it and blocks until it finishes.
     *
     * <p>If the job does not complete successfully the JVM exits with status 1 so
     * that callers (scripts, schedulers) can detect the failure.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if file-system access or job submission fails
     * @throws InterruptedException if the thread is interrupted while waiting for the job
     * @throws ClassNotFoundException if a class configured on the job cannot be found
     */
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        // Enable basic log4j logging.
        BasicConfigurator.configure();
        // Create the job instance.
        Job job = Job.getInstance();
        // Class used to locate the jar that contains the job.
        job.setJarByClass(SortLaunch.class);
        // Job name.
        job.setJobName("sort");
        // Mapper and reducer implementations.
        job.setMapperClass(SortMapper.class);
        job.setReducerClass(SortReduce.class);
        // Custom key comparator that controls the sort order of map output keys.
        job.setSortComparatorClass(KeyCompartor.class);
        // Mapper output key/value types.
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(NullWritable.class);
        // Final (reducer) output key/value types.
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(NullWritable.class);
        // Obtain the file system from the job configuration.
        FileSystem fs = FileSystem.get(job.getConfiguration());
        // Remove the output directory if it already exists so the job can be re-run.
        Path out = new Path("D:/ideaProjects/hadoop_pro/sort1/output");
        if (fs.exists(out)) {
            fs.delete(out, true);
        }
        // Register the input directory.
        FileInputFormat.addInputPath(job, new Path("D:/ideaProjects/hadoop_pro/sort1/input"));
        // Register the output directory.
        FileOutputFormat.setOutputPath(job, out);
        // Use a single reducer so all keys end up in one output.
        job.setNumReduceTasks(1);
        // Submit the job, wait for it, and report failure through the exit status
        // instead of silently discarding the result.
        boolean succeeded = job.waitForCompletion(true);
        if (!succeeded) {
            System.exit(1);
        }
    }
}
sortMapper
/**
 * Mapper that parses every input line as an integer and emits it as the output key
 * with a {@link NullWritable} value.
 */
public class SortMapper extends Mapper<LongWritable, Text, IntWritable, NullWritable> {
    /** Output key reused across calls to avoid allocating a new object per record. */
    private final IntWritable outKey = new IntWritable();

    /**
     * Emits the integer contained in {@code value} as the output key.
     *
     * <p>Leading and trailing whitespace is ignored and blank lines are skipped.
     *
     * @param key input key supplied by the framework (unused)
     * @param value one line of input text
     * @param context sink for the emitted key/value pair
     * @throws IOException if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     * @throws NumberFormatException if a non-blank line is not a valid integer
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, IntWritable, NullWritable>.Context context) throws IOException, InterruptedException {
        // Trim so stray whitespace does not break parsing; a blank line carries no number.
        String line = value.toString().trim();
        if (line.isEmpty()) {
            return;
        }
        outKey.set(Integer.parseInt(line));
        context.write(outKey, NullWritable.get());
    }
}
sortReduce
/**
 * Reducer that writes its key once for every value grouped under it, so duplicate
 * input numbers are preserved in the output.
 */
public class SortReduce extends Reducer<IntWritable, NullWritable, IntWritable, NullWritable> {
    /**
     * Writes {@code key} once per element of {@code values}.
     *
     * @param key the integer key being reduced
     * @param values the values grouped under {@code key}; only their count matters
     * @param context sink for the emitted key/value pairs
     * @throws IOException if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(IntWritable key, Iterable<NullWritable> values, Reducer<IntWritable, NullWritable, IntWritable, NullWritable>.Context context) throws IOException, InterruptedException {
        // The values carry no data; each one only represents one occurrence of the key.
        for (NullWritable ignored : values) {
            context.write(key, NullWritable.get());
        }
    }
}
keyCompartor
/**
 * Key comparator that orders {@link IntWritable} keys in reverse of their natural order.
 */
public class KeyCompartor extends WritableComparator {
    /**
     * Registers {@link IntWritable} as the key class and passes {@code true} to the
     * parent constructor.
     */
    public KeyCompartor() {
        super(IntWritable.class, true);
    }

    /**
     * Compares two keys in reverse of {@link IntWritable}'s natural ordering.
     *
     * @param a the first key; must be an {@link IntWritable}
     * @param b the second key; must be an {@link IntWritable}
     * @return the result of comparing {@code b} to {@code a}: zero when the keys are
     *     equal, negative when {@code a} is the larger key, positive when it is the smaller
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        // Swapping the operands reverses the ordering.
        IntWritable first = (IntWritable) a;
        IntWritable second = (IntWritable) b;
        return second.compareTo(first);
    }
}
input
15
46
42
1
32
33
3
3
4
output
46
42
33
32
15
4
3
3
1
去重
SortMapper
/**
 * Mapper for the de-duplication job: parses every input line as an integer and emits
 * it as the output key with a {@link NullWritable} value.
 */
public class SortMapper extends Mapper<LongWritable, Text, IntWritable, NullWritable> {
    /** Output key reused across calls to avoid allocating a new object per record. */
    private final IntWritable k = new IntWritable();

    /**
     * Emits the integer contained in {@code value} as the output key.
     *
     * <p>Leading and trailing whitespace is ignored and blank lines are skipped.
     *
     * @param key input key supplied by the framework (unused)
     * @param value one line of input text
     * @param context sink for the emitted key/value pair
     * @throws IOException if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     * @throws NumberFormatException if a non-blank line is not a valid integer
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, IntWritable, NullWritable>.Context context) throws IOException, InterruptedException {
        // Trim so stray whitespace does not break parsing; a blank line carries no number.
        String line = value.toString().trim();
        if (line.isEmpty()) {
            return;
        }
        k.set(Integer.parseInt(line));
        context.write(k, NullWritable.get());
    }
}
SortReduce
/**
 * Reducer for the de-duplication job: writes each key exactly once, no matter how
 * many values were grouped under it.
 */
public class SortReduce extends Reducer<IntWritable, NullWritable, IntWritable, NullWritable> {
    /**
     * Writes {@code key} a single time and ignores {@code values}.
     *
     * @param key the integer key being reduced
     * @param values the values grouped under {@code key} (not read)
     * @param context sink for the emitted key/value pair
     * @throws IOException if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(IntWritable key, Iterable<NullWritable> values, Reducer<IntWritable, NullWritable, IntWritable, NullWritable>.Context context) throws IOException, InterruptedException {
        // One write per reduce call collapses repeated keys into one output record.
        NullWritable empty = NullWritable.get();
        context.write(key, empty);
    }
}
SortLaunch
/**
 * Driver for the de-duplication MapReduce job.
 *
 * <p>Wires {@code SortMapper} and {@code SortReduce} into a single-reducer job that
 * reads from and writes to fixed directories. No custom sort comparator is configured.
 */
public class SortLaunch {
    /**
     * Configures the job, runs it and blocks until it finishes.
     *
     * <p>If the job does not complete successfully the JVM exits with status 1 so
     * that callers (scripts, schedulers) can detect the failure.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if file-system access or job submission fails
     * @throws InterruptedException if the thread is interrupted while waiting for the job
     * @throws ClassNotFoundException if a class configured on the job cannot be found
     */
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        // Enable basic log4j logging.
        BasicConfigurator.configure();
        Job job = Job.getInstance();
        // Class used to locate the jar that contains the job.
        job.setJarByClass(SortLaunch.class);
        job.setJobName("sort");
        // Mapper and reducer implementations.
        job.setMapperClass(SortMapper.class);
        job.setReducerClass(SortReduce.class);
        // Mapper output key/value types.
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(NullWritable.class);
        // Final (reducer) output key/value types.
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(NullWritable.class);
        // Remove the output directory if it already exists so the job can be re-run.
        FileSystem fs = FileSystem.get(job.getConfiguration());
        Path out = new Path("D:/ideaProjects/hadoop_pro/quchong/output");
        if (fs.exists(out)) {
            fs.delete(out, true);
        }
        // Register the input and output directories.
        FileInputFormat.addInputPath(job, new Path("D:/ideaProjects/hadoop_pro/quchong/input"));
        FileOutputFormat.setOutputPath(job, out);
        // Use a single reducer so all keys end up in one output.
        job.setNumReduceTasks(1);
        // Submit the job, wait for it, and report failure through the exit status
        // instead of silently discarding the result.
        boolean succeeded = job.waitForCompletion(true);
        if (!succeeded) {
            System.exit(1);
        }
    }
}
input
15
46
42
1
32
33
3
3
4
output
1
3
4
15
32
33
42
46