正序排序:
MapReduce 本身自带排序:在 map 阶段,每个 map 任务的输出按 key 局部有序;在 reduce 阶段,每个 reduce 任务的输出按 key 有序,因此只有一个 reduce 任务时结果才是全局有序的。
现在我们先用单个输入文件测试,只编写 map 逻辑来输出结果:
package com.hnxy.mr.Sort;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Ascending sort of tab-separated {@code <number> <text>} records, relying on the
 * key ordering that the MapReduce shuffle performs by itself.
 *
 * <p>Sample input records (number, text): 1 北京 2 天津 8 黑龙江 3 河北 5 内蒙古 4 山西 7 吉林 6 辽宁
 *
 * <p>Usage: {@code SortMr <input path> <output path>}. An existing output path is
 * deleted recursively before the job is submitted.
 *
 * @author 耀君
 */
public class SortMr extends Configured implements Tool {

    /**
     * Turns each input line into a (numeric key, text) pair so that the framework
     * sorts the records by the numeric key.
     */
    public static class SortMrMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
        // Writable instances are reused across map() calls to avoid per-record allocation.
        private final LongWritable outkey = new LongWritable();
        private final Text outval = new Text();

        /**
         * Emits {@code (Long(field0), field1)} for every line made of exactly two
         * tab-separated fields. Lines with a different field count are ignored;
         * lines whose first field is not a valid long are ignored as well and
         * counted in the {@code SortMr / MALFORMED_KEY} job counter.
         *
         * @param key     input key supplied by the input format (not used)
         * @param value   one line of input text
         * @param context task context used to emit the output pair
         * @throws IOException          if writing the output pair fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, LongWritable, Text>.Context context)
                throws IOException, InterruptedException {
            String[] fields = value.toString().split("\t");
            if (fields.length != 2) {
                return;
            }
            long sortKey;
            try {
                sortKey = Long.parseLong(fields[0]);
            } catch (NumberFormatException ignored) {
                // A single bad record (e.g. a header line) should not fail the whole job;
                // skip it like any other malformed line, but keep it visible in the counters.
                context.getCounter("SortMr", "MALFORMED_KEY").increment(1);
                return;
            }
            outkey.set(sortKey);
            outval.set(fields[1]);
            context.write(outkey, outval);
        }
    }

    /**
     * Configures and runs the sort job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @return 0 if the job succeeded, 1 if it failed, 2 if the arguments are missing
     * @throws Exception if job setup, file system access or job submission fails
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args == null || args.length < 2) {
            System.err.println("Usage: SortMr <input path> <output path>");
            return 2;
        }

        Configuration conf = this.getConf();
        Job job = Job.getInstance(conf, "job1");
        job.setJarByClass(SortMr.class);
        job.setMapperClass(SortMrMapper.class);
        // No reducer class is set on purpose: Hadoop then uses its default
        // pass-through Reducer, so the records still go through the shuffle and
        // are written in key order.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        // Configure the input and output paths.
        Path in = new Path(args[0]);
        Path out = new Path(args[1]);
        FileInputFormat.addInputPath(job, in);
        FileOutputFormat.setOutputPath(job, out);

        // Remove a stale output directory, otherwise the job refuses to start.
        // The file system is resolved from the path itself so that a non-default
        // scheme in args[1] is honoured.
        FileSystem fs = out.getFileSystem(conf);
        if (fs.exists(out)) {
            fs.delete(out, true);
        }

        // Submit the job and wait for it to finish.
        boolean succeeded = job.waitForCompletion(true);
        if (succeeded) {
            System.out.println("execution succeed");
        } else {
            System.out.println("Execution failed");
        }
        // Propagate the outcome so that the process exit code reflects job failure.
        return succeeded ? 0 : 1;
    }

    /**
     * Command-line entry point; exits with the status returned by {@link #run(String[])}.
     *
     * @param args command-line arguments, forwarded to {@link ToolRunner}
     * @throws Exception if the tool fails with an exception
     */
    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new SortMr(), args));
    }
}
运行结果:
以上是只有一个输入文件的情况,如果有两个输入文件呢?
运行结果: