0. 运行环境
- idea+hadoop 2.9.0 本地调试
- 关于idea上hadoop的配置,见前文
https://blog.csdn.net/wxfghy/article/details/80521577
- 输入文件格式如下,输出以字母分区,分区内部排序,也可以不分区,按ABC排序
刘备 15
关羽 60
张飞 8
刘备 75
关羽 65
张飞 98
刘备 55
刘备 23
关羽 85
张飞 67
张飞 58
- 输出文件按姓名分3个文件保存,格式为人名 分数升序
刘备 15
刘备 23
刘备 55
刘备 75
1. 主方法
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration cfg=new Configuration()
Job job = Job.getInstance(cfg)
//SecondSort为主方法所在类
job.setJarByClass(SecondSort.class)
job.setOutputKeyClass(Text.class)
job.setOutputValueClass(Text.class)
job.setMapOutputKeyClass(SeKey.class)
job.setMapOutputValueClass(IntWritable.class)
job.setMapperClass(SeMaper.class)
job.setReducerClass(SeReduceer.class)
//设置reduce数量为3,默认为1
job.setNumReduceTasks(3)
//设置自定义分区类Partitioner
job.setPartitionerClass(SePart.class)
//输入路径和输出路径的设置
FileInputFormat.addInputPath(job, new Path("d:\\mr\\ssort.txt"))
FileOutputFormat.setOutputPath(job, new Path("d:\\mr\\output"))
System.exit(job.waitForCompletion(true)?0:1)
}
2. map
/**
 * Mapper for the secondary sort: turns each whitespace-separated "name score"
 * input line into a composite key {@code "name,score"} (a {@code SeKey}) with the
 * score as the {@code IntWritable} value.
 */
static class SeMaper extends Mapper<LongWritable,Text,SeKey,IntWritable>{
    // Output objects are reused across map() calls instead of being reallocated per record.
    private SeKey sekey=new SeKey();
    private IntWritable svalue=new IntWritable();

    /**
     * Emits one (composite key, score) pair for a well-formed input line.
     * Lines with fewer than two fields (including blank lines) are skipped.
     *
     * @param key     input key supplied by the framework (not used)
     * @param value   one line of input text
     * @param context sink for the emitted pair
     * @throws NumberFormatException if the second field is not a valid integer
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        System.out.println("二次排序map");
        // Trim and split on runs of whitespace so repeated separators do not
        // produce empty tokens in the score position.
        String line = value.toString().trim();
        String[] lines = line.split("\\s+");
        // Both index 0 (name) and index 1 (score) are read below, so at least
        // two fields are required.
        if (lines.length < 2) {
            return;
        }
        sekey.setMkey(lines[0] + "," + lines[1]);
        svalue.set(Integer.parseInt(lines[1]));
        context.write(sekey, svalue);
    }
}
3. 组合键类SeKey,实现WritableComparable接口
/**
 * Composite key for the secondary sort. It wraps a single string of the form
 * {@code "name,score"} and orders keys by name first, then by numeric score ascending.
 */
static class SeKey implements Writable,WritableComparable<SeKey>{
    // Composite key text in the form "name,score".
    private String mkey;

    /** @return the composite key text */
    public String getMkey() {
        return mkey;
    }

    /** @param mkey the composite key text, expected in the form "name,score" */
    public void setMkey(String mkey) {
        this.mkey = mkey;
    }

    /**
     * Compares by the part before the comma (string order); when those are equal,
     * compares the part after the comma as integers.
     *
     * @param o the key to compare against
     * @return negative, zero, or positive per the {@code Comparable} contract
     * @throws NumberFormatException if either score part is not a valid integer
     */
    @Override
    public int compareTo(SeKey o) {
        String[] mine = this.mkey.split(",");
        String[] theirs = o.getMkey().split(",");
        int byName = mine[0].compareTo(theirs[0]);
        if (byName != 0) {
            return byName;
        }
        // Compare scores numerically (not lexically) and without boxing.
        return Integer.compare(Integer.parseInt(mine[1]), Integer.parseInt(theirs[1]));
    }

    /** Serializes the composite key text with {@code writeUTF}. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(this.mkey);
    }

    /** Restores the composite key text with {@code readUTF}. */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.mkey = in.readUTF();
    }
}
4. 自定义分区类SePart,继承Partitioner
/**
 * Custom partitioner that routes records by the name part of the composite key:
 * "刘备" to partition 0, "关羽" to partition 1, and every other name to partition 2.
 */
static class SePart extends Partitioner<SeKey,IntWritable>{
    /**
     * Chooses the partition for a record from the name before the comma in the key.
     *
     * @param seKey         composite key in the form "name,score"
     * @param intWritable   the record's value (not used)
     * @param numPartitions number of partitions configured for the job
     * @return a partition index in {@code [0, numPartitions)}
     */
    @Override
    public int getPartition(SeKey seKey, IntWritable intWritable, int numPartitions) {
        // Split once and reuse the name for both comparisons.
        String name = seKey.getMkey().split(",")[0];
        int partition;
        if (name.equals("刘备")) {
            partition = 0;
        } else if (name.equals("关羽")) {
            partition = 1;
        } else {
            partition = 2;
        }
        // Keep the index valid when the job runs with fewer than 3 partitions;
        // with 3 or more the result is the same as the unbounded value.
        return partition % numPartitions;
    }
}
5. reduce
/**
 * Reducer for the secondary sort: for every value grouped under a composite key
 * it writes one output record of (name, score).
 */
static class SeReduceer extends Reducer<SeKey,IntWritable,Text,Text>{
    // Output objects are reused across records instead of being reallocated.
    private Text rkey=new Text();
    private Text rvalue=new Text();

    /**
     * Writes one (name, score) record per value in the group.
     *
     * <p>Several values can arrive under the same key when the input contains
     * identical "name score" lines; each one is written as its own record rather
     * than being concatenated into a single string.
     *
     * @param skey    composite key in the form "name,score"
     * @param iter    the scores grouped under {@code skey}
     * @param context sink for the output records
     */
    @Override
    protected void reduce(SeKey skey, Iterable<IntWritable> iter, Context context) throws IOException, InterruptedException {
        System.out.println("二次排序reduce");
        // The output key is only the name part of the composite key.
        rkey.set(skey.getMkey().split(",")[0]);
        for (IntWritable score : iter) {
            rvalue.set(score.toString());
            context.write(rkey, rvalue);
        }
    }
}