二次排序
输入
20 21
50 51
50 53
50 52
50 54
60 51
60 53
60 52
60 56
60 57
70 58
60 61
70 54
70 55
70 56
70 57
70 58
结果:
20 21
50 51
50 52
50 53
50 54
60 51
60 52
60 53
60 56
60 57
60 61
70 54
70 55
70 56
70 57
70 58
70 58
方法1:
以第一列作为key,利用shuffle阶段对key的排序(默认只能升序);在reduce端把同一key对应的第二列的值放到list中,对list排序后遍历list输出
方法2:
自定义数据类型(实现WritableComparable),把两列封装成一个复合key;compareTo中需要先比较第一列,第一列相同时再比较第二列,由shuffle阶段完成二次排序
SortSecondaryDemo.java
package MR;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.IOException;
/**
 * MapReduce driver for the secondary-sort example.
 *
 * <p>Each input line holds two integers separated by whitespace. The mapper packs both into a
 * {@link SecondarySortWritable} composite key, so the order of the output is decided by that
 * key's {@code compareTo} during the shuffle; the reducer simply writes every record back out.
 */
public class SortSecondaryDemo implements Tool {

    /** Configuration handed over through {@link #setConf}; created lazily if none was set. */
    private Configuration conf;

    /**
     * Map phase: parses one line into a composite key and emits the second column as the value.
     *
     * @author lyd
     */
    public static class MyMapper extends Mapper<LongWritable, Text, SecondarySortWritable, IntWritable> {
        // Output objects are reused across map() calls instead of allocating one per record.
        private final SecondarySortWritable ss = new SecondarySortWritable();
        private final IntWritable outValue = new IntWritable();

        /**
         * Parses a line of the form {@code "<first> <second>"} and writes it to the context.
         *
         * @param key     input key supplied by the input format (not used)
         * @param value   one line of input text
         * @param context sink for the (composite key, second column) pair
         * @throws IOException           if a non-blank line has fewer than two fields, or on
         *                               write failure
         * @throws NumberFormatException if one of the first two fields is not an integer
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString().trim();
            if (line.isEmpty()) {
                // Blank lines (e.g. a trailing newline at end of file) carry no record.
                return;
            }
            // Split on any run of whitespace so tabs and repeated spaces are accepted too.
            String[] dig = line.split("\\s+");
            if (dig.length < 2) {
                throw new IOException("Expected two integers per line but got: \"" + line + "\"");
            }
            int first = Integer.parseInt(dig[0]);
            int second = Integer.parseInt(dig[1]);
            ss.setFirst(first);
            ss.setSecond(second);
            outValue.set(second);
            context.write(ss, outValue);
        }
    }

    /**
     * Reduce phase: writes every (key, value) pair of a group unchanged.
     *
     * @author lyd
     */
    public static class MyReducer extends Reducer<SecondarySortWritable, IntWritable, SecondarySortWritable, IntWritable> {
        /**
         * Emits one output record per incoming value so that duplicate input lines are kept.
         *
         * @param key     composite key of the current group
         * @param values  all values grouped under {@code key}
         * @param context sink for the output records
         */
        @Override
        protected void reduce(SecondarySortWritable key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            for (IntWritable i : values) {
                context.write(key, i);
            }
        }
    }

    /**
     * Stores the configuration so that {@link #getConf()} and {@link #run} use it.
     *
     * @param conf configuration to use for the job
     */
    @Override
    public void setConf(Configuration conf) {
        this.conf = conf;
    }

    /**
     * Returns the configuration previously passed to {@link #setConf}, or a new default
     * {@link Configuration} (remembered for later calls) if none was set.
     */
    @Override
    public Configuration getConf() {
        if (conf == null) {
            conf = new Configuration();
        }
        return conf;
    }

    /**
     * Driver method: configures and submits the job, then waits for it to finish.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @return 0 if the job succeeded, 1 if it failed, 2 if the arguments are missing
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args == null || args.length < 2) {
            System.err.println("Usage: SortSecondaryDemo <input path> <output path>");
            return 2;
        }
        Configuration jobConf = getConf();
        Job job = Job.getInstance(jobConf, "ssjob");
        job.setJarByClass(SortSecondaryDemo.class);
        // set input path and output path
        setInputAndOutput(job, jobConf, args);
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(SecondarySortWritable.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(SecondarySortWritable.class);
        job.setOutputValueClass(IntWritable.class);
        // submit and block until completion
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /** Entry point: runs the tool through {@link ToolRunner} and exits with its return code. */
    public static void main(String[] args) throws Exception {
        int isok = ToolRunner.run(new Configuration(), new SortSecondaryDemo(), args);
        System.exit(isok);
    }

    /**
     * Registers the input and output paths on the job.
     *
     * <p>An already existing output directory is not removed here.
     *
     * @param job  job to configure
     * @param conf job configuration (currently unused)
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws IllegalArgumentException if fewer than two arguments are given
     * @throws IllegalStateException    if a path cannot be registered on the job
     */
    public static void setInputAndOutput(Job job, Configuration conf, String[] args) {
        if (args == null || args.length < 2) {
            throw new IllegalArgumentException("Expected <input path> <output path> arguments");
        }
        try {
            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
        } catch (IOException e) {
            // Fail fast with the cause attached instead of submitting a job without paths.
            throw new IllegalStateException(
                    "Cannot set job paths: input=" + args[0] + ", output=" + args[1], e);
        }
    }
}
SecondarySortWritable.java
package MR;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Objects;
/**
*
*/
public class SecondarySortWritable implements WritableComparable<SecondarySortWritable> {
private int first;
private int second;
public void write(DataOutput out) throws IOException {
out.writeInt(this.first);
out.writeInt(this.second);
}
public void readFields(DataInput in) throws IOException {
this.first = in.readInt();
this.second = in.readInt();
}
int getFirst() {
return first;
}
void setFirst(int first) {
this.first = first;
}
public int getSecond() {
return second;
}
void setSecond(int second) {
this.second = second;
}
public int compareTo(SecondarySortWritable o) {
return this.first - o.first;
//return o.second - this.second; //降序
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
SecondarySortWritable that = (SecondarySortWritable) o;
return first == that.first &&
second == that.second;
}
@Override
public int hashCode() {
return Objects.hash(first, second);
}
@Override
public String toString() {
return "first=" + first +
", second=" + second;
}
}