▶ 排序
-
案例:统计用户的上行流量,下行流量和总流量
-
代码
- Mapper类
import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; import java.io.IOException; public class FlowMapper extends Mapper<LongWritable,Text,Text, FlowBean> { Text k = new Text(); FlowBean flowBean = new FlowBean(); @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); String[] splites = line.split("\t"); //电话号码 String phone_num = splites[1]; //上行流量 Long upFlow = Long.parseLong(splites[splites.length-3]); //下行流量 Long downFlow = Long.parseLong(splites[splites.length-2]); //当前手机号的总流量 Long sumFlow = downFlow+upFlow; k.set(phone_num); // FlowBean flowBean = new FlowBean(upFlow,downFlow,sumFlow); flowBean.setUpFlow(upFlow); flowBean.setDownFlow(downFlow); flowBean.setSumFlow(sumFlow); context.write(k,flowBean); } }
- Reducer类
import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; import java.io.IOException; public class FlowReducer extends Reducer<Text, FlowBean,Text,FlowBean> { FlowBean flowBean = new FlowBean(); @Override protected void reduce(Text key, Iterable<FlowBean> values, Context context) throws IOException, InterruptedException { long sumUpFlow = 0; long sumDownFlow = 0; long sumFlow = 0; for(FlowBean flowBean:values){ sumUpFlow+=flowBean.getUpFlow(); sumDownFlow+=flowBean.getDownFlow(); } sumFlow = sumUpFlow+sumDownFlow; // FlowBean flowBean = new FlowBean(sumUpFlow,sumDownFlow,sumFlow); flowBean.setUpFlow(sumUpFlow); flowBean.setDownFlow(sumDownFlow); flowBean.setSumFlow(sumFlow); context.write(key,flowBean); } }
- Driver类
import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import java.io.IOException; public class FlowDriver { public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { Job job = Job.getInstance(); // 指定mapper输出数据的kv类型 job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(FlowBean.class); // 指定最终输出的数据的kv类型 job.setOutputKeyClass(Text.class); job.setOutputValueClass(FlowBean.class); // 指定本业务job要使用的mapper/Reducer业务类 job.setMapperClass(FlowMapper.class); job.setReducerClass(FlowReducer.class); // 指定job的输入原始文件所在目录 FileInputFormat.setInputPaths(job, new Path("F:\\test/phone_data.txt")); FileOutputFormat.setOutputPath(job, new Path("F://test/Phone_data")); job.waitForCompletion(true); } }
- 实体类
import org.apache.hadoop.io.Writable; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; public class FlowBean implements Writable { private Long upFlow; private Long downFlow; private Long sumFlow; //一定要有无参构造 public FlowBean() { } public FlowBean(Long upFlow, Long downFlow, Long sumFlow) { this.upFlow = upFlow; this.downFlow = downFlow; this.sumFlow = sumFlow; } //重写序列化方法 @Override public void write(DataOutput dataOutput) throws IOException { dataOutput.writeLong(upFlow); dataOutput.writeLong(downFlow); dataOutput.writeLong(sumFlow); } //重写反序列化方法 @Override public void readFields(DataInput dataInput) throws IOException { //要和序列化的顺序相同 upFlow = dataInput.readLong(); downFlow = dataInput.readLong(); sumFlow = dataInput.readLong(); } @Override public String toString() { return upFlow + "\t" + downFlow + "\t" + sumFlow ; } public Long getUpFlow() { return upFlow; } public void setUpFlow(Long upFlow) { this.upFlow = upFlow; } public Long getDownFlow() { return downFlow; } public void setDownFlow(Long downFlow) { this.downFlow = downFlow; } public Long getSumFlow() { return sumFlow; } public void setSumFlow(Long sumFlow) { this.sumFlow = sumFlow; } }
▶ 全排序(也叫一次排序)
-
案例:统计用户的上行流量,下行流量和总流量,最后结果按照总流量降序输出
-
代码
- 实体类 实现 WritableComparable接口
import org.apache.hadoop.io.WritableComparable; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; public class FlowBean implements WritableComparable<FlowBean> { private Long upFlow; private Long downFlow; private Long sumFlow; public FlowBean() { } @Override public String toString() { return upFlow + "\t" + downFlow + "\t" + sumFlow ; } public Long getUpFlow() { return upFlow; } public void setUpFlow(Long upFlow) { this.upFlow = upFlow; } public Long getDownFlow() { return downFlow; } public void setDownFlow(Long downFlow) { this.downFlow = downFlow; } public Long getSumFlow() { return sumFlow; } public void setSumFlow(Long sumFlow) { this.sumFlow = sumFlow; } @Override public void write(DataOutput out) throws IOException { out.writeLong(upFlow); out.writeLong(downFlow); out.writeLong(sumFlow); } @Override public void readFields(DataInput in) throws IOException { upFlow = in.readLong(); downFlow = in.readLong(); sumFlow = in.readLong(); } @Override public int compareTo(FlowBean o) { //小于返回-1 大于返回1 等于0 if(this.sumFlow > o.sumFlow){ return -1; } if(this.sumFlow < o.sumFlow){ return 1; } return 0; } }
- Mapper类
import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; import java.io.IOException; public class FlowOrderMapper extends Mapper<LongWritable, Text,FlowBean,Text> { Text v = new Text(); FlowBean flowBean = new FlowBean(); @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); String[] splits = line.split("\t"); String phone = splits[0]; long upFlow = Long.parseLong(splits[1]); long downFlow = Long.parseLong(splits[2]); long sumFlow = Long.parseLong(splits[3]); flowBean.setUpFlow(upFlow); flowBean.setDownFlow(downFlow); flowBean.setSumFlow(sumFlow); v.set(phone); context.write(flowBean,v); } }
- Reducer类
import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; import java.io.IOException; public class FlowOrderReduce extends Reducer<FlowBean, Text,Text,FlowBean> { //按照总流量倒序写入 @Override protected void reduce(FlowBean key, Iterable<Text> values, Context context) throws IOException, InterruptedException { //总流量可能相同 for(Text v:values){ context.write(v,key); } } }
- Driver类
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Driver for the full-sort job: consumes the first job's output and
 * re-sorts it by total flow via FlowBean.compareTo().
 * Usage: FlowOrderDriver [inputPath] [outputPath] — falls back to the
 * original hard-coded local paths when no arguments are given.
 */
public class FlowOrderDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Job job = Job.getInstance();
        // Required on a cluster so Hadoop can locate the jar containing the
        // mapper/reducer classes (was missing in the original).
        job.setJarByClass(FlowOrderDriver.class);

        job.setMapperClass(FlowOrderMapper.class);
        job.setReducerClass(FlowOrderReduce.class);

        // Mapper output: the bean is the key so the shuffle sorts on it.
        job.setMapOutputKeyClass(FlowBean.class);
        job.setMapOutputValueClass(Text.class);
        // Final output: phone back in the key position.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        // Input is the first job's part file; output dir must not exist.
        String input = args.length > 0 ? args[0] : "F://test/part-r-00000";
        String output = args.length > 1 ? args[1] : "F://test/Part";
        FileInputFormat.setInputPaths(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));

        // Propagate job success/failure to the process exit code
        // (the original ignored the return value).
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
▶ 二次排序
-
compareTo 方法里有两个比较条件时就是二次排序;由此可知,有几个比较条件就是几次排序。
-
案例:统计用户的上行流量,下行流量和总流量,最后结果按照总流量降序输出,如果总流量相同,就按照上行流量降序输出
-
代码
- 实体类 实现 WritableComparable接口
import org.apache.hadoop.io.WritableComparable; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; public class FlowBean implements WritableComparable<FlowBean> { private Long upFlow; private Long downFlow; private Long sumFlow; public FlowBean() { } @Override public String toString() { return upFlow + "\t" + downFlow + "\t" + sumFlow ; } public Long getUpFlow() { return upFlow; } public void setUpFlow(Long upFlow) { this.upFlow = upFlow; } public Long getDownFlow() { return downFlow; } public void setDownFlow(Long downFlow) { this.downFlow = downFlow; } public Long getSumFlow() { return sumFlow; } public void setSumFlow(Long sumFlow) { this.sumFlow = sumFlow; } @Override public void write(DataOutput out) throws IOException { out.writeLong(upFlow); out.writeLong(downFlow); out.writeLong(sumFlow); } @Override public void readFields(DataInput in) throws IOException { upFlow = in.readLong(); downFlow = in.readLong(); sumFlow = in.readLong(); } @Override public int compareTo(FlowBean o) { //正常情况小于返回-1 大于返回1 等于0 if(this.sumFlow > o.sumFlow){ return -1; } if(this.sumFlow < o.sumFlow){ return 1; } if(this.sumFlow == o.sumFlow){ if(this.upFlow > o.upFlow){ return -1; } if(this.upFlow < o.upFlow){ return 1; } } return 0; } }
- Mapper类
import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; import java.io.IOException; public class FlowOrderMapper extends Mapper<LongWritable, Text,FlowBean,Text> { Text v = new Text(); FlowBean flowBean = new FlowBean(); @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); String[] splits = line.split("\t"); String phone = splits[0]; long upFlow = Long.parseLong(splits[1]); long downFlow = Long.parseLong(splits[2]); long sumFlow = Long.parseLong(splits[3]); flowBean.setUpFlow(upFlow); flowBean.setDownFlow(downFlow); flowBean.setSumFlow(sumFlow); v.set(phone); context.write(flowBean,v); } }
- Reducer类
import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; import java.io.IOException; public class FlowOrderReduce extends Reducer<FlowBean, Text,Text,FlowBean> { //按照总流量倒序写入 @Override protected void reduce(FlowBean key, Iterable<Text> values, Context context) throws IOException, InterruptedException { //总流量可能相同 for(Text v:values){ context.write(v,key); } } }
- Driver类
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Driver for the secondary-sort job: consumes the first job's output and
 * re-sorts it by total flow, then up-flow, via FlowBean.compareTo().
 * Usage: FlowOrderDriver [inputPath] [outputPath] — falls back to the
 * original hard-coded local paths when no arguments are given.
 */
public class FlowOrderDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Job job = Job.getInstance();
        // Required on a cluster so Hadoop can locate the jar containing the
        // mapper/reducer classes (was missing in the original).
        job.setJarByClass(FlowOrderDriver.class);

        job.setMapperClass(FlowOrderMapper.class);
        job.setReducerClass(FlowOrderReduce.class);

        // Mapper output: the bean is the key so the shuffle sorts on it.
        job.setMapOutputKeyClass(FlowBean.class);
        job.setMapOutputValueClass(Text.class);
        // Final output: phone back in the key position.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        // Input is the first job's part file; output dir must not exist.
        String input = args.length > 0 ? args[0] : "F://test/part-r-00000";
        String output = args.length > 1 ? args[1] : "F://test/Part";
        FileInputFormat.setInputPaths(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));

        // Propagate job success/failure to the process exit code
        // (the original ignored the return value).
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}