Table of Contents
1. Custom sorting: implement the WritableComparable interface and override the compareTo() method
I. Prerequisites
(1) Sample data
File location: /root/info/data/8/flow.txt
18329153881 2481 24681
18329153882 1116 954
18329153883 2481 24681
18329153884 264 0
18329153881 132 1512
18329153883 240 0
18329153884 1527 2106
18329153882 1543 1684
(2) Field descriptions
| Field meaning | phone number | upstream traffic | downstream traffic |
| Field name    | phone        | upflow           | downflow           |
| Data type     | string       | long             | long               |
(3) Requirement 2
Building on the previous result, add one more requirement: sort the statistics in descending order of total traffic.
(4) Requirement analysis
Basic idea: implement a custom bean that encapsulates the traffic fields (the same as in the earlier serialization example) and use that bean as the map output key.
When sorting happens: after the map output KV pairs are emitted and before they are delivered to the reducer (i.e., during the shuffle phase).
What sorting is based on: the map output key.
Therefore: to impose a custom sort order, put the sorting criteria into the key.
II. Code
1. Custom sorting: implement the WritableComparable interface and override the compareTo() method
Steps to implement custom sorting:
① Define a bean class (a plain Java class) that implements the WritableComparable interface
② Override the serialization method write(DataOutput out) and the deserialization method readFields(DataInput in)
③ Override the compareTo() method to implement the custom ordering
④ To make the result readable in the output file, override toString(); separating fields with "\t" makes downstream processing easier
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

// Custom sorting: implement the WritableComparable interface and override compareTo()
public class FlowBeanSort implements WritableComparable<FlowBeanSort> {
    private long upFlow;   // total upstream traffic
    private long downFlow; // total downstream traffic
    private long sumFlow;  // total traffic

    // No-arg constructor: deserialization instantiates the bean reflectively,
    // so a public no-arg constructor must be available
    public FlowBeanSort() {
        super();
    }

    // Parameterized constructor for convenient initialization
    public FlowBeanSort(long upFlow, long downFlow, long sumFlow) {
        super();
        this.upFlow = upFlow;
        this.downFlow = downFlow;
        this.sumFlow = sumFlow;
    }

    public long getUpFlow() {
        return upFlow;
    }

    public void setUpFlow(long upFlow) {
        this.upFlow = upFlow;
    }

    public long getDownFlow() {
        return downFlow;
    }

    public void setDownFlow(long downFlow) {
        this.downFlow = downFlow;
    }

    public long getSumFlow() {
        return sumFlow;
    }

    public void setSumFlow(long sumFlow) {
        this.sumFlow = sumFlow;
    }

    // Serialization method: write the fields to the output stream
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(upFlow);
        out.writeLong(downFlow);
        out.writeLong(sumFlow);
    }

    // Deserialization method: read the fields back from the input stream.
    // Note: fields must be read in exactly the same order and with the same
    // types as they were written
    @Override
    public void readFields(DataInput in) throws IOException {
        this.upFlow = in.readLong();
        this.downFlow = in.readLong();
        this.sumFlow = in.readLong();
    }

    // Custom sort: descending by total traffic. Long.compare returns 0 when
    // the sums are equal, which the compareTo() contract requires
    @Override
    public int compareTo(FlowBeanSort o) {
        return Long.compare(o.getSumFlow(), this.sumFlow);
    }

    // toString() controls what gets written to the output file
    @Override
    public String toString() {
        return upFlow + "\t" + downFlow + "\t" + sumFlow;
    }
}
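As a quick sanity check of the ordering, here is a minimal local sketch (plain JDK, no Hadoop cluster needed, with made-up traffic values) that sorts a few beans in memory via compareTo():

import java.util.Arrays;

// Minimal local check of FlowBeanSort's descending order (hypothetical values)
public class FlowBeanSortCheck {
    public static void main(String[] args) {
        FlowBeanSort[] beans = {
            new FlowBeanSort(100, 200, 300),
            new FlowBeanSort(500, 400, 900),
            new FlowBeanSort(50, 50, 100)
        };
        Arrays.sort(beans); // uses compareTo(): largest sumFlow first
        for (FlowBeanSort b : beans) {
            System.out.println(b); // expected sumFlow order: 900, 300, 100
        }
    }
}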
2. MapReduce program development for custom sorting
2.1 Writing the map-side program
The output of Requirement 1 serves as the input data for this sorting job. The custom FlowBeanSort (above) is the map output key, and the phone number is the map output value.
The skeleton code is as follows:
public class FlowSumSortMapper extends Mapper<LongWritable,Text,FlowBeanSort,Text>{
}
- The input data is the output of the previous aggregation job, which already contains each phone number's total traffic
- KEYOUT: the custom FlowBeanSort; VALUEOUT: the 11-digit phone number
The map() method:
Splits each line of text into 4 columns: phone number, total upstream traffic, total downstream traffic, and total traffic
Wraps {total upstream traffic, total downstream traffic, total traffic} in a FlowBeanSort so that records sort in descending order of total traffic
Emits <FlowBeanSort, phone number> pairs to the ReduceTask
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;

// Input: the previous aggregation job's output, i.e., each phone number's total traffic
// Output: <FlowBeanSort, phone number>
public class FlowSumSortMapper extends Mapper<LongWritable, Text, FlowBeanSort, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Get the line as a String, then split it on the "\t" delimiter
        String[] splits = value.toString().split("\t");
        // Extract the phone number
        String telephone = splits[0];
        // Wrap the traffic fields in a FlowBeanSort
        FlowBeanSort fbs = new FlowBeanSort();
        fbs.setUpFlow(Long.parseLong(splits[1]));   // total upstream traffic
        fbs.setDownFlow(Long.parseLong(splits[2])); // total downstream traffic
        fbs.setSumFlow(Long.parseLong(splits[3]));  // total traffic
        // Emit the bean as the key and the phone number as the value for the reduce side
        context.write(fbs, new Text(telephone));
    }
}
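For example, assuming the previous job aggregated the sample data above, its output would contain the line "18329153881	2613	26193	28806" (2481+132 up, 24681+1512 down, and their total). The mapper emits this as the pair <FlowBeanSort{2613, 26193, 28806}, "18329153881">, and the shuffle phase then orders all such pairs by compareTo().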
2.2 Writing the reduce-side program
The skeleton code is as follows:
public class FlowSumSortReducer extends Reducer<FlowBeanSort,Text,Text,FlowBeanSort>{
}
KEYIN: FlowBeanSort; VALUEIN: the 11-digit phone number
KEYOUT: the 11-digit phone number; VALUEOUT: FlowBeanSort
The reduce() method:
Receives the MapTask's output
Simply swaps the map output K-V pair and writes it out
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;

// Input: <FlowBeanSort, 11-digit phone number>  Output: <11-digit phone number, FlowBeanSort>
public class FlowSumSortReducer extends Reducer<FlowBeanSort, Text, Text, FlowBeanSort> {
    @Override
    protected void reduce(FlowBeanSort key, Iterable<Text> values,
            Context context) throws IOException, InterruptedException {
        // Iterate over all phone numbers grouped under this key
        for (Text tele : values) {
            // Write the phone number as the key and the traffic bean as the value
            context.write(new Text(tele), key);
        }
    }
}
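Note that the reducer loops over values rather than writing a single record: MapReduce groups keys that compare as equal into one reduce() call, so with the compareTo() above, two phone numbers with the same total traffic arrive together in a single Iterable, and the loop writes out each of them.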
2.3 Writing the driver program
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class FlowSumSortDemo {
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.230.13:9000");
        Job job = Job.getInstance(conf);
        job.setJarByClass(FlowSumSortDemo.class);
        // Wire up the custom mapper and reducer
        job.setMapperClass(FlowSumSortMapper.class);
        job.setReducerClass(FlowSumSortReducer.class);
        // Map output types: <FlowBeanSort, Text>
        job.setMapOutputKeyClass(FlowBeanSort.class);
        job.setMapOutputValueClass(Text.class);
        // Final output types: <Text, FlowBeanSort>
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBeanSort.class);
        // The input path is the previous job's output path; its data feeds the map side
        Path inPath = new Path("/flow/output_sum");
        Path outPath = new Path("/flow/output_sort");
        // Delete the output path if it already exists, otherwise the job would fail
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(outPath)) {
            fs.delete(outPath, true);
        }
        FileInputFormat.setInputPaths(job, inPath);
        FileOutputFormat.setOutputPath(job, outPath);
        boolean waitForCompletion = job.waitForCompletion(true);
        System.exit(waitForCompletion ? 0 : 1);
    }
}
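One caveat: a globally sorted result requires a single reducer. The driver above relies on the default reducer count of 1; with multiple reducers, each output file would only be sorted internally. To make this explicit, job.setNumReduceTasks(1) can be added before submission. Assuming the three classes are packaged into a jar named flow-sort.jar (a hypothetical name), the job can then be launched in the usual way:

hadoop jar flow-sort.jar FlowSumSortDemo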