实验1:序列化
统计手机号耗费的总上行流量、下行流量、总流量
输入数据
数据格式:手机号 上传流量 下载流量
13965590001 13691 927138
13965590004 6925 647540
13965590002 20479 922428
13965590005 15883 57866
13965590003 13609 920184
13965590002 17979 386331
13965590006 3948 318871
13965590000 16823 935836
13965590006 10849 568449
13965590005 19065 791606
13965590008 13278 596751
13965590000 8468 400582
13965590000 16329 999656
13965590001 16797 642545
13965590007 19192 420749
13965590007 10598 381767
13965590007 7906 30797
13965590007 7914 348779
13965590002 22510 572017
13965590002 2786 441342
13965590008 7989 149352
13965590003 9686 257706
输出结果
数据格式:手机号 上传流量 下载流量 总流量
13965590000 41620 2336074 2377694
13965590001 30488 1569683 1600171
13965590002 63754 2322118 2385872
13965590003 23295 1177890 1201185
13965590004 6925 647540 654465
13965590005 34948 849472 884420
13965590006 14797 887320 902117
13965590007 45610 1182092 1227702
13965590008 21267 746103 767370
FlowBean.class
/**
 * Value object holding the upload, download and total traffic counters,
 * serializable through Hadoop's {@code Writable} interface.
 */
public class FlowBean implements Writable {
    /* Upload traffic */
    private long upFlow;
    /* Download traffic */
    private long downFlow;
    /* Total traffic (upload + download when set via the constructor or setFlowBean) */
    private long sumFlow;

    /** Creates a bean whose three counters are all zero. */
    public FlowBean() {
    }

    /**
     * Creates a bean from the two directional counters; the total is derived from them.
     *
     * @param upFlow   upload traffic
     * @param downFlow download traffic
     */
    public FlowBean(long upFlow, long downFlow) {
        this.upFlow = upFlow;
        this.downFlow = downFlow;
        this.sumFlow = this.upFlow + this.downFlow;
    }

    /**
     * Overwrites all three counters in place, recomputing the total.
     *
     * @param upFlow   upload traffic
     * @param downFlow download traffic
     */
    public void setFlowBean(long upFlow, long downFlow) {
        this.upFlow = upFlow;
        this.downFlow = downFlow;
        this.sumFlow = this.upFlow + this.downFlow;
    }

    /**
     * Serialization: writes upFlow, downFlow and sumFlow, in that order.
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(this.upFlow);
        out.writeLong(this.downFlow);
        out.writeLong(this.sumFlow);
    }

    /**
     * Deserialization: the fields must be read in exactly the same order
     * in which {@link #write(DataOutput)} emits them.
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        upFlow = in.readLong();
        downFlow = in.readLong();
        sumFlow = in.readLong();
    }

    /** Renders the three counters separated by tab characters. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append(upFlow).append("\t");
        text.append(downFlow).append("\t");
        text.append(sumFlow);
        return text.toString();
    }
    // getters and setters omitted...
}
FlowMapper.class
/**
 * Mapper that converts each tab-separated input line into a
 * (phone number, FlowBean) pair.
 */
public class FlowMapper extends Mapper<LongWritable, Text, Text, FlowBean> {
    // Output key and value instances, reused for every call to map()
    Text k = new Text();
    FlowBean v = new FlowBean();

    /**
     * Parses one line of the form "phone \t upFlow \t downFlow" and emits it.
     *
     * @param key     input key supplied by the framework (not used here)
     * @param value   one line of input text
     * @param context used to emit the (phone, FlowBean) pair
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Example record: 13965590002 916254 62132
        // Split the line into its tab-separated columns
        final String[] columns = value.toString().split("\t");

        // Column 0 is the phone number, columns 1 and 2 are the traffic counters
        final String phoneNumber = columns[0];
        final long uploaded = Long.parseLong(columns[1]);
        final long downloaded = Long.parseLong(columns[2]);

        // Fill the reusable key/value objects
        k.set(phoneNumber);
        v.setFlowBean(uploaded, downloaded);

        // Emit the pair
        context.write(k, v);
    }
}
FlowReduce.class
/**
 * Reducer that adds up the upload and download traffic of all records
 * sharing the same phone number and emits one summed FlowBean per phone.
 */
public class FlowReduce extends Reducer<Text, FlowBean, Text, FlowBean> {
    /**
     * Sums the traffic counters of every bean grouped under {@code key}.
     *
     * @param key     the phone number
     * @param values  all FlowBeans emitted by the mappers for this phone number
     * @param context used to emit the (phone, summed FlowBean) pair
     */
    @Override
    protected void reduce(Text key, Iterable<FlowBean> values, Context context)
            throws IOException, InterruptedException {
        // Accumulate in long: FlowBean stores its counters as long, and an int
        // accumulator combined with += would silently narrow and wrap around
        // once a total exceeds Integer.MAX_VALUE.
        long sumUpFlow = 0L;
        long sumDownFlow = 0L;
        for (FlowBean flowBean : values) {
            sumUpFlow += flowBean.getUpFlow();
            sumDownFlow += flowBean.getDownFlow();
        }
        // The FlowBean constructor derives the total from the two sums
        FlowBean v = new FlowBean(sumUpFlow, sumDownFlow);
        context.write(key, v);
    }
}
FlowDriver.class
/**
 * Driver that configures and submits the traffic-summing job
 * (FlowMapper + FlowReduce) through Hadoop's Tool/ToolRunner mechanism.
 */
public class FlowDriver implements Tool {
    Configuration conf = null;

    @Override
    public Configuration getConf() {
        return conf;
    }

    @Override
    public void setConf(Configuration configuration) {
        this.conf = configuration;
    }

    /**
     * Builds the job and waits for it to finish.
     *
     * @param args args[0] is the input path, args[1] is the output path
     * @return 0 when the job succeeds, 1 when it fails, 2 when the arguments are missing
     */
    @Override
    public int run(String[] args) throws Exception {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException
        if (args == null || args.length < 2) {
            System.err.println("Usage: FlowDriver <input path> <output path>");
            return 2;
        }
        // Obtain the job
        Job job = Job.getInstance(conf);
        // Set the jar location via the driver class
        job.setJarByClass(getClass());
        // Register the Mapper and Reducer
        job.setMapperClass(FlowMapper.class);
        job.setReducerClass(FlowReduce.class);
        // Key/value types emitted by the Mapper
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);
        // Key/value types of the final output. The value type must be set with
        // setOutputValueClass; calling setOutputKeyClass twice would overwrite
        // the key type with FlowBean and leave the value type unset.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);
        // Input and output paths
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Submit the job and block until it completes
        boolean code = job.waitForCompletion(true);
        return code ? 0 : 1;
    }

    /**
     * Entry point. When no command-line arguments are given, the demo paths
     * below are used; otherwise the supplied arguments are passed through.
     */
    public static void main(String[] args) throws Exception {
        if (args.length == 0) {
            args = new String[] { "e:/input/flow1", "e:/output/flow2" };
        }
        int run = ToolRunner.run(new FlowDriver(), args);
        System.exit(run);
    }
}
实验2:全排序
对实验1产生的结果按总流量进行倒序排序(全排序)
- 原理
MR程序在处理数据的过程中会对数据排序(map输出的kv对传输到reduce之前,会排序),排序的依据是map输出的key。所以,我们如果要实现自己需要的排序规则,则可以考虑将排序因素放到key中,让key实现接口:WritableComparable,然后重写key的compareTo方法。
输入数据
数据格式:手机号 上传流量 下载流量 总流量
13965590000 41620 2336074 2377694
13965590001 30488 1569683 1600171
13965590002 63754 2322118 2385872
13965590003 23295 1177890 1201185
13965590004 6925 647540 654465
13965590005 34948 849472 884420
13965590006 14797 887320 902117
13965590007 45610 1182092 1227702
13965590008 21267 746103 767370
输出数据
手机号 上传总流量 下载总流量 总流量(降序)
13965590002 63754 2322118 2385872
13965590000 41620 2336074 2377694
13965590001 30488 1569683 1600171
13965590007 45610 1182092 1227702
13965590003 23295 1177890 1201185
13965590006 14797 887320 902117
13965590005 34948 849472 884420
13965590008 21267 746103 767370
13965590004 6925 647540 654465
FlowBean.class
在实验1 的基础上,进行修改,实现 WritableComparable接口
public class FlowBean implements WritableComparable<FlowBean> {
    /**
     * Orders beans by total traffic in descending order (largest total first).
     *
     * @param fb the bean to compare against
     * @return a negative value when this bean's total is larger than {@code fb}'s,
     *         zero when both totals are equal, a positive value when it is smaller
     */
    @Override
    public int compareTo(FlowBean fb) {
        // Operands are swapped (other first, this second) to obtain descending order.
        // All three outcomes (negative / zero / positive) must be returned: reporting
        // only 0 or 1 makes keys with different totals look equal, which breaks both
        // the sort and the grouping of keys on the reduce side.
        return Long.compare(fb.getSumFlow(), this.getSumFlow());
    }
}
FlowSortMapper.class
- 实验1:Mapper<LongWritable, Text, Text, FlowBean>
- 实验2:Mapper<LongWritable, Text, FlowBean, Text>
- 排序的依据是map输出的key,所以实验2 map的key为FlowBean类型。
/**
 * Mapper for the sorting job: emits (FlowBean, phone number) so that the
 * FlowBean acts as the map output key.
 */
public class FlowSortMapper extends Mapper<LongWritable, Text, FlowBean, Text> {
    // Output value and key instances, reused for every call to map()
    Text v = new Text();
    FlowBean k = new FlowBean();

    /**
     * Parses one tab-separated line and emits the traffic bean as key and
     * the phone number as value.
     *
     * @param key     input key supplied by the framework (not used here)
     * @param value   one line of input text
     * @param context used to emit the (FlowBean, phone) pair
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Example record: 13965590002 916254 62132
        // Split the line into its tab-separated columns
        final String[] columns = value.toString().split("\t");

        // Column 0 is the phone number, columns 1 and 2 are the traffic counters;
        // the total is recomputed by setFlowBean from these two values
        final String phoneNumber = columns[0];
        final long uploaded = Long.parseLong(columns[1]);
        final long downloaded = Long.parseLong(columns[2]);

        // Fill the reusable value/key objects
        v.set(phoneNumber);
        k.setFlowBean(uploaded, downloaded);

        // Emit the pair
        context.write(k, v);
    }
}
FlowSortReduce.class
/**
 * Reducer for the sorting job: swaps key and value back so the output is
 * (phone number, FlowBean).
 */
public class FlowSortReduce extends Reducer<FlowBean, Text, Text, FlowBean> {
    /**
     * Writes one output record per phone number grouped under the given bean.
     *
     * @param key     the traffic bean used as the map output key
     * @param values  the phone numbers grouped under this key
     * @param context used to emit the (phone, FlowBean) pairs
     */
    @Override
    protected void reduce(FlowBean key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // No aggregation is needed here; each phone number is written out directly
        for (final Text phoneNumber : values) {
            context.write(phoneNumber, key);
        }
    }
}
FlowSortDriver.class
// Register the Mapper and Reducer of the sorting job
job.setMapperClass(FlowSortMapper.class);
job.setReducerClass(FlowSortReduce.class);
// Key/value types emitted by the Mapper: FlowBean is the key, Text is the value
job.setMapOutputKeyClass(FlowBean.class);
job.setMapOutputValueClass(Text.class);