1 ,需求 :
统计每个用户的上传,下载的流量的和
2 ,数据 :data_flow.dat
1363157985066 13726230503 00-FD-07-A4-72-B8:CMCC 120.196.100.82 i02.c.aliimg.com 游戏娱乐 1 1 1 1 200
1363157995052 13726230503 5C-0E-8B-C7-F1-E0:CMCC 120.197.40.4 jd.com 京东购物 1 1 1 1 200
1363157991076 13726230503 20-10-7A-28-CC-0A:CMCC 120.196.100.99 taobao.com 淘宝购物 1 1 1 1 200
1363154400022 13926251106 5C-0E-8B-8B-B1-50:CMCC 120.197.40.4 cnblogs.com 技术门户 1 1 1 1 200
1363157993044 13926251106 94-71-AC-CD-E6-18:CMCC-EASY 120.196.100.99 iface.qiyi.com 视频网站 1 1 1 1 200
1363157995074 13926251106 5C-0E-8B-8C-E8-20:7DaysInn 120.197.40.4 122.72.52.12 未知 1 1 1 1 200
1363157993055 13926251106 C4-17-FE-BA-DE-D9:CMCC 120.196.100.99 sougou.com 综合门户 1 1 1 1 200
1363157995033 15920133257 5C-0E-8B-C7-BA-20:CMCC 120.197.40.4 sug.so.360.cn 信息安全 1 1 1 1 200
1363157983019 15920133257 68-A1-B7-03-07-B1:CMCC-EASY 120.196.100.82 baidu.com 综合搜索 1 1 1 1 200
1363157984041 13660577991 5C-0E-8B-92-5C-20:CMCC-EASY 120.197.40.4 s19.cnzz.com 站点统计 1 1 1 1 200
1363157973098 15920133257 5C-0E-8B-C7-F7-90:CMCC 120.197.40.4 rank.ie.sogou.com 搜索引擎 1 1 1 1 200
1363157986029 13660577991 E8-99-C4-4E-93-E0:CMCC-EASY 120.196.100.99 www.umeng.com 站点统计 1 1 1 1 200
1363157992093 13660577991 C4-17-FE-BA-DE-D9:CMCC 120.196.100.99 zhilian.com 招聘门户 1 1 1 1 200
1363157986041 13922314466 5C-0E-8B-C7-FC-80:CMCC-EASY 120.197.40.4 csdn.net 技术门户 1 1 1 1 200
1363157984040 13602846565 5C-0E-8B-8B-B6-00:CMCC 120.197.40.4 2052.flash2-http.qq.com 综合门户 1 1 1 1 200
1363157995093 13922314466 00-FD-07-A2-EC-BA:CMCC 120.196.100.82 img.qfc.cn 图片大全 1 1 1 1 200
1363157982040 13823070001 5C-0A-5B-6A-0B-D4:CMCC-EASY 120.196.100.99 y0.ifengimg.com 综合门户 1 1 1 1 200
1363157986072 13823070001 84-25-DB-4F-10-1A:CMCC-EASY 120.196.100.99 input.shouji.sogou.com 搜索引擎 1 1 1 1 200
1363157990043 13600217502 00-1F-64-E1-E6-9A:CMCC 120.196.100.55 t3.baidu.com 搜索引擎 1 1 1 1 200
1363157988072 13600217502 00-FD-07-A4-7B-08:CMCC 120.196.100.82 http://youku.com/ 视频网站 1 1 1 1 200
1363157985079 13823070001 20-7C-8F-70-68-1F:CMCC 120.196.100.99 img.qfc.cn 图片浏览 1 1 1 1 200
1363157985069 13600217502 00-1F-64-E2-E8-B1:CMCC 120.196.100.55 www.baidu.com 综合门户 1 1 1 1 200
3 ,数据解析 :
4 ,思路 :
- 明确需求 : 统计每个人 ( 手机号码 ) 下标为 6,7,8,9 的四个字段 ( 上行包数,下行包数,上行流量,下行流量 ) ,分别求和。
- 具体做法 : 手机号是 k2 ,其他的数据都是 v2
5 ,pojo :
package day02.ll;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Hadoop {@link Writable} value object carrying the four traffic counters of a
 * record: {@code upPackage}, {@code downPackage}, {@code upData} and
 * {@code downData}.
 *
 * <p>The counters are stored as primitive {@code long}s so that a freshly
 * constructed instance serializes as four zeros instead of failing with a
 * {@link NullPointerException} while unboxing {@code null} in
 * {@link #write(DataOutput)}. The accessors keep their {@code Long} signatures
 * for source compatibility; a {@code null} argument is treated as {@code 0}.
 */
public class LlPojo implements Writable {

    // The four counters, in serialization order.
    private long upPackage;
    private long downPackage;
    private long upData;
    private long downData;

    /** No-arg constructor; all counters start at 0. */
    public LlPojo() {}

    /**
     * Creates an instance with the given counters.
     *
     * @param upPackage   value for {@code upPackage}; {@code null} means 0
     * @param downPackage value for {@code downPackage}; {@code null} means 0
     * @param upData      value for {@code upData}; {@code null} means 0
     * @param downData    value for {@code downData}; {@code null} means 0
     */
    public LlPojo(Long upPackage, Long downPackage, Long upData, Long downData) {
        this.upPackage = orZero(upPackage);
        this.downPackage = orZero(downPackage);
        this.upData = orZero(upData);
        this.downData = orZero(downData);
    }

    /** Maps {@code null} to 0 so the primitive fields can always be assigned. */
    private static long orZero(Long value) {
        return value == null ? 0L : value;
    }

    public Long getUpPackage() {
        return upPackage;
    }

    public void setUpPackage(Long upPackage) {
        this.upPackage = orZero(upPackage);
    }

    public Long getDownPackage() {
        return downPackage;
    }

    public void setDownPackage(Long downPackage) {
        this.downPackage = orZero(downPackage);
    }

    public Long getUpData() {
        return upData;
    }

    public void setUpData(Long upData) {
        this.upData = orZero(upData);
    }

    public Long getDownData() {
        return downData;
    }

    public void setDownData(Long downData) {
        this.downData = orZero(downData);
    }

    @Override
    public String toString() {
        return "LlPojo{" +
                "upPackage=" + upPackage +
                ", downPackage=" + downPackage +
                ", upData=" + upData +
                ", downData=" + downData +
                '}';
    }

    /**
     * Serializes the four counters as consecutive longs.
     *
     * @param dataOutput sink to write to
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeLong(this.upPackage);
        dataOutput.writeLong(this.downPackage);
        dataOutput.writeLong(this.upData);
        dataOutput.writeLong(this.downData);
    }

    /**
     * Deserializes the four counters; the read order must mirror
     * {@link #write(DataOutput)}.
     *
     * @param dataInput source to read from
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.upPackage = dataInput.readLong();
        this.downPackage = dataInput.readLong();
        this.upData = dataInput.readLong();
        this.downData = dataInput.readLong();
    }
}
6 , map :
package day02.ll;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
// Type parameters: k1, v1, k2, v2
// = line offset, one line of text, phone number, traffic counters (LlPojo)
/**
 * Mapper that turns one flow-log line into a (phone number, counters) pair.
 *
 * <p>Example input line (fields 6, 7, 8, 9 are the counters that get summed):
 * <pre>
 * 1363157985066 13726230503 00-FD-07-A4-72-B8:CMCC 120.196.100.82 i02.c.aliimg.com 游戏娱乐 24 27 2481 24681 200
 * </pre>
 * NOTE(review): the line is split on TAB; the sample shown in the notes looks
 * space-separated, which may be an artifact of copying — confirm against the
 * real data file.
 */
public class LlMap extends Mapper<LongWritable, Text, Text, LlPojo> {

    /** Index of the phone-number field, used as the output key. */
    private static final int PHONE_INDEX = 1;
    /** Index of the first of the four consecutive counter fields (6, 7, 8, 9). */
    private static final int FIRST_COUNTER_INDEX = 6;
    /** A usable line must contain at least the fields up to index 9. */
    private static final int MIN_FIELDS = FIRST_COUNTER_INDEX + 4;

    // Reused across map() calls to avoid allocating per record.
    private final Text k2 = new Text();
    private final LlPojo v2 = new LlPojo();

    /**
     * Parses one line and emits (phone number, counters).
     *
     * @param key     k1 supplied by the input format (not used for the output)
     * @param value   one line of the flow log
     * @param context task context used to emit the output pair
     * @throws IOException           if a non-blank line has fewer than 10 fields,
     *                               or if writing the output fails
     * @throws InterruptedException  if the task is interrupted while writing
     * @throws NumberFormatException if one of the counter fields is not a long
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        // A blank line (e.g. a trailing newline at end of file) carries no record.
        if (line.trim().isEmpty()) {
            return;
        }

        // Split the line into its fields.
        String[] arr = line.split("\t");
        if (arr.length < MIN_FIELDS) {
            // Fail with a diagnosable message instead of a bare ArrayIndexOutOfBoundsException.
            throw new IOException("Malformed record at offset " + key + ": expected at least "
                    + MIN_FIELDS + " tab-separated fields but found " + arr.length);
        }

        k2.set(arr[PHONE_INDEX]);
        // Each counter is parsed exactly once.
        v2.setUpPackage(Long.parseLong(arr[FIRST_COUNTER_INDEX]));
        v2.setDownPackage(Long.parseLong(arr[FIRST_COUNTER_INDEX + 1]));
        v2.setUpData(Long.parseLong(arr[FIRST_COUNTER_INDEX + 2]));
        v2.setDownData(Long.parseLong(arr[FIRST_COUNTER_INDEX + 3]));
        context.write(k2, v2);
    }
}
7 , reduce :
package day02.ll;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
// Type parameters: k2, v2, k3, v3
/**
 * Reducer that adds up the four traffic counters of all values sharing a key
 * and emits the key together with the totals.
 */
public class LlReduce extends Reducer<Text, LlPojo, Text, LlPojo> {

    // Output value object, populated anew for every key.
    private LlPojo totals = new LlPojo();

    /**
     * Sums each counter over {@code values} and writes (key, totals).
     *
     * @param key     the grouping key
     * @param values  all counter objects emitted for {@code key}
     * @param context task context used to emit the result
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<LlPojo> values, Context context) throws IOException, InterruptedException {
        long sumUpPackage = 0L;
        long sumDownPackage = 0L;
        long sumUpData = 0L;
        long sumDownData = 0L;

        // Accumulate every counter across the group.
        for (LlPojo record : values) {
            sumUpPackage = sumUpPackage + record.getUpPackage();
            sumDownPackage = sumDownPackage + record.getDownPackage();
            sumUpData = sumUpData + record.getUpData();
            sumDownData = sumDownData + record.getDownData();
        }

        totals.setUpPackage(sumUpPackage);
        totals.setDownPackage(sumDownPackage);
        totals.setUpData(sumUpData);
        totals.setDownData(sumDownData);

        context.write(key, totals);
    }
}
8 ,job :
package day02.ll;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Driver for the "ll" job: wires {@code LlMap} and {@code LlReduce} together,
 * reading text input and writing text output.
 *
 * <p>Usage: {@code LlJob [inputPath [outputPath]]}. When an argument is
 * omitted, the original hard-coded local path is used, so running without
 * arguments behaves as before.
 */
public class LlJob extends Configured implements Tool {

    /** Input path used when no first argument is given. */
    private static final String DEFAULT_INPUT =
            "C:\\Users\\86182\\Desktop\\hadoop\\day03 -- mr 高级\\03 -- 流量统计\\data_flow.dat";
    /** Output path used when no second argument is given. */
    private static final String DEFAULT_OUTPUT =
            "C:\\Users\\86182\\Desktop\\hadoop\\day03 -- mr 高级\\03 -- 流量统计\\out";

    /**
     * Configures and runs the job, blocking until it completes.
     *
     * @param strings optional arguments: {@code [0]} input path, {@code [1]} output path
     * @return 0 if the job succeeded, 1 otherwise
     * @throws Exception if job setup or execution fails
     */
    @Override
    public int run(String[] strings) throws Exception {
        String inputPath = argOrDefault(strings, 0, DEFAULT_INPUT);
        String outputPath = argOrDefault(strings, 1, DEFAULT_OUTPUT);

        // Set the main class.
        Job job = Job.getInstance(super.getConf(), "ll");
        job.setJarByClass(this.getClass());

        // 1. Input
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job, new Path(inputPath));

        // 2. Map
        job.setMapperClass(LlMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LlPojo.class);

        // 3. Partition, 4. sort, 5. combine, 6. group: nothing configured here.

        // 7. Reduce
        job.setReducerClass(LlReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LlPojo.class);

        // 8. Output (must be a path that does not exist yet)
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path(outputPath));

        // Run and translate success/failure into an exit code: true -> 0, false -> 1.
        boolean succeeded = job.waitForCompletion(true);
        return succeeded ? 0 : 1;
    }

    /** Returns {@code args[index]} when present, otherwise {@code fallback}. */
    private static String argOrDefault(String[] args, int index, String fallback) {
        return (args != null && args.length > index) ? args[index] : fallback;
    }

    /**
     * Entry point; delegates to {@link ToolRunner} and exits with the job's status code.
     *
     * @param args command-line arguments, passed through {@link ToolRunner} to {@link #run(String[])}
     * @throws Exception if the job fails to run
     */
    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new Configuration(), new LlJob(), args);
        System.exit(exitCode);
    }
}
9 ,执行查看结果 :
本地执行即可
10 ,如果想要排序 : 按照流量排序
思路 : 先按照第一列排序,如果数值一样,就按照第二列,再一样,就第三列,第四列…
11 ,代码 : 只需要修改 pojo ,然后,用 pojo 来做 k2
package day02.ll;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Hadoop {@link WritableComparable} carrying the four traffic counters
 * {@code upPackage}, {@code downPackage}, {@code upData} and {@code downData},
 * intended to be usable as a map output key (k2) so records are sorted by
 * their counters.
 *
 * <p>Ordering is lexicographic over (upPackage, downPackage, upData, downData).
 * {@link #equals(Object)} and {@link #hashCode()} are defined consistently with
 * {@link #compareTo(LlPojo)}: Hadoop's default hash partitioner routes keys by
 * {@code hashCode()}, so an identity-based hash would scatter equal keys across
 * reducers.
 *
 * <p>The counters are stored as primitive {@code long}s so a freshly
 * constructed instance serializes as zeros instead of throwing a
 * {@link NullPointerException}. The accessors keep their {@code Long}
 * signatures; a {@code null} argument is treated as {@code 0}.
 */
public class LlPojo implements WritableComparable<LlPojo> {

    // The four counters, in serialization and comparison order.
    private long upPackage;
    private long downPackage;
    private long upData;
    private long downData;

    /** No-arg constructor; all counters start at 0. */
    public LlPojo() {}

    /**
     * Creates an instance with the given counters.
     *
     * @param upPackage   value for {@code upPackage}; {@code null} means 0
     * @param downPackage value for {@code downPackage}; {@code null} means 0
     * @param upData      value for {@code upData}; {@code null} means 0
     * @param downData    value for {@code downData}; {@code null} means 0
     */
    public LlPojo(Long upPackage, Long downPackage, Long upData, Long downData) {
        this.upPackage = orZero(upPackage);
        this.downPackage = orZero(downPackage);
        this.upData = orZero(upData);
        this.downData = orZero(downData);
    }

    /** Maps {@code null} to 0 so the primitive fields can always be assigned. */
    private static long orZero(Long value) {
        return value == null ? 0L : value;
    }

    /** Folds a long into an int the same way {@code Long#hashCode()} does. */
    private static int hashOf(long value) {
        return (int) (value ^ (value >>> 32));
    }

    public Long getUpPackage() {
        return upPackage;
    }

    public void setUpPackage(Long upPackage) {
        this.upPackage = orZero(upPackage);
    }

    public Long getDownPackage() {
        return downPackage;
    }

    public void setDownPackage(Long downPackage) {
        this.downPackage = orZero(downPackage);
    }

    public Long getUpData() {
        return upData;
    }

    public void setUpData(Long upData) {
        this.upData = orZero(upData);
    }

    public Long getDownData() {
        return downData;
    }

    public void setDownData(Long downData) {
        this.downData = orZero(downData);
    }

    @Override
    public String toString() {
        return "LlPojo{" +
                "upPackage=" + upPackage +
                ", downPackage=" + downPackage +
                ", upData=" + upData +
                ", downData=" + downData +
                '}';
    }

    /**
     * Serializes the four counters as consecutive longs.
     *
     * @param dataOutput sink to write to
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeLong(this.upPackage);
        dataOutput.writeLong(this.downPackage);
        dataOutput.writeLong(this.upData);
        dataOutput.writeLong(this.downData);
    }

    /**
     * Deserializes the four counters; the read order must mirror
     * {@link #write(DataOutput)}.
     *
     * @param dataInput source to read from
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.upPackage = dataInput.readLong();
        this.downPackage = dataInput.readLong();
        this.upData = dataInput.readLong();
        this.downData = dataInput.readLong();
    }

    /**
     * Compares by upPackage, then downPackage, then upData, then downData,
     * each in ascending order; later fields are consulted only on a tie.
     *
     * @param o the object to compare with
     * @return a negative, zero or positive value as this object sorts before,
     *         equal to, or after {@code o}
     */
    @Override
    public int compareTo(LlPojo o) {
        int result = Long.compare(this.upPackage, o.upPackage);
        if (result != 0) {
            return result;
        }
        result = Long.compare(this.downPackage, o.downPackage);
        if (result != 0) {
            return result;
        }
        result = Long.compare(this.upData, o.upData);
        if (result != 0) {
            return result;
        }
        return Long.compare(this.downData, o.downData);
    }

    /** Two instances are equal exactly when all four counters match. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof LlPojo)) {
            return false;
        }
        LlPojo other = (LlPojo) obj;
        return upPackage == other.upPackage
                && downPackage == other.downPackage
                && upData == other.upData
                && downData == other.downData;
    }

    /** Value-based hash over the four counters, consistent with {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        int hash = hashOf(upPackage);
        hash = 31 * hash + hashOf(downPackage);
        hash = 31 * hash + hashOf(upData);
        hash = 31 * hash + hashOf(downData);
        return hash;
    }
}