Implementing Upstream and Downstream Traffic Statistics per Phone Number
Data
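The original dataset is not reproduced here. Based on how the mapper below parses each record (tab-separated fields, phone number in the second field, upstream and downstream byte counts in the third- and second-to-last fields), a hypothetical input line might look like the following; every field value is illustrative:

1    13736230513    192.196.100.1    www.example.com    2481    24681    200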
Description
For each phone number in the input, compute its total upstream traffic, total downstream traffic, and their combined sum.
Implementation
DataInfo
package com.soft863;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

// 1. Implement the Writable interface (a value type only needs Writable;
//    a class used as a key would need WritableComparable instead)
public class DataInfo implements Writable {
    long upFlow;
    long downFlow;
    long sumFlow;

    // 2. Deserialization instantiates the class reflectively through the
    //    no-arg constructor, so it must be present
    public DataInfo() {
        super();
    }

    public DataInfo(long up, long down) {
        upFlow = up;
        downFlow = down;
        sumFlow = upFlow + downFlow;
    }

    public void setFlow(long up, long down) {
        upFlow = up;
        downFlow = down;
        sumFlow = upFlow + downFlow;
    }

    // 3. Serialization: write the fields out
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(upFlow);
        out.writeLong(downFlow);
        out.writeLong(sumFlow);
    }

    // 4. Deserialization
    // 5. Fields must be read back in exactly the order they were written
    @Override
    public void readFields(DataInput in) throws IOException {
        upFlow = in.readLong();
        downFlow = in.readLong();
        sumFlow = in.readLong();
    }

    // 6. Override toString() so the value prints cleanly in the output files
    @Override
    public String toString() {
        return upFlow + "\t" + downFlow + "\t" + sumFlow;
    }
}
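Since readFields must consume bytes in exactly the order write produced them, the round trip is easy to sanity-check locally. Below is a minimal standalone sketch; the class name DataInfoRoundTrip is illustrative and not part of the job.

package com.soft863;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

// Hypothetical local check: serialize a DataInfo and read it back.
public class DataInfoRoundTrip {
    public static void main(String[] args) throws IOException {
        DataInfo original = new DataInfo(100L, 200L);

        // Serialize with the same write() Hadoop calls during the shuffle.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));

        // Deserialize into a fresh instance created via the no-arg constructor.
        DataInfo copy = new DataInfo();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

        // Expect: 100	200	300
        System.out.println(copy);
    }
}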
FlowMapper
package com.soft863;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class FlowMapper extends Mapper<LongWritable, Text, Text, DataInfo> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // 1. Read one line of input
        String line = value.toString();
        // 2. Split it into tab-separated fields
        String[] words = line.split("\t");
        // 3. Build the output key/value
        // The phone number is the second field
        String phone = words[1];
        // Upstream and downstream traffic are the third- and second-to-last
        // fields; indexing from the end keeps the parse stable when the
        // number of middle fields varies between records
        long upFlow = Long.parseLong(words[words.length - 3]);
        long downFlow = Long.parseLong(words[words.length - 2]);
        context.write(new Text(phone), new DataInfo(upFlow, downFlow));
    }
}
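Allocating a new Text and DataInfo for every input record works, but on large inputs a common Hadoop idiom is to reuse a single output key/value pair: context.write() serializes the contents immediately, so mutating the objects afterwards is safe. A sketch of that variant, using the setFlow method DataInfo already provides:

public class FlowMapper extends Mapper<LongWritable, Text, Text, DataInfo> {
    // Reused across map() calls to avoid per-record allocation.
    private final Text outKey = new Text();
    private final DataInfo outValue = new DataInfo();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] words = value.toString().split("\t");
        outKey.set(words[1]);
        outValue.setFlow(Long.parseLong(words[words.length - 3]),
                Long.parseLong(words[words.length - 2]));
        context.write(outKey, outValue);
    }
}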
FlowReducer
package com.soft863;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class FlowReducer extends Reducer<Text, DataInfo, Text, DataInfo> {
    @Override
    protected void reduce(Text key, Iterable<DataInfo> values, Context context)
            throws IOException, InterruptedException {
        long sumUpFlow = 0;
        long sumDownFlow = 0;
        // 1. Iterate over all beans for this phone number, accumulating the
        //    upstream and downstream traffic separately. Hadoop reuses the
        //    same DataInfo instance across iterations, so copy the primitive
        //    fields out rather than keeping references to the objects.
        for (DataInfo dataInfo : values) {
            sumUpFlow += dataInfo.upFlow;
            sumDownFlow += dataInfo.downFlow;
        }
        // 2. Emit a single total per phone number, after the loop finishes
        //    (writing inside the loop would emit one partial sum per record)
        context.write(key, new DataInfo(sumUpFlow, sumDownFlow));
    }
}
FlowDriver
package com.soft863;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class FlowDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop100:9000");

        Job job = Job.getInstance(conf);
        job.setJarByClass(FlowDriver.class);

        // Wire up the mapper and reducer, and declare their output types
        job.setMapperClass(FlowMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(DataInfo.class);
        job.setReducerClass(FlowReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DataInfo.class);

        Path sourceFile = new Path(args[0]);
        Path targetFile = new Path(args[1]);
        FileInputFormat.setInputPaths(job, sourceFile);
        FileOutputFormat.setOutputPath(job, targetFile);

        // The output directory must not already exist or the job fails,
        // so delete it first if present
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(targetFile)) {
            fs.delete(targetFile, true);
        }

        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
Run arguments
/bigdata/phone_info.txt /phone_out
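Assuming the three classes are packaged into a jar (the jar name below is illustrative), the job can be submitted with:

hadoop jar phone-flow.jar com.soft863.FlowDriver /bigdata/phone_info.txt /phone_out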
Run results
After the job completes, the part files under /phone_out contain one line per phone number in the form phone \t upFlow \t downFlow \t sumFlow: the key, a tab inserted by the default TextOutputFormat, then DataInfo.toString().