- Hadoop 2.7.2
- Tomcat 8.5.31
- JDK 8u171
- Source data layout (one comma-separated record per line): phone number, upstream flow, downstream flow
- Output file layout (tab-separated): phone number, upstream flow, downstream flow, total flow (a sample of each is shown below)
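A minimal illustration with made-up records (the phone numbers and byte counts below are hypothetical, not taken from the source data):

Input (comma-separated):
13726230503,2481,24681
13826544101,264,0

Output (tab-separated; the value part is produced by Flow.toString()):
13726230503    2481    24681    27162
13826544101    264     0        264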
Flow (entity class)
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

// Custom value type: anything emitted as a map/reduce value must implement
// Hadoop's Writable serialization interface.
public class Flow implements Writable {

    private long upFlow;
    private long downFlow;

    // Once any other constructor exists, the no-arg constructor must be
    // declared explicitly: Hadoop instantiates Writables reflectively
    // during deserialization.
    public Flow() {
    }

    public Flow(long upFlow, long downFlow) {
        this.upFlow = upFlow;
        this.downFlow = downFlow;
    }

    public long getUpFlow() {
        return upFlow;
    }

    public void setUpFlow(long upFlow) {
        this.upFlow = upFlow;
    }

    public long getDownFlow() {
        return downFlow;
    }

    public void setDownFlow(long downFlow) {
        this.downFlow = downFlow;
    }

    // Format the value the way the final result file should look;
    // this method is called when the result file is written, which
    // keeps the downstream code short.
    @Override
    public String toString() {
        return upFlow + "\t" + downFlow + "\t" + (upFlow + downFlow);
    }

    // Deserialization: fields must be read in exactly the same order
    // they are written in write().
    @Override
    public void readFields(DataInput in) throws IOException {
        this.upFlow = in.readLong();
        this.downFlow = in.readLong();
    }

    // Serialization: write the fields into the instance's byte stream
    // (mind the field types).
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(upFlow);
        out.writeLong(downFlow);
    }
}
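As a quick sanity check that write() and readFields() agree on field order, a round trip through Hadoop's in-memory buffers can be sketched like this (a standalone snippet, not part of the original post; DataOutputBuffer and DataInputBuffer are utility classes in org.apache.hadoop.io):

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;

public class FlowRoundTrip {
    public static void main(String[] args) throws Exception {
        Flow original = new Flow(100L, 200L);

        // Serialize into an in-memory buffer.
        DataOutputBuffer out = new DataOutputBuffer();
        original.write(out);

        // Deserialize the same bytes back into a fresh instance.
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());
        Flow copy = new Flow();
        copy.readFields(in);

        // Should print: 100	200	300
        System.out.println(copy);
    }
}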
map
import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import com.sand.mr.bean.Flow;

// Input key is LongWritable: the byte offset of the line within the file.
public class FlowCountMapper extends Mapper<LongWritable, Text, Text, Flow> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        // Split once according to the source data format: phone,upFlow,downFlow
        String[] fields = line.split(",");
        String phoneNumber = fields[0];
        String upFlow = fields[1];
        String downFlow = fields[2];
        // Pack the two flow values into a Flow instance
        Flow flow = new Flow(Long.parseLong(upFlow), Long.parseLong(downFlow));
        // Emit phone number -> flow
        context.write(new Text(phoneNumber), flow);
    }
}
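Real logs often contain malformed lines, and a single bad record would fail the whole task with the code above. A hedged variant of the map body that skips such records and tracks them with a counter might look like this (the counter group and name strings are made up for illustration):

// Drop-in replacement for the body of FlowCountMapper.map():
// skip records that do not parse cleanly instead of failing the task.
String[] fields = value.toString().split(",");
if (fields.length < 3) {
    context.getCounter("FlowCount", "MalformedRecords").increment(1);
    return;
}
try {
    Flow flow = new Flow(Long.parseLong(fields[1]), Long.parseLong(fields[2]));
    context.write(new Text(fields[0]), flow);
} catch (NumberFormatException e) {
    context.getCounter("FlowCount", "MalformedRecords").increment(1);
}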
reduce
import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import com.sand.mr.bean.Flow;

public class FlowCountReducer extends Reducer<Text, Flow, Text, Flow> {

    @Override
    protected void reduce(Text key, Iterable<Flow> values, Context context)
            throws IOException, InterruptedException {
        long upFlow = 0;
        long downFlow = 0;
        // Sum the upstream/downstream flow for each key with an enhanced for
        // loop; the total is computed in Flow.toString() when the result is
        // written out.
        for (Flow flow : values) {
            upFlow += flow.getUpFlow();
            downFlow += flow.getDownFlow();
        }
        Flow flow = new Flow(upFlow, downFlow);
        context.write(key, flow);
    }
}
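Because the reduce logic is a plain sum (associative and commutative) and the reducer's input and output types are identical, the same class could in principle also serve as a combiner to shrink shuffle traffic. This one-line addition to the driver below is a suggestion, not part of the original post:

// Optional: pre-aggregate map output locally before the shuffle.
// Safe here because summing is associative/commutative and
// Reducer<Text, Flow, Text, Flow> has matching input/output types.
job.setCombinerClass(FlowCountReducer.class);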
master
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import com.sand.mr.bean.Flow;
import com.sand.mr.mapper.FlowCountMapper;
import com.sand.mr.reducer.FlowCountReducer;

public class FlowCountMaster {

    public static void main(String[] args) throws Exception {
        // Read the input/output paths from the command line
        String inputPath = args[0];
        String outputPath = args[1];
        // Initialize the configuration
        Configuration conf = new Configuration();
        // Point fs.defaultFS at the HDFS NameNode
        conf.set("fs.defaultFS", "hdfs://HADOOP01:8020/");
        // Create the job and give it a name
        Job job = Job.getInstance(conf, "flowCount");
        // The class whose containing jar is shipped to the cluster
        job.setJarByClass(FlowCountMaster.class);
        // Set the Mapper class
        job.setMapperClass(FlowCountMapper.class);
        // Set the Reducer class
        job.setReducerClass(FlowCountReducer.class);
        // Map output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Flow.class);
        // Reducer output key/value types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Flow.class);
        // Input path: may be a directory or a single file
        FileInputFormat.setInputPaths(job, new Path(inputPath));
        // Output path: the last path segment is created by the job and is
        // never overwritten, so it must not already exist; remove or rename
        // it by hand between runs
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        // Submit the job and wait for it to finish
        boolean result = job.waitForCompletion(true);
        // Follow-up work once the job succeeds
        if (result) {
            System.out.println("Congratulations!");
        }
    }
}
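Assuming the classes above are packaged into a jar (the jar name, the driver's package, and the HDFS paths below are placeholders), the job would typically be launched like this, removing any previous output directory first:

hadoop fs -rm -r /flow/output        # output dir must not exist
hadoop jar flowcount.jar com.sand.mr.master.FlowCountMaster /flow/input /flow/output
hadoop fs -cat /flow/output/part-r-00000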