Raw data
Randomly generated sample data, flowbean.txt
Phone number    IP address    Upstream traffic    Downstream traffic    Network code
13345734563 192.168.1.1 1116 3452 200
13545734556 192.168.1.1 1126 252 200
13545734345 192.168.1.1 34516 3362 200
13575734574 192.168.1.1 1886 3642 200
18945734596 192.168.1.1 3446 2342 200
13995734523 192.168.1.1 6756 8672 200
13785734584 192.168.1.1 3456 3352 200
13135734563 192.168.1.1 8766 2352 200
13265734544 192.168.1.1 1516 6552 200
13905738453 192.168.1.1 186 759 200
13665735845 192.168.1.1 5616 3332 200
13725733545 192.168.1.1 23416 422 200
13505737456 192.168.1.1 11426 3252 200
13845734543 192.168.1.1 1746 6252 200
13345734563 192.168.1.1 2336 892 200
13345734563 192.168.1.1 5216 675 200
13465734563 192.168.1.1 1777 455 200
15545734563 192.168.1.1 1099 8992 200
Custom serialization class: FlowBean
(1) It must implement the Writable interface.
(2) During deserialization the framework instantiates the bean via reflection through its no-argument constructor, so the class must provide one.
(3) To make the result readable in the output file, override toString(); separating the fields with "\t" keeps the output easy to process later.
(4) If the custom bean is to be transmitted as a key, it must also implement the Comparable interface (i.e. act as a WritableComparable), because the Shuffle phase of the MapReduce framework requires keys to be sortable.
(A small serialization round-trip sketch follows the class below.)
package MapReduceFlow;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class FlowBean implements Writable, Comparable<FlowBean> {
    private long upFlow;
    private long downFlow;
    private long sumFlow;

    // No-arg constructor required so Hadoop can instantiate the bean via reflection.
    public FlowBean() {
    }

    public FlowBean(long upFlow, long downFlow) {
        this.upFlow = upFlow;
        this.downFlow = downFlow;
        this.sumFlow = this.upFlow + this.downFlow;
    }

    // Serialization: write the fields in a fixed order.
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeLong(this.upFlow);
        dataOutput.writeLong(this.downFlow);
        dataOutput.writeLong(this.sumFlow);
    }

    // Deserialization: read the fields back in exactly the order they were written.
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.upFlow = dataInput.readLong();
        this.downFlow = dataInput.readLong();
        this.sumFlow = dataInput.readLong();
    }

    // Sort ascending by total traffic; only needed when the bean is used as a key.
    @Override
    public int compareTo(FlowBean other) {
        return Long.compare(this.sumFlow, other.sumFlow);
    }

    @Override
    public String toString() {
        return upFlow + "\t" + downFlow + "\t" + sumFlow;
    }

    public long getUpFlow() {
        return upFlow;
    }

    public void setUpFlow(long upFlow) {
        this.upFlow = upFlow;
        this.sumFlow = this.upFlow + this.downFlow;
    }

    public long getDownFlow() {
        return downFlow;
    }

    public void setDownFlow(long downFlow) {
        this.downFlow = downFlow;
        this.sumFlow = this.upFlow + this.downFlow;
    }

    public long getSumFlow() {
        return sumFlow;
    }

    public void setSumFlow(long sumFlow) {
        this.sumFlow = sumFlow;
    }
}
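Before moving on, here is a minimal local round-trip sketch for the class above (FlowBeanRoundTrip and its main method are illustrative helpers, not part of the MapReduce job). It checks that write() and readFields() mirror each other and shows why rule (2) demands a no-argument constructor:

package MapReduceFlow;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class FlowBeanRoundTrip {
    public static void main(String[] args) throws IOException {
        FlowBean original = new FlowBean(1116, 3452);

        // Serialize with write(), the same call Hadoop makes when shipping the bean.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));

        // Deserialize into a fresh instance created via the no-arg constructor,
        // reading the fields back in the same order they were written.
        FlowBean copy = new FlowBean();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

        System.out.println(copy);    // expected output: 1116	3452	4568
    }
}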
Custom Mapper class
package MapReduceFlow;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class MyMapper extends Mapper<LongWritable, Text, Text, FlowBean> {
    // Reuse the output key and value objects; context.write() serializes them immediately.
    private Text t = new Text();
    private FlowBean fb = new FlowBean();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Fields are whitespace-separated: phone number, IP address,
        // upstream traffic, downstream traffic, network code.
        String str = value.toString();
        String[] words = str.split("\\s+");
        String telNum = words[0];
        fb.setUpFlow(Long.parseLong(words[2]));
        fb.setDownFlow(Long.parseLong(words[3]));
        t.set(telNum);
        context.write(t, fb);
    }
}
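For instance, the first sample line "13345734563 192.168.1.1 1116 3452 200" is emitted as the key 13345734563 with a FlowBean value of upFlow=1116, downFlow=3452, sumFlow=4568.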
Custom Reducer class
package MapReduceFlow;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class MyReducer extends Reducer<Text, FlowBean, Text, FlowBean> {
    @Override
    protected void reduce(Text key, Iterable<FlowBean> values, Context context) throws IOException, InterruptedException {
        // Sum the upstream and downstream traffic of all records for this phone number.
        long upSum = 0;
        long downSum = 0;
        for (FlowBean fb : values) {
            upSum += fb.getUpFlow();
            downSum += fb.getDownFlow();
        }
        FlowBean fb = new FlowBean(upSum, downSum);
        context.write(key, fb);
    }
}
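As a concrete check against the sample data: the phone number 13345734563 appears on three lines (1116/3452, 2336/892, 5216/675), so the reducer computes upSum=8668 and downSum=5019, producing the output line 13345734563	8668	5019	13687.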
Custom Driver class
package MapReduceFlow;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class MyDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        job.setJarByClass(MyDriver.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        // The paths could also be taken from the command line:
        // FileInputFormat.setInputPaths(job, new Path(args[0]));
        // FileOutputFormat.setOutputPath(job, new Path(args[1]));
        FileInputFormat.setInputPaths(job, new Path("F:\\Codes\\JavaCodes\\MapReduceLearning\\testdata\\flowbean.txt"));
        // The output directory must not exist before the job runs.
        FileOutputFormat.setOutputPath(job, new Path("F:\\Codes\\JavaCodes\\MapReduceLearning\\testdata\\output"));

        boolean ret = job.waitForCompletion(true);
        System.exit(ret ? 0 : 1);
    }
}
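To run the job on a cluster instead of with the hard-coded local paths, switch to the commented-out args-based paths, package the project as a jar, and submit it with the standard hadoop jar command; the jar name and HDFS paths below are placeholders:

hadoop jar MapReduceFlow.jar MapReduceFlow.MyDriver /input/flowbean.txt /output/flow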