测试文件:
1363157985066 13726230503 00-FD-07-A4-72-B8:CMCC 120.196.100.82 i02.c.aliimg.com 24 27 2481 24681 200
1363157995052 13826544101 5C-0E-8B-C7-F1-E0:CMCC 120.197.40.4 4 0 264 0 200
1363157991076 13926435656 20-10-7A-28-CC-0A:CMCC 120.196.100.99 2 4 132 1512 200
1363154400022 13926251106 5C-0E-8B-8B-B1-50:CMCC 120.197.40.4 4 0 240 0 200
1363157993044 18211575961 94-71-AC-CD-E6-18:CMCC-EASY 120.196.100.99 iface.qiyi.com 视频网站 15 12 1527 2106 200
1363157995074 84138413 5C-0E-8B-8C-E8-20:7DaysInn 120.197.40.4 122.72.52.12 20 16 4116 1432 200
1363157993055 13560439658 C4-17-FE-BA-DE-D9:CMCC 120.196.100.99 18 15 1116 954 200
1363157995033 15920133257 5C-0E-8B-C7-BA-20:CMCC 120.197.40.4 sug.so.360.cn 信息安全 20 20 3156 2936 200
1363157983019 13719199419 68-A1-B7-03-07-B1:CMCC-EASY 120.196.100.82 4 0 240 0 200
————————————————————————————————————————————————————
对象类:
package com.run.mr.flow;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
/**
 * Hadoop Writable holding per-record traffic totals: upstream flow,
 * downstream flow, and their sum. Used both as a map output key (so
 * records shuffle-sort by total flow) and as the final output value.
 *
 * <p>Serialization contract: {@link #write(DataOutput)} and
 * {@link #readFields(DataInput)} must read/write the same fields in the
 * same order (upFlow, dFlow, sumFlow).
 */
public class FlowBean implements WritableComparable<FlowBean> {
    private long upFlow;   // upstream (uploaded) bytes
    private long dFlow;    // downstream (downloaded) bytes
    private long sumFlow;  // upFlow + dFlow, kept in sync by set()/ctor

    /**
     * No-arg constructor required by Hadoop: the framework instantiates
     * the class reflectively before calling readFields() to deserialize.
     */
    public FlowBean() {
    }

    /**
     * Re-initializes this bean in place (object reuse avoids per-record
     * allocation in the mapper). Recomputes the sum.
     *
     * @param upFlow upstream bytes
     * @param dFlow  downstream bytes
     */
    public void set(long upFlow, long dFlow) {
        this.upFlow = upFlow;
        this.dFlow = dFlow;
        this.sumFlow = upFlow + dFlow;
    }

    /**
     * Convenience constructor; equivalent to {@code new FlowBean()} followed
     * by {@link #set(long, long)}.
     */
    public FlowBean(long upFlow, long dFlow) {
        this.upFlow = upFlow;
        this.dFlow = dFlow;
        this.sumFlow = upFlow + dFlow;
    }

    public long getUpFlow() {
        return upFlow;
    }

    // NOTE: setters mutate one field only and deliberately do NOT recompute
    // sumFlow; callers that want a consistent sum should use set().
    public void setUpFlow(long upFlow) {
        this.upFlow = upFlow;
    }

    public long getdFlow() {
        return dFlow;
    }

    public void setdFlow(long dFlow) {
        this.dFlow = dFlow;
    }

    public long getSumFlow() {
        return sumFlow;
    }

    public void setSumFlow(long sumFlow) {
        this.sumFlow = sumFlow;
    }

    /**
     * Deserialization: field order must mirror {@link #write(DataOutput)}.
     * Hadoop reuses a single instance across records, overwriting fields
     * on each call.
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        upFlow = in.readLong();
        dFlow = in.readLong();
        sumFlow = in.readLong();
    }

    /**
     * Serialization: field order must mirror {@link #readFields(DataInput)}.
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(upFlow);
        out.writeLong(dFlow);
        out.writeLong(sumFlow);
    }

    @Override
    public String toString() {
        return upFlow + "\t" + dFlow + "\t" + sumFlow;
    }

    /**
     * Orders beans by total flow, ascending.
     *
     * <p>FIX: the previous implementation ({@code sumFlow > o.getSumFlow()
     * ? 1 : -1}) never returned 0 and was asymmetric for equal sums
     * (both {@code a.compareTo(b)} and {@code b.compareTo(a)} returned -1),
     * violating the Comparable contract — sorts may throw
     * "Comparison method violates its general contract!" and equal-sum
     * keys were never grouped. {@link Long#compare(long, long)} preserves
     * the ascending order while honoring the contract.
     */
    @Override
    public int compareTo(FlowBean o) {
        return Long.compare(sumFlow, o.sumFlow);
    }
}
——————————————————————————————————————————————————————
package com.run.mr.flow;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * MapReduce job: for each input record, extract (phone, upFlow, dFlow)
 * and emit (FlowBean, phone) from the mapper so the shuffle sorts records
 * by total flow; the reducer re-pairs each bean with its phone number.
 */
public class FlowCount {

    /**
     * Job driver: configures classes and key/value types, then blocks
     * until the job finishes. args[0] = input path, args[1] = output dir.
     */
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());

        // Locate the jar containing this job's classes.
        job.setJarByClass(FlowCount.class);

        job.setMapperClass(FlowMapper.class);
        job.setReducerClass(FlowReduce.class);

        // Map output: FlowBean key / Text value (bean-as-key drives the sort).
        job.setMapOutputKeyClass(FlowBean.class);
        job.setMapOutputValueClass(Text.class);

        // Final output: Text key / FlowBean value.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // waitForCompletion(true) submits to the cluster and streams progress.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    static class FlowMapper extends Mapper<LongWritable, Text, FlowBean, Text> {
        // Reused across map() calls to avoid a per-record allocation.
        private final FlowBean outKey = new FlowBean();
        private final Text outValue = new Text();

        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, FlowBean, Text>.Context context)
                throws IOException, InterruptedException {
            // NOTE(review): assumes tab-delimited input — confirm against the
            // actual data file (the pasted sample shows spaces).
            String[] cols = value.toString().split("\t");

            // Phone is column 2; up/down flow are counted from the END of the
            // row because middle columns (host/category) are optional.
            String phone = cols[1];
            long up = Long.parseLong(cols[cols.length - 3]);
            long down = Long.parseLong(cols[cols.length - 2]);

            outKey.set(up, down);
            outValue.set(phone);
            context.write(outKey, outValue);
        }
    }

    static class FlowReduce extends Reducer<FlowBean, Text, Text, FlowBean> {
        @Override
        protected void reduce(FlowBean key, Iterable<Text> value,
                Reducer<FlowBean, Text, Text, FlowBean>.Context context) throws IOException, InterruptedException {
            // Keys arrive sorted by total flow; re-pair the bean with the
            // first phone of the group and swap them to (phone, bean).
            Text phone = value.iterator().next();
            context.write(phone, key);
        }
    }
}