// ===================== FlowCount.java =====================
package cn.itheima.bigdata.hadoop.mr.flowcount
import java.io.IOException
import org.apache.commons.lang.StringUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.LongWritable
import org.apache.hadoop.io.Text
import org.apache.hadoop.mapreduce.InputFormat
import org.apache.hadoop.mapreduce.Job
import org.apache.hadoop.mapreduce.Mapper
import org.apache.hadoop.mapreduce.Reducer
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
//hadoop自己实现的序列化机制跟jdk有区别: 比jdk更精简
public class FlowCount {
public static class FlowCountMapper extends Mapper<LongWritable, Text, Text, FlowBean>{
private FlowBean flowBean = new FlowBean()
@Override
protected void map(LongWritable key, Text value,Context context)
throws IOException, InterruptedException {
try {
// 拿到一行数据
String line = value.toString()
// 切分字段
String[] fields = StringUtils.split(line, "\t")
// 拿到我们需要的若干个字段
String phoneNbr = fields[1]
long up_flow = Long.parseLong(fields[fields.length - 3])
long d_flow = Long.parseLong(fields[fields.length - 2])
// 将数据封装到一个flowbean中
flowBean.set(phoneNbr, up_flow, d_flow)
// 以手机号为key,将流量数据输出去
context.write(new Text(phoneNbr), flowBean)
}catch(Exception e){
System.out.println("exception occured in mapper" )
}
}
}
public static class FlowCountReducer extends Reducer<Text, FlowBean, Text, FlowBean>{
private FlowBean flowBean = new FlowBean()
@Override
protected void reduce(Text key, Iterable<FlowBean> values,Context context)
throws IOException, InterruptedException {
long up_flow_sum = 0
long d_flow_sum = 0
for(FlowBean bean:values){
up_flow_sum += bean.getUp_flow()
d_flow_sum += bean.getD_flow()
}
flowBean.set(key.toString(), up_flow_sum, d_flow_sum)
context.write(key, flowBean)
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration()
Job job = Job.getInstance(conf,"flowjob")
job.setJarByClass(FlowCount.class)
job.setMapperClass(FlowCountMapper.class)
job.setReducerClass(FlowCountReducer.class)
job.setMapOutputKeyClass(Text.class)
job.setMapOutputValueClass(FlowBean.class)
job.setOutputKeyClass(Text.class)
job.setOutputValueClass(FlowBean.class)
job.setInputFormatClass(TextInputFormat.class)
job.setOutputFormatClass(TextOutputFormat.class)
FileInputFormat.setInputPaths(job, new Path(args[0]))
FileOutputFormat.setOutputPath(job, new Path(args[1]))
job.waitForCompletion(true)
}
}
// ===================== FlowCountSort.java =====================
package cn.itheima.bigdata.hadoop.mr.flowcount
import java.io.IOException
import org.apache.commons.lang.StringUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.LongWritable
import org.apache.hadoop.io.NullWritable
import org.apache.hadoop.io.Text
import org.apache.hadoop.mapreduce.Job
import org.apache.hadoop.mapreduce.Mapper
import org.apache.hadoop.mapreduce.Reducer
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
public class FlowCountSort {
public static class FlowCountSortMapper extends Mapper<LongWritable, Text, FlowBean, NullWritable>{
FlowBean bean = new FlowBean()
@Override
protected void map(LongWritable key, Text value,Context context)
throws IOException, InterruptedException {
String line = value.toString()
String[] fields = StringUtils.split(line, "\t")
String phoneNbr = fields[0]
long up_flow = Long.parseLong(fields[1])
long d_flow = Long.parseLong(fields[2])
bean.set(phoneNbr, up_flow, d_flow)
context.write(bean, NullWritable.get())
}
}
public static class FlowCountSortReducer extends Reducer<FlowBean, NullWritable, Text, FlowBean>{
@Override
protected void reduce(FlowBean bean, Iterable<NullWritable> values,Context context)
throws IOException, InterruptedException {
context.write(new Text(bean.getPhoneNbr()), bean)
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration()
Job job = Job.getInstance(conf,"sortjob")
job.setJarByClass(FlowCountSort.class)
job.setMapperClass(FlowCountSortMapper.class)
job.setReducerClass(FlowCountSortReducer.class)
job.setMapOutputKeyClass(FlowBean.class)
job.setMapOutputValueClass(NullWritable.class)
job.setOutputKeyClass(Text.class)
job.setOutputValueClass(FlowBean.class)
job.setInputFormatClass(TextInputFormat.class)
job.setOutputFormatClass(TextOutputFormat.class)
FileInputFormat.setInputPaths(job, new Path(args[0]))
FileOutputFormat.setOutputPath(job, new Path(args[1]))
job.waitForCompletion(true)
}
}
// ===================== FlowBean.java =====================
package cn.itheima.bigdata.hadoop.mr.flowcount;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
/**
 * Hadoop-serializable flow record: phone number plus up/down/total byte counts.
 * Used as a map value in FlowCount and as a sortable map key in FlowCountSort.
 */
public class FlowBean implements WritableComparable<FlowBean> {

    // Initialize to "" so write() never hits DataOutput.writeUTF(null) on an
    // unset bean (the original would throw NullPointerException).
    private String phoneNbr = "";
    private long up_flow;
    private long d_flow;
    private long sum_flow;

    /** No-arg constructor required by Hadoop for deserialization. */
    public FlowBean() {
    }

    /**
     * Populates all fields in one call; sum_flow is derived as up + down.
     * Supports object reuse across map()/reduce() invocations.
     */
    public void set(String phoneNbr, long up_flow, long d_flow) {
        this.phoneNbr = phoneNbr;
        this.up_flow = up_flow;
        this.d_flow = d_flow;
        this.sum_flow = up_flow + d_flow;
    }

    public String getPhoneNbr() {
        return phoneNbr;
    }

    public void setPhoneNbr(String phoneNbr) {
        this.phoneNbr = phoneNbr;
    }

    public long getUp_flow() {
        return up_flow;
    }

    public void setUp_flow(long up_flow) {
        this.up_flow = up_flow;
    }

    public long getD_flow() {
        return d_flow;
    }

    public void setD_flow(long d_flow) {
        this.d_flow = d_flow;
    }

    public long getSum_flow() {
        return sum_flow;
    }

    public void setSum_flow(long sum_flow) {
        this.sum_flow = sum_flow;
    }

    /** Serializes the fields; order must match readFields() exactly. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(phoneNbr);
        out.writeLong(up_flow);
        out.writeLong(d_flow);
        out.writeLong(sum_flow);
    }

    /** Deserializes the fields in the same order write() emitted them. */
    @Override
    public void readFields(DataInput in) throws IOException {
        phoneNbr = in.readUTF();
        up_flow = in.readLong();
        d_flow = in.readLong();
        sum_flow = in.readLong();
    }

    /** Tab-separated value columns; the key column is written by the framework. */
    @Override
    public String toString() {
        return up_flow + "\t" + d_flow + "\t" + sum_flow;
    }

    /**
     * Orders beans by total flow, descending, with phone number as tiebreaker.
     *
     * The original {@code sum_flow > o.getSum_flow() ? -1 : 1} never returned 0
     * and was asymmetric for equal sums (both a&lt;b and b&lt;a), violating the
     * compareTo contract that the MapReduce shuffle sort (and any TreeMap/sort
     * user) relies on. The phone-number tiebreaker keeps distinct subscribers in
     * distinct reducer groups even when their totals are equal, preserving the
     * one-output-row-per-bean behavior of FlowCountSort.
     */
    @Override
    public int compareTo(FlowBean o) {
        int bySum = Long.compare(o.sum_flow, this.sum_flow); // descending total
        if (bySum != 0) {
            return bySum;
        }
        return this.phoneNbr.compareTo(o.phoneNbr);
    }
}