MapReduce 实现手机流量计数
原始数据:
1363157985066 13726230503 00-FD-07-A4-72-B8:CMCC 120.196.100.82 ty 12 27 2481 24681 200
1363157995052 13826544101 5C-0E-8B-C7-F1-E0:CMCC 120.197.40.4 hb 5 0 264 0 200
1363157991076 13926435656 20-10-7A-28-CC-0A:CMCC 120.196.100.99 bj 2 4 132 1512 200
1363154400022 13926251106 5C-0E-8B-8B-B1-50:CMCC 120.197.40.4 hb 5 0 240 0 200
1363157993044 18211575961 94-71-AC-CD-E6-18:CMCC-EASY 120.196.100.99 bj 15 2 1527 2106 200
1363157995074 84138413 5C-0E-8B-8C-E8-20:7DaysInn 120.197.40.4 tj 20 16 4116 1432 200
1363157993055 13560439658 C4-17-FE-BA-DE-D9:CMCC 120.196.100.99 hn 18 15 1116 54 200
1363157995033 15920133257 5C-0E-8B-C7-BA-20:CMCC 120.197.40.4 ah 20 20 3156 2936 200
1363157983019 13719199419 68-A1-B7-03-07-B1:CMCC-EASY 120.196.100.82 cq 4 0 240 0 200
1363157984041 13660577991 5C-0E-8B-92-5C-20:CMCC-EASY 120.197.40.4 bj 24 9 6960 690 200
1363157973098 15013685858 5C-0E-8B-C7-F7-90:CMCC 120.197.40.4 ah 28 27 3659 3538 200
1363157986029 15989002119 E8-99-C4-4E-93-E0:CMCC-EASY 120.196.100.99 hb 3 3 1938 180 200
1363157992093 13560439658 C4-17-FE-BA-DE-D9:CMCC 120.196.100.99 xa 15 9 918 4938 200
1363157986041 13480253104 5C-0E-8B-C7-FC-80:CMCC-EASY 120.197.40.4 hn 3 3 180 180 200
1363157984040 13602846565 5C-0E-8B-8B-B6-00:CMCC 120.197.40.4 bj 15 12 1938 2910 200
1363157995093 13922314466 00-FD-07-A2-EC-BA:CMCC 120.196.100.82 tj 26 12 3008 3720 200
1363157982040 13502468823 5C-0A-5B-6A-0B-D4:CMCC-EASY 120.196.100.99 cq 57 102 7335 110349 200
1363157986072 18320173382 84-25-DB-4F-10-1A:CMCC-EASY 120.196.100.99 bj 21 18 9531 2412 200
1363157990043 13925057413 00-1F-64-E1-E6-9A:CMCC 120.196.100.55 ah 69 63 11058 48243 200
1363157988072 13760778710 00-FD-07-A4-7B-08:CMCC 120.196.100.82 ty 2 2 120 120 200
1363157985066 13726238888 00-FD-07-A4-72-B8:CMCC 120.196.100.82 tj 15 27 2481 24681 200
1363157993055 13560436666 C4-17-FE-BA-DE-D9:CMCC 120.196.100.99 bj 18 15 1116 954 200
要截取这个文本文件的第2个字段(手机号,下标1)、第8个字段(上行流量,下标7)和第9个字段(下行流量,下标8),然后将上行流量和下行流量相加。
我们先定义一个自定义值类型,
实现 Writable 接口,重写 toString、write 和 readFields 方法。
源代码:
package com.Flow;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
public class Interite implements Writable{
private int up;
private int down;
private int sum;
public Interite(){
}
public Interite(int up,int down){
this.up = up;
this.down = down;
this.sum = this.up+this.down;
}
public void setUp(int up) {
this.up = up;
}
public void setDown(int down) {
this.down = down;
}
public void setSum(int sum) {
this.sum = up+down;
}
public int getUp() {
return up;
}
public int getDown() {
return down;
}
public int getSum() {
return up+down;
}
public String toString(){
return up+"\t"+down+"\t"+sum;
}
public void write(DataOutput dataOutput)throws IOException{
dataOutput.writeInt(this.up);
dataOutput.writeInt(this.down);
dataOutput.writeInt(this.sum);
}
public void readFields(DataInput dataInput)throws IOException{
this.up=dataInput.readInt();
this.down = dataInput.readInt();
this.sum = dataInput.readInt();
}
}
然后,需要将以 135、136、137、138、139 开头的号码和其余号码分别放入不同的输出文件中,所以我们需要继承 Partitioner 类,重写其中的 getPartition 方法。
附:Partitioner 是 MapReduce 的分区器基类。
然后代码块:
package com.Flow;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.io.Text;
import java.lang.reflect.Parameter;
import java.util.HashMap;
public class MyHashPartitioner extends Partitioner<Text,Interite> {
private static HashMap<String,Integer>areamap = new HashMap<String, Integer>();
static {
areamap.put("135",0);
areamap.put("136",1);
areamap.put("137",2);
areamap.put("138",3);
areamap.put("139",4);
}
public int getPartition(Text text,Interite value,int i){
Integer areCoder = areamap.get(text.toString().substring(0,3));
if (areCoder==null){
areCoder=5;
}
return areCoder;
}
}
最后就是我们的 map、reduce 以及驱动类,我把它们写在了同一个类里:
package com.Flow;
import java.io.*;
import java.util.*;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.IFile;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.v2.app.webapp.App;
import org.apache.hadoop.util.GenericOptionsParser;
public class Flow {
public static class map1 extends Mapper<Object, Text, Text, Interite> {
public void map(Object key, Text value, Context context) throws IOException,InterruptedException{
String line = value.toString();
String[] list = line.split(" ");
context.write( new Text(list[1]),new Interite(Integer.parseInt(list[7]),Integer.parseInt(list[8])));
}
}
public static class reduce1 extends Reducer<Text,Interite,Text,Interite>{
public void reduce(Text key,Iterable<Interite>values,Context context)throws IOException,InterruptedException{
List<Interite>list =new ArrayList<>();
int up = 0;
int down = 0;
for (Interite value:values){
up +=value.getUp();
down+=value.getDown();
}
context.write(new Text(key),new Interite(up,down));
}
}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
//得到hadoop的一个配置参数
Configuration conf = new Configuration();
//获取一个job实例
Job job = Job.getInstance(conf);
//加载job的运行类
job.setJarByClass(Flow.class);
job.setMapperClass(map1.class);
job.setReducerClass(reduce1.class);
//设置mapper类的输出类型
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Interite.class);
// 定义分组逻辑类
job.setPartitionerClass(MyHashPartitioner.class);
// 设定reducer的任务并发数,应该跟分组的数量保持一致
job.setNumReduceTasks(6);
//设置文件输入的路径
FileInputFormat.setInputPaths(job, new Path("E:\\学习\\exercise\\mobile.txt"));
//设置文件的输出路径
FileSystem fs = FileSystem.get(conf);
Path path = new Path("E:\\学习\\exercise\\out1");
if (fs.isDirectory(path)) {
fs.delete(path, true);
}
FileOutputFormat.setOutputPath(job, new Path("E:\\学习\\exercise\\out1"));
boolean res = job.waitForCompletion(true);
System.out.println(res);
}
}