Implementing Mobile Phone Traffic Counting with MapReduce


Raw data:
1363157985066 13726230503 00-FD-07-A4-72-B8:CMCC 120.196.100.82 ty 12 27 2481 24681 200
1363157995052 13826544101 5C-0E-8B-C7-F1-E0:CMCC 120.197.40.4 hb 5 0 264 0 200
1363157991076 13926435656 20-10-7A-28-CC-0A:CMCC 120.196.100.99 bj 2 4 132 1512 200
1363154400022 13926251106 5C-0E-8B-8B-B1-50:CMCC 120.197.40.4 hb 5 0 240 0 200
1363157993044 18211575961 94-71-AC-CD-E6-18:CMCC-EASY 120.196.100.99 bj 15 2 1527 2106 200
1363157995074 84138413 5C-0E-8B-8C-E8-20:7DaysInn 120.197.40.4 tj 20 16 4116 1432 200
1363157993055 13560439658 C4-17-FE-BA-DE-D9:CMCC 120.196.100.99 hn 18 15 1116 54 200
1363157995033 15920133257 5C-0E-8B-C7-BA-20:CMCC 120.197.40.4 ah 20 20 3156 2936 200
1363157983019 13719199419 68-A1-B7-03-07-B1:CMCC-EASY 120.196.100.82 cq 4 0 240 0 200
1363157984041 13660577991 5C-0E-8B-92-5C-20:CMCC-EASY 120.197.40.4 bj 24 9 6960 690 200
1363157973098 15013685858 5C-0E-8B-C7-F7-90:CMCC 120.197.40.4 ah 28 27 3659 3538 200
1363157986029 15989002119 E8-99-C4-4E-93-E0:CMCC-EASY 120.196.100.99 hb 3 3 1938 180 200
1363157992093 13560439658 C4-17-FE-BA-DE-D9:CMCC 120.196.100.99 xa 15 9 918 4938 200
1363157986041 13480253104 5C-0E-8B-C7-FC-80:CMCC-EASY 120.197.40.4 hn 3 3 180 180 200
1363157984040 13602846565 5C-0E-8B-8B-B6-00:CMCC 120.197.40.4 bj 15 12 1938 2910 200
1363157995093 13922314466 00-FD-07-A2-EC-BA:CMCC 120.196.100.82 tj 26 12 3008 3720 200
1363157982040 13502468823 5C-0A-5B-6A-0B-D4:CMCC-EASY 120.196.100.99 cq 57 102 7335 110349 200
1363157986072 18320173382 84-25-DB-4F-10-1A:CMCC-EASY 120.196.100.99 bj 21 18 9531 2412 200
1363157990043 13925057413 00-1F-64-E1-E6-9A:CMCC 120.196.100.55 ah 69 63 11058 48243 200
1363157988072 13760778710 00-FD-07-A4-7B-08:CMCC 120.196.100.82 ty 2 2 120 120 200
1363157985066 13726238888 00-FD-07-A4-72-B8:CMCC 120.196.100.82 tj 15 27 2481 24681 200
1363157993055 13560436666 C4-17-FE-BA-DE-D9:CMCC 120.196.100.99 bj 18 15 1116 954 200

We need to extract field 1 (the phone number), field 7 (upstream traffic), and field 8 (downstream traffic) from each line of this text file, then add the upstream and downstream traffic together.
First, we define a custom value type that implements the Writable interface and overrides the toString, write, and readFields methods.
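
As a quick illustration of the field layout (a sketch of my own using the first sample line; the FieldCheck class name is made up just for this check), splitting a line on spaces puts the phone number at index 1 and the upstream/downstream traffic at indices 7 and 8:

package com.Flow;

public class FieldCheck {
    public static void main(String[] args) {
        String line = "1363157985066 13726230503 00-FD-07-A4-72-B8:CMCC 120.196.100.82 ty 12 27 2481 24681 200";
        String[] fields = line.split(" ");
        System.out.println(fields[1]); // 13726230503 (phone number)
        System.out.println(fields[7]); // 2481        (upstream traffic)
        System.out.println(fields[8]); // 24681       (downstream traffic)
    }
}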
Source code:

package com.Flow;



import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class Interite implements Writable {
    private int up;    // upstream traffic
    private int down;  // downstream traffic
    private int sum;   // up + down

    // Hadoop requires a no-arg constructor so it can instantiate the value during deserialization
    public Interite() {
    }

    public Interite(int up, int down) {
        this.up = up;
        this.down = down;
        this.sum = this.up + this.down;
    }

    public void setUp(int up) {
        this.up = up;
    }

    public void setDown(int down) {
        this.down = down;
    }

    public void setSum(int sum) {
        this.sum = sum;
    }

    public int getUp() {
        return up;
    }

    public int getDown() {
        return down;
    }

    public int getSum() {
        return sum;
    }

    // Controls how the value is rendered in the output files: up \t down \t sum
    @Override
    public String toString() {
        return up + "\t" + down + "\t" + sum;
    }

    // Serialization: write the fields in a fixed order
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeInt(this.up);
        dataOutput.writeInt(this.down);
        dataOutput.writeInt(this.sum);
    }

    // Deserialization: read the fields back in the same order they were written
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.up = dataInput.readInt();
        this.down = dataInput.readInt();
        this.sum = dataInput.readInt();
    }
}
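
A quick way to check the Writable contract is a local serialization round trip. The snippet below is my own minimal sketch (the InteriteRoundTrip class and the stream plumbing are not part of the original post); it writes an Interite to a byte array and reads it back:

package com.Flow;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class InteriteRoundTrip {
    public static void main(String[] args) throws IOException {
        // Serialize an Interite the same way Hadoop does during the shuffle
        Interite original = new Interite(2481, 24681);
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));

        // Deserialize into a fresh instance and compare
        Interite copy = new Interite();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(original); // 2481    24681   27162
        System.out.println(copy);     // should print the same line
    }
}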

Next, the requirement is that numbers beginning with 135, 136, 137, 138, 139, and all remaining numbers be written to separate files. To do this we extend the Partitioner class and override its getPartition method.

Note: the Partitioner component lets Map output be partitioned by key, so that different keys are dispatched to different Reducers for processing. Partitioning happens after the Map phase and before the Reduce phase, and the number of partitions equals the number of Reducers. The number of Reducers is set in the driver class via job.setNumReduceTasks(). When multiple Reducers are used, some mechanism is needed to ensure that the key/value pairs output by the Mappers are sent to the correct Reducer. A custom Partitioner must extend Partitioner<K2,V2> and override the getPartition method. If the final results should end up in multiple files, getPartition just needs to return 0, 1, 2, 3, and so on according to some rule. Using a custom Partitioner also requires setting the Partitioner class and the number of Reducers in the driver class.

The code:

package com.Flow;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

import java.util.HashMap;

public class MyHashPartitioner extends Partitioner<Text, Interite> {
    // Maps a phone-number prefix to a partition (i.e. reducer / output file) number
    private static HashMap<String, Integer> areamap = new HashMap<String, Integer>();
    static {
        areamap.put("135", 0);
        areamap.put("136", 1);
        areamap.put("137", 2);
        areamap.put("138", 3);
        areamap.put("139", 4);
    }

    @Override
    public int getPartition(Text text, Interite value, int numPartitions) {
        // Look up the first three digits of the phone number; anything not in the map goes to partition 5
        Integer areaCode = areamap.get(text.toString().substring(0, 3));
        if (areaCode == null) {
            areaCode = 5;
        }
        return areaCode;
    }
}
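
As a quick local sanity check (my own addition, not part of the original post; the PartitionCheck class is made up for this purpose), calling getPartition on a few phone numbers from the sample data shows how keys are routed to partitions:

package com.Flow;

import org.apache.hadoop.io.Text;

public class PartitionCheck {
    public static void main(String[] args) {
        MyHashPartitioner partitioner = new MyHashPartitioner();
        // The third argument matches the 6 reduce tasks set in the driver
        System.out.println(partitioner.getPartition(new Text("13560439658"), new Interite(1116, 54), 6));  // 0 (prefix 135)
        System.out.println(partitioner.getPartition(new Text("13926435656"), new Interite(132, 1512), 6)); // 4 (prefix 139)
        System.out.println(partitioner.getPartition(new Text("84138413"), new Interite(4116, 1432), 6));   // 5 (no match)
    }
}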

Finally come the map and reduce logic and the driver, which I have written as a single class:

package com.Flow;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Flow {
    public static class map1 extends Mapper<Object, Text, Text, Interite> {
        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            // Split the line into fields: field 1 is the phone number,
            // field 7 is the upstream traffic, field 8 is the downstream traffic
            String line = value.toString();
            String[] list = line.split(" ");
            context.write(new Text(list[1]),
                    new Interite(Integer.parseInt(list[7]), Integer.parseInt(list[8])));
        }
    }

    public static class reduce1 extends Reducer<Text, Interite, Text, Interite> {
        @Override
        public void reduce(Text key, Iterable<Interite> values, Context context) throws IOException, InterruptedException {
            // Accumulate upstream and downstream traffic for one phone number
            int up = 0;
            int down = 0;
            for (Interite value : values) {
                up += value.getUp();
                down += value.getDown();
            }
            context.write(key, new Interite(up, down));
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Get a Hadoop configuration
        Configuration conf = new Configuration();
        // Get a job instance
        Job job = Job.getInstance(conf);
        // Set the classes that make up the job
        job.setJarByClass(Flow.class);
        job.setMapperClass(map1.class);
        job.setReducerClass(reduce1.class);
        // Set the output key/value types (the map output uses the same types here)
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Interite.class);
        // Set the partitioning logic
        job.setPartitionerClass(MyHashPartitioner.class);
        // The number of reduce tasks should match the number of partitions
        job.setNumReduceTasks(6);

        // Set the input path
        FileInputFormat.setInputPaths(job, new Path("E:\\学习\\exercise\\mobile.txt"));
        // Delete the output path if it already exists, then set it
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("E:\\学习\\exercise\\out1");
        if (fs.exists(path)) {
            fs.delete(path, true);
        }
        FileOutputFormat.setOutputPath(job, path);
        boolean res = job.waitForCompletion(true);
        System.out.println(res);
    }
}
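
With job.setNumReduceTasks(6) and the partitioner above, the out1 directory ends up with six result files, part-r-00000 through part-r-00005: numbers starting with 135 through 139 go to the first five files and all remaining numbers go to the last one. Each output line follows the toString format of Interite: phone number, upstream traffic, downstream traffic, and their sum, separated by tabs.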