MapReduce自定义类型

在 Hadoop 中操作的所有数据类型都必须实现 Writable 接口:只有实现了该接口,对象才能被序列化,才能进行读取和写入。

测试数据

1363157985066 	13726230503	00-FD-07-A4-72-B8:CMCC	120.196.100.82	i02.c.aliimg.com		24	27	2481	24681	200
1363157995052 	13826544101	5C-0E-8B-C7-F1-E0:CMCC	120.197.40.4			4	0	264	0	200
1363157991076 	13926435656	20-10-7A-28-CC-0A:CMCC	120.196.100.99			2	4	132	1512	200
1363154400022 	13926251106	5C-0E-8B-8B-B1-50:CMCC	120.197.40.4			4	0	240	0	200
1363157993044 	18211575961	94-71-AC-CD-E6-18:CMCC-EASY	120.196.100.99	iface.qiyi.com	视频网站	15	12	1527	2106	200
1363157995074 	84138413	5C-0E-8B-8C-E8-20:7DaysInn	120.197.40.4	122.72.52.12		20	16	4116	1432	200
1363157993055 	13560439658	C4-17-FE-BA-DE-D9:CMCC	120.196.100.99			18	15	1116	954	200
1363157995033 	15920133257	5C-0E-8B-C7-BA-20:CMCC	120.197.40.4	sug.so.360.cn	信息安全	20	20	3156	2936	200
1363157983019 	13719199419	68-A1-B7-03-07-B1:CMCC-EASY	120.196.100.82			4	0	240	0	200
1363157984041 	13660577991	5C-0E-8B-92-5C-20:CMCC-EASY	120.197.40.4	s19.cnzz.com	站点统计	24	9	6960	690	200
1363157973098 	15013685858	5C-0E-8B-C7-F7-90:CMCC	120.197.40.4	rank.ie.sogou.com	搜索引擎	28	27	3659	3538	200
1363157986029 	15989002119	E8-99-C4-4E-93-E0:CMCC-EASY	120.196.100.99	www.umeng.com	站点统计	3	3	1938	180	200
1363157992093 	13560439658	C4-17-FE-BA-DE-D9:CMCC	120.196.100.99			15	9	918	4938	200
1363157986041 	13480253104	5C-0E-8B-C7-FC-80:CMCC-EASY	120.197.40.4			3	3	180	180	200
1363157984040 	13602846565	5C-0E-8B-8B-B6-00:CMCC	120.197.40.4	2052.flash2-http.qq.com	综合门户	15	12	1938	2910	200
1363157995093 	13922314466	00-FD-07-A2-EC-BA:CMCC	120.196.100.82	img.qfc.cn		12	12	3008	3720	200
1363157982040 	13502468823	5C-0A-5B-6A-0B-D4:CMCC-EASY	120.196.100.99	y0.ifengimg.com	综合门户	57	102	7335	110349	200
1363157986072 	18320173382	84-25-DB-4F-10-1A:CMCC-EASY	120.196.100.99	input.shouji.sogou.com	搜索引擎	21	18	9531	2412	200
1363157990043 	13925057413	00-1F-64-E1-E6-9A:CMCC	120.196.100.55	t3.baidu.com	搜索引擎	69	63	11058	48243	200
1363157988072 	13760778710	00-FD-07-A4-7B-08:CMCC	120.196.100.82			2	2	120	120	200
1363157985079 	13823070001	20-7C-8F-70-68-1F:CMCC	120.196.100.99			6	3	360	180	200
1363157985069 	13600217502	00-1F-64-E2-E8-B1:CMCC	120.196.100.55			18	138	1080	186852	200
针对上面的手机上网日志:按制表符切分后,下标为 6~9 的字段(从 0 开始计数,即第 7~10 列)是流量信息,依次为 upPackNum、downPackNum、upPayLoad、downPayLoad。目标是按手机号统计每个用户这四个字段各自的总和。

Writable 接口

Writable 接口主要定义了 write 和 readFields 两个方法:
/**
 * Contract for an object that can write its own fields to a binary stream
 * and later restore them from such a stream.
 */
public interface Writable {
  /**
   * Serialize the fields of this object to <code>out</code>.
   *
   * @param out the stream the fields are written to
   * @throws IOException if writing to <code>out</code> fails
   */
  void write(DataOutput out) throws IOException;

  /**
   * Deserialize the fields of this object from <code>in</code>.
   *
   * @param in the stream the fields are read from
   * @throws IOException if reading from <code>in</code> fails
   */
  void readFields(DataInput in) throws IOException;
}
write:把对象的属性序列化到 DataOutput 中
readFields:从 DataInput 中把数据反序列化到对象的属性

封装 KpiWritable

封装用户的四个属性字段
package WriTable;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Value type holding the four traffic counters taken from a phone log record.
 *
 * <p>The fields are package-private because other classes in this package
 * read them directly. The no-argument constructor leaves every counter at
 * zero; it is required so an instance can be created before
 * {@link #readFields(DataInput)} fills it in.
 *
 * Created by hubo on 2017/12/30
 */
public class KpiWritable implements Writable {
    /** Upload packet count. */
    long upPackNum;
    /** Download packet count. */
    long downPackNum;
    /** Upload payload size. */
    long upPayLoad;
    /** Download payload size. */
    long downpayLoad;

    /** Creates an instance with all counters set to zero. */
    public KpiWritable() {

    }

    /**
     * Creates an instance from the textual form of the four counters.
     *
     * @param upPackNum   decimal text of the upload packet count
     * @param downPackNum decimal text of the download packet count
     * @param upPayLoad   decimal text of the upload payload size
     * @param downpayLoad decimal text of the download payload size
     * @throws NumberFormatException if any argument is not a parsable long
     */
    public KpiWritable(String upPackNum, String downPackNum, String upPayLoad, String downpayLoad) {
        this.upPackNum = Long.parseLong(upPackNum);
        this.downPackNum = Long.parseLong(downPackNum);
        this.upPayLoad = Long.parseLong(upPayLoad);
        this.downpayLoad = Long.parseLong(downpayLoad);
    }

    /**
     * Returns the four counters in a labelled, human-readable form.
     */
    @Override
    public String toString() {
        return "KpiWritable{" +
                "upPackNum=" + upPackNum +
                ", downPackNum=" + downPackNum +
                ", upPayLoad=" + upPayLoad +
                ", downpayLoad=" + downpayLoad +
                '}';
    }

    /**
     * Restores the four counters from {@code dataInput}.
     *
     * <p>The stream carries no field names, so the values must be read in
     * exactly the order {@link #write(DataOutput)} emits them:
     * upPackNum, upPayLoad, downPackNum, downpayLoad. Reading them in any
     * other order silently assigns values to the wrong fields.
     *
     * @param dataInput stream positioned at a record produced by {@code write}
     * @throws IOException if reading from the stream fails
     */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        upPackNum = dataInput.readLong();
        upPayLoad = dataInput.readLong();
        downPackNum = dataInput.readLong();
        downpayLoad = dataInput.readLong();
    }

    /**
     * Writes the four counters to {@code dataOutput} as four consecutive longs
     * in the order upPackNum, upPayLoad, downPackNum, downpayLoad.
     *
     * @param dataOutput stream the counters are written to
     * @throws IOException if writing to the stream fails
     */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeLong(upPackNum);
        dataOutput.writeLong(upPayLoad);
        dataOutput.writeLong(downPackNum);
        dataOutput.writeLong(downpayLoad);
    }
}

实现 Mapper 类

package WriTable;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Mapper that turns each tab-separated log line into a
 * (phone number, {@link KpiWritable}) pair.
 *
 * <p>It also maintains two job counters in the group {@code "Sensitive Words:"}
 * that count lines containing one of two specific record timestamps.
 *
 * Created by hubo on 2017/12/30
 */
public class MyMapper extends Mapper<LongWritable,Text,Text,KpiWritable>{
    /** Highest field index read below is 9, so at least 10 fields are needed. */
    private static final int MIN_FIELDS = 10;

    /**
     * Parses one log line and emits the phone number with its four traffic counters.
     *
     * @param key     input key supplied by the framework (not used here)
     * @param value   one line of the log; fields are separated by tabs
     * @param context used to update counters and emit the output pair
     * @throws IOException           if emitting the output pair fails
     * @throws InterruptedException  if the task is interrupted while emitting
     * @throws NumberFormatException if one of fields 6..9 is not a parsable long
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {

        // Fetch both counters up front so they are registered even when they stay at zero.
        // Each watched record gets its own counter name; using the same name twice
        // would merge both into a single counter.
        Counter sensitiveCounter = context.getCounter("Sensitive Words:", "Phone1");
        Counter sensitiveCounter1 = context.getCounter("Sensitive Words:", "Phone2");

        String line = value.toString();
        if (line.contains("1363154400022")) {
            sensitiveCounter.increment(1L);
        } else if (line.contains("1363157993044")) {
            sensitiveCounter1.increment(1L);
        }

        String[] splits = line.split("\t");
        if (splits.length < MIN_FIELDS) {
            // A blank or truncated line would otherwise fail the whole task with an
            // ArrayIndexOutOfBoundsException; count it and move on instead.
            context.getCounter("Input Records", "Malformed").increment(1L);
            return;
        }

        String phone = splits[1];
        Text k = new Text(phone);
        KpiWritable v = new KpiWritable(splits[6], splits[7], splits[8], splits[9]);
        context.write(k, v);
    }
}

实现 Reducer 类

package WriTable;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;


/**
 * Reducer that adds up the traffic counters of all records sharing one key.
 *
 * Created by hubo on 2017/12/30
 */
public class MyReducer extends Reducer<Text,KpiWritable,Text,KpiWritable> {
    /**
     * Sums each of the four counters over {@code values} and emits the totals
     * under the same key.
     *
     * @param key     the key shared by all values in this call
     * @param values  the records to aggregate
     * @param context used to emit the aggregated record
     * @throws IOException          if emitting the result fails
     * @throws InterruptedException if the task is interrupted while emitting
     */
    @Override
    protected void reduce(Text key, Iterable<KpiWritable> values, Context context) throws IOException, InterruptedException {
        // A freshly constructed KpiWritable has all four counters at zero,
        // so it can serve directly as the running total.
        KpiWritable totals = new KpiWritable();
        for (KpiWritable record : values) {
            totals.upPackNum += record.upPackNum;
            totals.downPackNum += record.downPackNum;
            totals.upPayLoad += record.upPayLoad;
            totals.downpayLoad += record.downpayLoad;
        }
        context.write(key, totals);
    }
}

实现 run

package WriTable;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Driver that configures and submits the traffic-summing MapReduce job.
 *
 * <p>Implements {@link Tool} so it can be launched through {@link ToolRunner};
 * the configuration handed to {@code ToolRunner.run} is available via {@code getConf()}.
 *
 * Created by hubo on 2017/12/30
 */
public class MyJob extends Configured implements Tool{
    /** Input directory read by the job. */
    public static final String In_PATH = "/in";
    /** Output directory written by the job; removed before each run if it exists. */
    public static final String Out_PATH = "/out";

    /**
     * Deletes any previous output, wires up mapper and reducer, and runs the job
     * to completion.
     *
     * @param strings command-line arguments left after option parsing (not used here)
     * @return 0 if the job completed successfully, 1 otherwise
     * @throws Exception if file-system access or job submission fails
     */
    @Override
    public int run(String[] strings) throws Exception {
        FileSystem fs = FileSystem.get(getConf());
        Path outPath = new Path(Out_PATH);
        // The job refuses to start when the output directory already exists.
        if (fs.exists(outPath)) {
            fs.delete(outPath, true);
        }

        // Job.getInstance replaces the deprecated Job constructor.
        Job job = Job.getInstance(getConf(), "MyJob");
        // Lets the framework locate the jar containing these classes when run on a cluster.
        job.setJarByClass(MyJob.class);

        FileInputFormat.setInputPaths(job, new Path(In_PATH));

        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(KpiWritable.class);

        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(KpiWritable.class);

        FileOutputFormat.setOutputPath(job, outPath);

        // Propagate job failure to the caller instead of always reporting success.
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Entry point: sets the Hadoop user and default file system, runs the job
     * through {@link ToolRunner}, and exits with the job's status code.
     *
     * @param args command-line arguments forwarded to {@code ToolRunner.run}
     */
    public static void main(String[] args) {
        System.setProperty("HADOOP_USER_NAME","root");
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS","hdfs://node:9000");
        int res;
        try {
            res = ToolRunner.run(conf, new MyJob(), args);
        } catch (Exception e) {
            e.printStackTrace();
            // An exception means the job did not run to completion; report failure.
            res = 1;
        }
        System.exit(res);
    }
}

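调试结果

注意:对照测试数据可以看出,下面这份输出中 upPackNum 与 upPayLoad 的值是互换的(例如 13480253104 的原始数据为 upPackNum=3、upPayLoad=180)。这是 readFields 读取字段的顺序与 write 写入字段的顺序不一致时产生的结果;两者顺序一致时,这两列的值应当对调。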

13480253104	KpiWritable{upPackNum=180, downPackNum=3, upPayLoad=3, downpayLoad=180}
13502468823	KpiWritable{upPackNum=7335, downPackNum=102, upPayLoad=57, downpayLoad=110349}
13560439658	KpiWritable{upPackNum=2034, downPackNum=24, upPayLoad=33, downpayLoad=5892}
13600217502	KpiWritable{upPackNum=1080, downPackNum=138, upPayLoad=18, downpayLoad=186852}
13602846565	KpiWritable{upPackNum=1938, downPackNum=12, upPayLoad=15, downpayLoad=2910}
13660577991	KpiWritable{upPackNum=6960, downPackNum=9, upPayLoad=24, downpayLoad=690}
13719199419	KpiWritable{upPackNum=240, downPackNum=0, upPayLoad=4, downpayLoad=0}
13726230503	KpiWritable{upPackNum=2481, downPackNum=27, upPayLoad=24, downpayLoad=24681}
13760778710	KpiWritable{upPackNum=120, downPackNum=2, upPayLoad=2, downpayLoad=120}
13823070001	KpiWritable{upPackNum=360, downPackNum=3, upPayLoad=6, downpayLoad=180}
13826544101	KpiWritable{upPackNum=264, downPackNum=0, upPayLoad=4, downpayLoad=0}
13922314466	KpiWritable{upPackNum=3008, downPackNum=12, upPayLoad=12, downpayLoad=3720}
13925057413	KpiWritable{upPackNum=11058, downPackNum=63, upPayLoad=69, downpayLoad=48243}
13926251106	KpiWritable{upPackNum=240, downPackNum=0, upPayLoad=4, downpayLoad=0}
13926435656	KpiWritable{upPackNum=132, downPackNum=4, upPayLoad=2, downpayLoad=1512}
15013685858	KpiWritable{upPackNum=3659, downPackNum=27, upPayLoad=28, downpayLoad=3538}
15920133257	KpiWritable{upPackNum=3156, downPackNum=20, upPayLoad=20, downpayLoad=2936}
15989002119	KpiWritable{upPackNum=1938, downPackNum=3, upPayLoad=3, downpayLoad=180}
18211575961	KpiWritable{upPackNum=1527, downPackNum=12, upPayLoad=15, downpayLoad=2106}
18320173382	KpiWritable{upPackNum=9531, downPackNum=18, upPayLoad=21, downpayLoad=2412}
84138413	KpiWritable{upPackNum=4116, downPackNum=16, upPayLoad=20, downpayLoad=1432}
主要学习如何自定义类型(实现 Writable 接口的 write 和 readFields 方法,并重写 toString,因为默认的文本输出会调用它)
ToolRunner 如何运行(实现 Tool 接口,重写 run 方法,通过 getConf() 获取 Configuration 对象)
提高工作效率

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值