Custom Data Types

Hadoop already ships with a set of built-in Writable data types, including:

    FloatWritable
    DoubleWritable
    Text
    IntWritable
    ShortWritable
    LongWritable
    ByteWritable
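
All of these implement the same Writable contract: write(DataOutput) for serialization and readFields(DataInput) for deserialization. As a quick illustration (a sketch added here, not part of the original post), a local round trip with IntWritable looks like this:

import org.apache.hadoop.io.IntWritable;

import java.io.*;

public class WritableDemo {
    public static void main(String[] args) throws IOException {
        // Serialize an IntWritable into an in-memory buffer
        IntWritable out = new IntWritable(42);
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        out.write(new DataOutputStream(buffer));

        // Deserialize it back from the same bytes
        IntWritable in = new IntWritable();
        in.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));
        System.out.println(in.get()); // prints 42
    }
}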

For some problems these types are clearly not enough, so we define our own.
Requirements for a custom type:

  1. Implement the Writable interface and override its serialization and deserialization methods (write and readFields). Data may be written to disk during both the map and the reduce phase, so these methods are mandatory.
  2. Provide a no-argument constructor (the framework creates instances by reflection when deserializing).
  3. Provide an all-arguments constructor.
  4. Override toString (its output is what ends up in the result file).
  5. If the type needs to be sorted, implement WritableComparable and override compareTo — see the sketch after this list.
  6. Provide get and set methods.
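
A minimal sketch of requirement 5, assuming a key type that sorts by total flow (the class and field names here are illustrative, not from the original post):

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class FlowKey implements WritableComparable<FlowKey> {
    private int totalFlow;

    // No-arg constructor, required by the framework
    public FlowKey() {
    }

    public FlowKey(int totalFlow) {
        this.totalFlow = totalFlow;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        // Serialize fields in a fixed order
        out.writeInt(totalFlow);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        // Deserialize in exactly the same order
        totalFlow = in.readInt();
    }

    @Override
    public int compareTo(FlowKey other) {
        // Descending by total flow; swap the operands for ascending order
        return Integer.compare(other.totalFlow, this.totalFlow);
    }

    @Override
    public String toString() {
        return String.valueOf(totalFlow);
    }
}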

Data (tab-separated columns: phone number, upstream flow, downstream flow)

13726230501 200 1100
13396230502 300 1200
13396230502 320 500
13597230534 300 1200
13396230502 100 3200
13897230503 400 1300
13897230503 100 300
13597230534 500 1400

Result (phone number, then upstream total, downstream total, combined total)

13396230502 720,4900,5620
13597230534 800,2600,3400
13726230501 200,1100,1300
13897230503 500,1600,2100
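
For example, 13396230502 appears three times in the input: upstream 300 + 320 + 100 = 720, downstream 1200 + 500 + 3200 = 4900, and 720 + 4900 = 5620 in total.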

JavaBen (the custom Writable type)

package PhoneTest2;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class JavaBen implements Writable {
    private int upFlow;
    private int downFlow;
    private int totalFlow;

    // All-args constructor
    public JavaBen(int upFlow, int downFlow, int totalFlow) {
        this.upFlow = upFlow;
        this.downFlow = downFlow;
        this.totalFlow = totalFlow;
    }
    // No-arg constructor, required so the framework can create instances by reflection
    public JavaBen() {
    }

    // Getters and setters

    public int getUpFlow() {
        return upFlow;
    }

    public void setUpFlow(int upFlow) {
        this.upFlow = upFlow;
    }

    public int getDownFlow() {
        return downFlow;
    }

    public void setDownFlow(int downFlow) {
        this.downFlow = downFlow;
    }

    public int getTotalFlow() {
        return totalFlow;
    }

    public void setTotalFlow(int totalFlow) {
        this.totalFlow = totalFlow;
    }


    // Override toString: this is the value format written to the output file
    @Override
    public String toString() {
        return upFlow+"," + downFlow + "," + totalFlow;
    }

    // Override equals and hashCode so instances compare by field values

    @Override
    public boolean equals(Object obj) {
        if (this == obj)
            return true;
        if (obj == null)
            return false;
        if (getClass() != obj.getClass())
            return false;
        JavaBen other = (JavaBen) obj;
        if (downFlow != other.downFlow)
            return false;
        if (totalFlow != other.totalFlow)
            return false;
        if (upFlow != other.upFlow)
            return false;
        return true;
    }
    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + downFlow;
        result = prime * result + totalFlow;
        result = prime * result + upFlow;
        return result;
    }

    /**
     * Serialization: write each field in a fixed order.
     */
    @Override
    public void write(DataOutput out) throws IOException {
        // Writing each field to the DataOutput stream serializes the object
        out.writeInt(upFlow);
        out.writeInt(downFlow);
        out.writeInt(totalFlow);
    }
    /**
     * Deserialization: read the fields back in exactly the order they were written.
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        upFlow = in.readInt();
        downFlow = in.readInt();
        totalFlow = in.readInt();
    }
}
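
A quick way to sanity-check the write/readFields pair (a sketch added here, not part of the original post) is a local round trip through a byte buffer:

package PhoneTest2;

import java.io.*;

public class JavaBenRoundTrip {
    public static void main(String[] args) throws IOException {
        JavaBen original = new JavaBen(200, 1100, 1300);

        // Serialize into an in-memory buffer
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));

        // Deserialize from the same bytes
        JavaBen copy = new JavaBen();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

        System.out.println(copy);                  // 200,1100,1300
        System.out.println(original.equals(copy)); // true, thanks to the equals override
    }
}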

MyDriver (job configuration and submission)

package PhoneTest2;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class MyDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Load the default configuration
        Configuration conf = new Configuration();
        // Create the job object
        Job job = Job.getInstance(conf, "PhoneTest2");
        // Set the driver class (used to locate the job jar)
        job.setJarByClass(MyDriver.class);
        // Set the mapper class
        job.setMapperClass(MyMapper.class);
        // Set the reducer class
        job.setReducerClass(MyReduce.class);
        // Set the output key/value types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(JavaBen.class);
        // Set the input path to be split
        FileInputFormat.setInputPaths(job, new Path("D:\\千峰\\Linux\\1903hadoop\\mr3\\data\\flow"));
        // Set the output path (must not exist yet)
        FileOutputFormat.setOutputPath(job, new Path("D:\\test数据\\out0"));
        // Submit the job and wait for it to finish
        System.exit(job.waitForCompletion(true)?0:1);

    }
}
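
One detail worth noting: setOutputKeyClass/setOutputValueClass also cover the map output here, because the mapper emits the same (Text, JavaBen) pair the reducer writes. If the map output types differed from the final output types, the driver would additionally need:

        job.setMapOutputKeyClass(Text.class);      // map output key type
        job.setMapOutputValueClass(JavaBen.class); // map output value type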

MyMapper (parses each line and emits a (phone number, JavaBen) pair)

package PhoneTest2;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class MyMapper extends Mapper<LongWritable, Text, Text, JavaBen> {
    // Reuse one output key and one output value across all records
    JavaBen javaBen = new JavaBen();
    Text text = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Input line format: phone \t upFlow \t downFlow
        String[] str = value.toString().split("\t");
        javaBen.setUpFlow(Integer.parseInt(str[1]));
        javaBen.setDownFlow(Integer.parseInt(str[2]));
        javaBen.setTotalFlow(Integer.parseInt(str[1]) + Integer.parseInt(str[2]));
        text.set(str[0]);
        // Emit (phone number, flow record); the framework serializes them immediately
        context.write(text, javaBen);
    }
}

MyReduce (sums the flows for each phone number)

package PhoneTest2;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;


public class MyReduce extends Reducer<Text, JavaBen, Text, JavaBen> {
    JavaBen ben;

    @Override
    protected void reduce(Text key, Iterable<JavaBen> values, Context context) throws IOException, InterruptedException {
        int upSum = 0;
        int downSum = 0;
        // Sum the upstream and downstream flows over all records for this phone number
        for (JavaBen javaBen : values) {
            upSum += javaBen.getUpFlow();
            downSum += javaBen.getDownFlow();
        }
        // Emit (phone number, aggregated flow record)
        ben = new JavaBen(upSum, downSum, upSum + downSum);
        context.write(key, ben);
    }
}
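
One caution that applies to reducers in general: Hadoop reuses the same value object while iterating values, so read the fields you need inside the loop (as done here) rather than caching references to the iterated objects.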
