A small Hadoop serialization example [computing each phone's up flow, down flow, and total flow]

Raw data

Randomly generated mock data, flowbean.txt:

Phone number	IP address	Up flow	Down flow	Network code
13345734563	192.168.1.1		1116		3452		200
13545734556	192.168.1.1		1126		252			200
13545734345	192.168.1.1		34516		3362		200
13575734574	192.168.1.1		1886		3642		200
18945734596	192.168.1.1		3446		2342		200
13995734523	192.168.1.1		6756		8672		200
13785734584	192.168.1.1		3456		3352		200
13135734563	192.168.1.1		8766		2352		200
13265734544	192.168.1.1		1516		6552		200
13905738453	192.168.1.1		186			759			200
13665735845	192.168.1.1		5616		3332		200
13725733545	192.168.1.1		23416		422			200
13505737456	192.168.1.1		11426		3252		200
13845734543	192.168.1.1		1746		6252		200
13345734563	192.168.1.1		2336		892			200
13345734563	192.168.1.1		5216		675			200
13465734563	192.168.1.1		1777		455			200
15545734563	192.168.1.1		1099		8992		200

The custom serialization class FlowBean

(1) It must implement the Writable interface.
(2) Deserialization creates the object reflectively through its no-arg constructor, so the class must have one.
(3) To make the results readable in the output file, override toString(); separating the fields with "\t" keeps them easy to process later.
(4) If the custom bean is to be transported as a key, it must also implement the Comparable interface, because the Shuffle phase of the MapReduce framework requires keys to be sortable.

package MapReduceFlow;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class FlowBean implements Writable, Comparable<FlowBean> {
    private long upFlow;
    private long downFlow;
    private long sumFlow;

    // No-arg constructor, required so the framework can instantiate the
    // bean reflectively during deserialization.
    public FlowBean() {
    }

    public FlowBean(long upFlow, long downFlow){
        this.upFlow = upFlow;
        this.downFlow = downFlow;
        this.sumFlow = this.upFlow + this.downFlow;
    }

    // Serialize the fields in a fixed order; readFields() below must read
    // them back in exactly the same order.
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeLong(this.upFlow);
        dataOutput.writeLong(this.downFlow);
        dataOutput.writeLong(this.sumFlow);
    }

    // Deserialize in the same field order as write().
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.upFlow = dataInput.readLong();
        this.downFlow = dataInput.readLong();
        this.sumFlow = dataInput.readLong();
    }

    // Sort by total flow. Long.compare avoids the overflow risk of
    // subtracting one long value from another.
    @Override
    public int compareTo(FlowBean o) {
        return Long.compare(this.sumFlow, o.sumFlow);
    }

    @Override
    public String toString() {
        return upFlow + "\t" + downFlow + "\t" + sumFlow;
    }

    public long getUpFlow() {
        return upFlow;
    }

    public void setUpFlow(long upFlow) {
        this.upFlow = upFlow;
        this.sumFlow = this.upFlow + this.downFlow;
    }

    public long getDownFlow() {
        return downFlow;
    }

    public void setDownFlow(long downFlow) {
        this.downFlow = downFlow;
        this.sumFlow = this.upFlow + this.downFlow;
    }

    public long getSumFlow() {
        return sumFlow;
    }

    public void setSumFlow(long sumFlow) {
        this.sumFlow = sumFlow;
    }
}
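
The write/readFields pair can be sanity-checked without running a cluster. Below is a minimal round-trip sketch; the class name FlowBeanRoundTrip and the use of java.io byte-array streams are illustrative additions, not part of the original project.

package MapReduceFlow;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class FlowBeanRoundTrip {
    public static void main(String[] args) throws IOException {
        FlowBean original = new FlowBean(1116, 3452);

        // Serialize: the same write() call the framework makes on the map side.
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        original.write(new DataOutputStream(buffer));

        // Deserialize into a fresh instance created via the no-arg constructor,
        // mirroring what the framework does through reflection.
        FlowBean copy = new FlowBean();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));

        System.out.println(copy);   // prints: 1116	3452	4568
    }
}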

The custom Mapper class

package MapReduceFlow;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class MyMapper extends Mapper<LongWritable, Text, Text, FlowBean> {
    // Reused across map() calls to avoid allocating new objects per record;
    // this is safe because context.write() serializes the values immediately.
    private final Text t = new Text();
    private final FlowBean fb = new FlowBean();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String str = value.toString();
        String[] words = str.split("\t");

        // words[0] = phone number, words[2] = up flow, words[3] = down flow
        String telNum = words[0];
        fb.setUpFlow(Long.parseLong(words[2]));
        fb.setDownFlow(Long.parseLong(words[3]));
        t.set(telNum);

        context.write(t,fb);
    }
}
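
The column indices above can be verified without Hadoop by splitting one sample line the same way the mapper does. This standalone snippet (the class name LineParseSketch is illustrative) assumes the columns in flowbean.txt are separated by single tabs:

package MapReduceFlow;

public class LineParseSketch {
    public static void main(String[] args) {
        // One record from flowbean.txt, assuming single-tab separators.
        String line = "13345734563\t192.168.1.1\t1116\t3452\t200";
        String[] words = line.split("\t");

        System.out.println("phone = " + words[0]);                  // 13345734563
        System.out.println("up    = " + Long.parseLong(words[2]));  // 1116
        System.out.println("down  = " + Long.parseLong(words[3]));  // 3452
    }
}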

The custom Reducer class

package MapReduceFlow;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class MyReducer extends Reducer<Text, FlowBean, Text, FlowBean> {
    @Override
    protected void reduce(Text key, Iterable<FlowBean> values, Context context) throws IOException, InterruptedException {
        // Sum the up and down flow over all records for this phone number.
        long upSum = 0;
        long downSum = 0;

        for (FlowBean fb : values) {
            upSum += fb.getUpFlow();
            downSum += fb.getDownFlow();
        }

        // The two-arg constructor computes sumFlow = upSum + downSum.
        FlowBean fb = new FlowBean(upSum, downSum);
        context.write(key, fb);
    }
}
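
Because the reduce step only sums long fields, the operation is associative and commutative, so the same class could also be registered as a combiner to pre-aggregate on the map side and shrink shuffle traffic. This is optional; if desired, the driver below would need one extra line:

job.setCombinerClass(MyReducer.class);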

The custom Driver class

package MapReduceFlow;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class MyDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        job.setJarByClass(MyDriver.class);

        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        // For cluster use, take the paths from the command line instead:
        // FileInputFormat.setInputPaths(job, new Path(args[0]));
        // FileOutputFormat.setOutputPath(job, new Path(args[1]));
        FileInputFormat.setInputPaths(job, new Path("F:\\Codes\\JavaCodes\\MapReduceLearning\\testdata\\flowbean.txt"));
        FileOutputFormat.setOutputPath(job, new Path("F:\\Codes\\JavaCodes\\MapReduceLearning\\testdata\\output"));

        boolean ret = job.waitForCompletion(true);
        System.exit(ret ? 0 : 1);
    }
}
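
With the hard-coded Windows paths the job runs locally from the IDE. To submit it to a cluster instead, swap in the commented-out args lines in the driver and launch it with hadoop jar; the jar name and HDFS paths below are placeholders:

hadoop jar MapReduceFlow.jar MapReduceFlow.MyDriver /input/flowbean.txt /output/flow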

Execution result

(Screenshot of the job output omitted. The output file part-r-00000 holds one line per phone number in the form phone, upFlow, downFlow, sumFlow, tab-separated; for example, the three records for 13345734563 aggregate to 13345734563	8668	5019	13687.)
