[Hadoop Learning Project] 5. Processing Data with a Custom Serializable Bean

0. Project Structure

(Project structure screenshot.)

Training data

phone address name consum
13877779999 bj zs 2145
13766668888 sh ls 1028
13766668888 sh ls 9987
13877779999 bj zs 5678
13544445555 sz ww 10577
13877779999 sh zs 2145
13766668888 sh ls 9987
13877779999 bj zs 2184
13766668888 sh ls 1524
13766668888 sh ls 9844
13877779999 bj zs 6554
13544445555 sz ww 10584
13877779999 sh zs 21454
13766668888 sh ls 99747

Goal: compute each user's total purchase amount (the job groups records by name).
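For example, ww (13544445555) appears in two records with amounts 10577 and 10584, so the expected total for ww is 10577 + 10584 = 21161.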

1. FlowBean

package hadoop_test.avro_test_05.domain;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class FlowBean implements Writable {

	private String phone;
	private String add;
	private String name;
	private long consum;

//	Serialization
	@Override
	public void write(DataOutput out) throws IOException {
		out.writeUTF(phone);
		out.writeUTF(add);
		out.writeUTF(name);
		out.writeLong(consum);

	}

//	Deserialization; the field order must match the order used in write()
	@Override
	public void readFields(DataInput in) throws IOException {
		this.phone=in.readUTF();
		this.add=in.readUTF();
		this.name=in.readUTF();
		this.consum=in.readLong();
	}

	public String getPhone() {
		return phone;
	}

	public String getAdd() {
		return add;
	}

	public String getName() {
		return name;
	}

	public long getConsum() {
		return consum;
	}

	public void setPhone(String phone) {
		this.phone = phone;
	}

	public void setAdd(String add) {
		this.add = add;
	}

	public void setName(String name) {
		this.name = name;
	}

	public void setConsum(long consum) {
		this.consum = consum;
	}

	@Override
	public String toString() {
		return "FlowBean [phobe="+phone+",add="+add+",name="+name+",consum="+consum+"]";
	}
}

The bean implements Hadoop's Writable interface.
See also: the Writable and WritableComparable interfaces in Hadoop, and how serialization and deserialization work.
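Because readFields must read the fields in exactly the order write wrote them, a quick local round-trip check catches ordering mistakes before the job is ever submitted. A minimal sketch, assuming FlowBean is on the classpath (the class name FlowBeanRoundTrip is illustrative and not part of the project):

package hadoop_test.avro_test_05.domain;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class FlowBeanRoundTrip {
	public static void main(String[] args) throws IOException {
		FlowBean in = new FlowBean();
		in.setPhone("13544445555");
		in.setAdd("sz");
		in.setName("ww");
		in.setConsum(10577L);

		// Serialize with write(DataOutput).
		ByteArrayOutputStream bos = new ByteArrayOutputStream();
		in.write(new DataOutputStream(bos));

		// Deserialize with readFields(DataInput); it must consume fields in the same order.
		FlowBean out = new FlowBean();
		out.readFields(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));

		// Prints: FlowBean [phone=13544445555,add=sz,name=ww,consum=10577]
		System.out.println(out);
	}
}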

2. FlowDriver

package hadoop_test.avro_test_05.flow;

import hadoop_test.Utils_hadoop;
import hadoop_test.avro_test_05.domain.FlowBean;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


public class FlowDriver {
	public static void main(String[] args) throws Exception {
		// Submit the job to HDFS as the root user.
		System.setProperty("HADOOP_USER_NAME", "root");

		Configuration conf=new Configuration();
		Job job=Job.getInstance(conf);

		job.setJarByClass(FlowDriver.class);
		job.setMapperClass(FlowMapper.class);
		job.setReducerClass(FlowReducer.class);

		// The custom Writable bean is the value type for both the map output and the final output.
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(FlowBean.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(FlowBean.class);

		FileInputFormat.setInputPaths(job,new Path("/hadoop_test/avro/avro.txt"));
		FileOutputFormat.setOutputPath(job,new Path("/hadoop_test/avro/result"));
		
		job.waitForCompletion(true);
	}

}
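A rerun of the job fails if the output directory /hadoop_test/avro/result already exists on HDFS. Below is a minimal pre-flight cleanup sketch using the standard FileSystem API; the class and method names are illustrative, and the unused hadoop_test.Utils_hadoop import above suggests the project has its own helper for this, though its API is not shown here.

package hadoop_test.avro_test_05.flow;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;

public class OutputCleanup {
	// Delete the output directory if it already exists, so the job can be rerun.
	public static void deleteIfExists(Configuration conf, String dir) throws IOException {
		FileSystem fs = FileSystem.get(conf);
		Path out = new Path(dir);
		if (fs.exists(out)) {
			fs.delete(out, true); // true = recursive delete
		}
	}
}

Calling OutputCleanup.deleteIfExists(conf, "/hadoop_test/avro/result") in the driver before FileOutputFormat.setOutputPath(...) makes repeated runs possible without manual cleanup.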

3. FlowMapper

package hadoop_test.avro_test_05.flow;

import hadoop_test.avro_test_05.domain.FlowBean;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class FlowMapper extends Mapper<LongWritable, Text, Text, FlowBean> {
	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		// One input line: phone address name consum (space-separated)
		String[] fields = value.toString().split(" ");
		// Instantiate the bean and populate it from the parsed fields.
		FlowBean flowBean = new FlowBean();
		flowBean.setPhone(fields[0]);
		flowBean.setAdd(fields[1]);
		flowBean.setName(fields[2]);
		// The amount arrives as text; parse it into the bean's long field.
		flowBean.setConsum(Long.parseLong(fields[3]));
		System.out.println(flowBean); // debug output, goes to the map task's stdout log
		// Key by user name so the Reducer sees all of a user's records together.
		context.write(new Text(flowBean.getName()), flowBean);
	}
}
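For the input line 13544445555 sz ww 10577, the Mapper emits the key ww together with a FlowBean whose phone is 13544445555, add is sz, name is ww, and consum is 10577.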

4. FlowReducer

package hadoop_test.avro_test_05.flow;

import hadoop_test.avro_test_05.domain.FlowBean;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class FlowReducer extends Reducer<Text, FlowBean,Text,FlowBean> {

	@Override
	protected void reduce(Text name, Iterable<FlowBean> values,
						  Context context) throws IOException, InterruptedException {
		FlowBean tmp = new FlowBean();

		for (FlowBean flowbean : values) {
			// e.g. FlowBean [phone=13766668888,add=sh,name=ls,consum=9844]
			tmp.setAdd(flowbean.getAdd());
			tmp.setPhone(flowbean.getPhone());
			tmp.setName(flowbean.getName());
			// Running total: tmp.getConsum() starts at 0 and accumulates each record's consum.
			tmp.setConsum(tmp.getConsum() + flowbean.getConsum());
			// 1st iteration: tmp.consum = 0 + 9844 = 9844
			// 2nd iteration (e.g. a record with consum=1000): tmp.consum = 9844 + 1000 = 10844
		}
		context.write(name, tmp);
	}
}
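Tracing the ww key from the training data: the Reducer receives two FlowBeans with consum 10577 and 10584. After the first iteration tmp.consum is 0 + 10577 = 10577; after the second it is 10577 + 10584 = 21161, so the line written for ww is FlowBean [phone=13544445555,add=sz,name=ww,consum=21161].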

Checking the results
(Screenshot of the output in /hadoop_test/avro/result.)
