自定义类bean进行MapReduce参数传输

数据类型:

1363157984041 	13660577991(手机号)	5C-0E-8B-92-5C-20:CMCC-EASY	120.197.40.4	s19.cnzz.com	站点统计	24	9	6960(上行流量)	690(下行流量)	200

需求:
求每个手机号的上行流量总和、下行流量总和与总流量,以自定义 Bean 方式在 Map 和 Reduce 之间传输,并按照手机号前三位进行分区
统计类:

package mrpro927;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;



/*
 *需求1:同一个手机号上行,下行,总流量之和
 * 
 */
/**
 * MapReduce job: for each phone number, sum upstream traffic, downstream
 * traffic and total traffic, carrying the three values between map and
 * reduce in a custom {@code phoneBean} Writable. Output is partitioned by
 * the first three digits of the phone number.
 */
public class phoneDataBeans {

	/**
	 * Parses one tab-separated log line and emits (phone number, phoneBean).
	 * Input key is the byte offset of the line (unused); input value is the
	 * raw line of text.
	 */
	public static class MyMapper extends Mapper<LongWritable, Text, Text, phoneBean>{
		// Output objects are reused across calls to avoid per-record
		// allocation; context.write serializes them immediately, so this
		// is safe (standard Hadoop idiom).
		Text t = new Text();
		phoneBean p = new phoneBean();

		@Override
		protected void map(LongWritable key, Text value,
				Mapper<LongWritable, Text, Text, phoneBean>.Context context)
				throws IOException, InterruptedException {
			String[] split = value.toString().split("\t");
			// Data cleaning: silently drop malformed lines that do not have
			// exactly 11 tab-separated fields.
			if(split.length == 11){
				t.set(split[1]);                              // field 1: phone number
				p.setUpflow(Integer.parseInt(split[7]));      // field 7: upstream bytes
				p.setDownflow(Integer.parseInt(split[8]));    // field 8: downstream bytes
				p.setSum(p.getUpflow() + p.getDownflow());
				context.write(t, p);
			}
		}
	}

	/**
	 * Called once per phone number (per key group): accumulates upstream,
	 * downstream and total traffic over all records for that number.
	 */
	public static class MyReducer extends Reducer<Text, phoneBean, Text, phoneBean>{
		// Reused output bean; serialized by context.write before the next call.
		phoneBean p = new phoneBean();

		@Override
		protected void reduce(Text key, Iterable<phoneBean> values,
				Reducer<Text, phoneBean, Text, phoneBean>.Context context)
						throws IOException, InterruptedException {
			int upsum = 0;
			int downsum = 0;
			int sum = 0;
			for(phoneBean t : values){
				upsum += t.getUpflow();
				downsum += t.getDownflow();
				sum += t.getSum();
			}

			p.setUpflow(upsum);
			p.setDownflow(downsum);
			p.setSum(sum);
			context.write(key, p);
		}
	}

	/**
	 * Routes records to one of 5 reducers by phone-number prefix:
	 * 136 -> 0, 137 -> 1, 138 -> 2, 139 -> 3, everything else -> 4.
	 * The number of reduce tasks must be set to (at least) 5 to match.
	 */
	public static class MyPartitioner extends Partitioner<Text, phoneBean>{

		@Override
		public int getPartition(Text key, phoneBean value, int numPartitions) {
			// startsWith is equivalent to substring(0, 3).equals(...) for
			// well-formed phone numbers, but cannot throw
			// StringIndexOutOfBoundsException on keys shorter than 3 chars.
			String phone = key.toString();
			if(phone.startsWith("136")){
				return 0;
			}else if(phone.startsWith("137")){
				return 1;
			}else if(phone.startsWith("138")){
				return 2;
			}else if(phone.startsWith("139")){
				return 3;
			}else {
				return 4;
			}
		}

	}

	public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
		// Load the Hadoop configuration (core-site.xml etc. from classpath).
		Configuration conf = new Configuration();
		// When launched from an IDE, impersonate this Linux user on the cluster.
		System.setProperty("HADOOP_USER_NAME", "mading");
		// Create the job.
		Job job = Job.getInstance(conf);
		// Jar containing the main class (needed for cluster execution).
		job.setJarByClass(phoneDataBeans.class);
		// Mapper and reducer classes.
		job.setMapperClass(MyMapper.class);
		job.setReducerClass(MyReducer.class);
		// Map output key/value types (may be omitted only when identical to
		// the reduce output types).
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(phoneBean.class);
		// Reduce output key/value types.
		// BUG FIX: this was Text.class, but MyReducer writes phoneBean
		// values; Hadoop's output type check would fail at runtime with
		// "wrong value class".
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(phoneBean.class);
		// Custom partitioner (by phone-number prefix).
		job.setPartitionerClass(MyPartitioner.class);
		// Reduce parallelism must cover the 5 partitions produced above.
		job.setNumReduceTasks(5);
		// Input path on the HA HDFS cluster.
		FileInputFormat.addInputPath(job, new Path("hdfs://master:9000/phonedatain"));
		// Output path (must not already exist).
		FileOutputFormat.setOutputPath(job, new Path("hdfs://master:9000/pout01"));
		// Submit and block until completion; propagate success/failure
		// through the process exit code.
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}

}

Bean 类,注意实现 Writable 接口。
因为只实现了 Writable 而没有实现 WritableComparable(即没有提供排序比较方法 compareTo),这种自定义类只能放在 value 的位置传输,不能放在 key 的位置;若需作为 key,应改为实现 WritableComparable 接口。

package mrpro927;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

/*
 * 自定义的类作为MapReduce传输对象的时候,必须序列化,实现Writable 接口
 */
/*
 * Custom value type carried between map and reduce. Any user-defined class
 * used as a MapReduce transport object must support Hadoop serialization,
 * i.e. implement the Writable interface.
 */
public class phoneBean implements Writable{
	private int upflow;   // upstream traffic
	private int downflow; // downstream traffic
	private int sum;      // total traffic (upflow + downflow)

	public int getUpflow() { return upflow; }
	public void setUpflow(int upflow) { this.upflow = upflow; }

	public int getDownflow() { return downflow; }
	public void setDownflow(int downflow) { this.downflow = downflow; }

	public int getSum() { return sum; }
	public void setSum(int sum) { this.sum = sum; }

	/*
	 * Serialization (object -> bytes): invoked when the bean is shipped
	 * from the map side to the reduce side.
	 */
	@Override
	public void write(DataOutput out) throws IOException {
		out.writeInt(upflow);
		out.writeInt(downflow);
		out.writeInt(sum);
	}

	/*
	 * Deserialization (bytes -> object) on the reduce side. Fields MUST be
	 * read back in exactly the order they were written.
	 */
	@Override
	public void readFields(DataInput in) throws IOException {
		upflow = in.readInt();
		downflow = in.readInt();
		sum = in.readInt();
	}

	// Tab-separated rendering; this is what TextOutputFormat writes as the value.
	@Override
	public String toString() {
		return upflow + "\t" + downflow + "\t" + sum ;
	}

}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值