初学(19)——MapReduce开发

一、介绍

“Map(映射)”:把一个复杂的工作分解成若干个相互独立的工作
“Reduce(归约)”:对Map的结果进行汇总

二、示例

1、自定义数据类型

创建一个Java类,若需要实现比较排序功能则实现WritableComparable接口,否则实现Writable接口即可(二者均为接口,代码中使用implements实现)。

package com.mapreduce_demand3;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

/**
 * Custom Hadoop value/key type carrying four traffic counters
 * (up/down packet counts and up/down byte totals).
 *
 * <p>Implements {@link WritableComparable} so instances can be serialized
 * between the map and reduce phases and sorted by the framework. The field
 * order in {@link #write(DataOutput)} and {@link #readFields(DataInput)}
 * must stay identical.
 */
public class FlowBean implements WritableComparable<FlowBean> {

	private Integer upFlow;
	private Integer downFlow;
	private Integer upCountFlow;
	private Integer downCountFlow;

	/** No-arg constructor required by Hadoop's reflection-based instantiation. */
	public FlowBean() {
	}

	public FlowBean(Integer upFlow, Integer downFlow, Integer upCountFlow, Integer downCountFlow) {
		this.upFlow = upFlow;
		this.downFlow = downFlow;
		this.upCountFlow = upCountFlow;
		this.downCountFlow = downCountFlow;
	}

	public Integer getUpFlow() {
		return upFlow;
	}

	public void setUpFlow(Integer upFlow) {
		this.upFlow = upFlow;
	}

	public Integer getDownFlow() {
		return downFlow;
	}

	public void setDownFlow(Integer downFlow) {
		this.downFlow = downFlow;
	}

	public Integer getUpCountFlow() {
		return upCountFlow;
	}

	public void setUpCountFlow(Integer upCountFlow) {
		this.upCountFlow = upCountFlow;
	}

	public Integer getDownCountFlow() {
		return downCountFlow;
	}

	public void setDownCountFlow(Integer downCountFlow) {
		this.downCountFlow = downCountFlow;
	}

	/*
	 * Deserialization: read fields in the exact order they were written.
	 */
	@Override
	public void readFields(DataInput input) throws IOException {
		this.upFlow = input.readInt();
		this.downFlow = input.readInt();
		this.upCountFlow = input.readInt();
		this.downCountFlow = input.readInt();
	}

	/*
	 * Serialization. NOTE: writeInt unboxes each field, so all four
	 * counters must be non-null before the bean is written.
	 */
	@Override
	public void write(DataOutput output) throws IOException {
		output.writeInt(upFlow);
		output.writeInt(downFlow);
		output.writeInt(upCountFlow);
		output.writeInt(downCountFlow);
	}

	@Override
	public String toString() {
		return "FlowBean [upFlow=" + upFlow + ", downFlow=" + downFlow + ", upCountFlow=" + upCountFlow
				+ ", downCountFlow=" + downCountFlow + "]";
	}

	/*
	 * Comparator used by the shuffle sort (not needed when only Writable is
	 * implemented). Orders beans DESCENDING by upFlow, then downFlow, then
	 * upCountFlow, then downCountFlow.
	 */
	@Override
	public int compareTo(FlowBean other) {
		// Comparing other-to-this yields descending order without negation.
		int cmp = other.getUpFlow().compareTo(this.getUpFlow());
		if (cmp == 0) {
			cmp = other.getDownFlow().compareTo(this.getDownFlow());
		}
		if (cmp == 0) {
			cmp = other.getUpCountFlow().compareTo(this.getUpCountFlow());
		}
		if (cmp == 0) {
			cmp = other.getDownCountFlow().compareTo(this.getDownCountFlow());
		}
		return cmp;
	}

}

2、Mapper

创建Mapper
(原文此处为创建Mapper的操作截图,已省略)
选择Mapper
(原文此处为选择Mapper的操作截图,已省略)

package com.mapreduce_demand3;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class FlowMapper extends Mapper<LongWritable, Text, Text, FlowBean> {

	public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

		//对文件进行分割
		String[] split = value.toString().split("\t");
		//选出一个属性作为Key
		String phoneNum = split[1];
		
		//将选出的属性存放到自己定义的数据类型中作为Values
		FlowBean flowbean = new FlowBean();
		flowbean.setUpFlow(Integer.parseInt(split[6]));
		flowbean.setDownFlow(Integer.parseInt(split[7]));
		flowbean.setUpCountFlow(Integer.parseInt(split[8]));
		flowbean.setDownCountFlow(Integer.parseInt(split[9]));
		
		//将Key、Values存放至Context传给Reduce
		context.write(new Text(phoneNum), flowbean);
	}

}

3、Reducer

与创建Mapper方法相同创建Reducer
(原文此处为操作截图,已省略)

package com.mapreduce_demand3;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class FlowReduce extends Reducer<Text, FlowBean, Text, FlowBean> {

	public void reduce(Text key, Iterable<FlowBean> values, Context context) throws IOException, InterruptedException {

		//实例化一个自定义数据类型,初始值为0
		FlowBean flowBean = new FlowBean();
		Integer upFlow = 0;
		Integer downFlow = 0;
		Integer upCountFlow = 0;
		Integer downCountFlow = 0;
		
		//将Map传来的Value值累加求和
		for (FlowBean val : values) {
			upFlow += val.getUpFlow();
			downFlow += val.getDownFlow();
			upCountFlow += val.getUpCountFlow();
			downCountFlow += val.getDownCountFlow();
		}
		
		//将求和后的值存放至自定义数据类型
		flowBean.setUpFlow(upFlow);
		flowBean.setDownFlow(downFlow);
		flowBean.setUpCountFlow(upCountFlow);
		flowBean.setDownCountFlow(downCountFlow);
		
		//将得到的结果存放至Context中传给Driver
		context.write(key, flowBean);
	}

}

4、Partitioner(可选)

创建一个JAVA类,实现将结果分区。
例如按照phoneNum将结果分成分别以“135”,“136”,“137”开头,以及其他

package com.mapreduce_demand3;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

/**
 * Routes map output to one of four reducers by phone-number prefix:
 * "135" -> 0, "136" -> 1, "137" -> 2, everything else -> 3.
 */
public class FlowPartitioner extends Partitioner<Text, FlowBean> {

	@Override
	public int getPartition(Text key, FlowBean value, int numPartitions) {
		String phone = key.toString();
		// Numbers shorter than three characters fall through to the
		// default bucket, exactly like a failed startsWith check.
		String prefix = phone.length() >= 3 ? phone.substring(0, 3) : "";
		switch (prefix) {
		case "135":
			return 0;
		case "136":
			return 1;
		case "137":
			return 2;
		default:
			return 3;
		}
	}

}

5、Driver

与创建Mapper方法相同创建Driver
(原文此处为操作截图,已省略)
选择该项目的Mapper以及Reducer
(原文此处为操作截图,已省略)

package com.mapreduce_demand3;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Job driver: wires the flow-summing Mapper, Reducer and Partitioner
 * together and submits the job.
 *
 * <p>Usage: {@code FlowDriver <input> <output>}
 */
public class FlowDriver {

	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();

		// Require exactly an input path and an output path.
		if (args.length != 2) {
			System.err.println("Usage: <input> <output>");
			System.exit(-1);
		}

		// Delete a pre-existing output path so the job does not abort.
		// exists() (rather than the deprecated isDirectory()) also covers
		// the case where the path exists as a plain file.
		Path outputPath = new Path(args[1]);
		FileSystem hdfs = outputPath.getFileSystem(conf);
		if (hdfs.exists(outputPath)) {
			hdfs.delete(outputPath, true);
		}

		Job job = Job.getInstance(conf, "JobName");
		// Simple class literals suffice: all classes share this package.
		job.setJarByClass(FlowDriver.class);
		job.setMapperClass(FlowMapper.class);
		job.setReducerClass(FlowReduce.class);

		// The reduce-task count must match the number of partitions
		// produced by FlowPartitioner (4).
		job.setPartitionerClass(FlowPartitioner.class);
		job.setNumReduceTasks(4);

		// Final output types produced by the reducer (also used for the
		// map output, since no setMapOutput*Class overrides are given).
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(FlowBean.class);

		// Input and output locations from the command line.
		FileInputFormat.setInputPaths(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));

		// Propagate job success/failure to the caller via the exit code.
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值