I. Introduction
"Map": decomposes a complex job into a number of mutually independent sub-tasks.
"Reduce": aggregates the results produced by the Map phase.
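A quick worked example before the full walkthrough: in the classic word count, the Map step turns each input line into independent (word, 1) pairs, and the Reduce step sums the counts that arrive for the same word.

Map:    "hello world hello"  ->  ("hello", 1), ("world", 1), ("hello", 1)
Reduce: ("hello", [1, 1]) -> ("hello", 2);  ("world", [1]) -> ("world", 1)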
II. Example
1. Custom data type
Create a Java class. If it needs to support comparison and sorting, implement the WritableComparable interface; otherwise implementing Writable is sufficient.
package com.mapreduce_demand3;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

public class FlowBean implements WritableComparable<FlowBean> {

    private Integer upFlow;
    private Integer downFlow;
    private Integer upCountFlow;
    private Integer downCountFlow;

    public FlowBean() {
    }

    public FlowBean(Integer upFlow, Integer downFlow, Integer upCountFlow, Integer downCountFlow) {
        this.upFlow = upFlow;
        this.downFlow = downFlow;
        this.upCountFlow = upCountFlow;
        this.downCountFlow = downCountFlow;
    }

    public Integer getUpFlow() {
        return upFlow;
    }

    public void setUpFlow(Integer upFlow) {
        this.upFlow = upFlow;
    }

    public Integer getDownFlow() {
        return downFlow;
    }

    public void setDownFlow(Integer downFlow) {
        this.downFlow = downFlow;
    }

    public Integer getUpCountFlow() {
        return upCountFlow;
    }

    public void setUpCountFlow(Integer upCountFlow) {
        this.upCountFlow = upCountFlow;
    }

    public Integer getDownCountFlow() {
        return downCountFlow;
    }

    public void setDownCountFlow(Integer downCountFlow) {
        this.downCountFlow = downCountFlow;
    }

    /*
     * Deserialization: fields must be read in the same order they were written.
     */
    @Override
    public void readFields(DataInput input) throws IOException {
        this.upFlow = input.readInt();
        this.downFlow = input.readInt();
        this.upCountFlow = input.readInt();
        this.downCountFlow = input.readInt();
    }

    /*
     * Serialization
     */
    @Override
    public void write(DataOutput output) throws IOException {
        output.writeInt(upFlow);
        output.writeInt(downFlow);
        output.writeInt(upCountFlow);
        output.writeInt(downCountFlow);
    }

    @Override
    public String toString() {
        return "FlowBean [upFlow=" + upFlow + ", downFlow=" + downFlow + ", upCountFlow=" + upCountFlow
                + ", downCountFlow=" + downCountFlow + "]";
    }

    /*
     * Comparator: sorts in descending order, field by field.
     * (If you only implement Writable, this method is not needed.)
     */
    @Override
    public int compareTo(FlowBean flowbean) {
        if (this.getUpFlow().compareTo(flowbean.getUpFlow()) != 0) {
            return -(this.getUpFlow().compareTo(flowbean.getUpFlow()));
        } else if (this.getDownFlow().compareTo(flowbean.getDownFlow()) != 0) {
            return -(this.getDownFlow().compareTo(flowbean.getDownFlow()));
        } else if (this.getUpCountFlow().compareTo(flowbean.getUpCountFlow()) != 0) {
            return -(this.getUpCountFlow().compareTo(flowbean.getUpCountFlow()));
        } else {
            return -(this.getDownCountFlow().compareTo(flowbean.getDownCountFlow()));
        }
    }
}
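A quick way to verify that write and readFields agree is a local round trip through an in-memory byte stream. This is a sketch for testing only; FlowBeanRoundTrip is a hypothetical helper class, not part of the job:

package com.mapreduce_demand3;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class FlowBeanRoundTrip {
    public static void main(String[] args) throws IOException {
        FlowBean in = new FlowBean(18, 15, 1116, 954);
        // Serialize into an in-memory buffer
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        in.write(new DataOutputStream(buffer));
        // Deserialize from the same bytes and print the result
        FlowBean out = new FlowBean();
        out.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));
        System.out.println(out); // FlowBean [upFlow=18, downFlow=15, upCountFlow=1116, downCountFlow=954]
    }
}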
2. Mapper
Create the Mapper class, selecting Mapper as its type in the IDE's new-class wizard.
package com.mapreduce_demand3;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class FlowMapper extends Mapper<LongWritable, Text, Text, FlowBean> {

    @Override
    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Split the line into tab-separated fields
        String[] split = value.toString().split("\t");
        // Pick one field (the phone number) as the key
        String phoneNum = split[1];
        // Store the selected fields in the custom data type used as the value
        FlowBean flowbean = new FlowBean();
        flowbean.setUpFlow(Integer.parseInt(split[6]));
        flowbean.setDownFlow(Integer.parseInt(split[7]));
        flowbean.setUpCountFlow(Integer.parseInt(split[8]));
        flowbean.setDownCountFlow(Integer.parseInt(split[9]));
        // Write the key/value pair to the context; it is passed on to the Reducer
        context.write(new Text(phoneNum), flowbean);
    }
}
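This mapper assumes tab-separated input in which column 1 (0-indexed) holds the phone number and columns 6 through 9 hold the four flow values. A record might look like the following; this is made-up illustrative data, not from a real dataset:

1363157985066	13560436666	C4-17-FE-BA-DE-D9:CMCC	120.196.100.99	example.com	video	18	15	1116	954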
3. Reducer
Create the Reducer the same way as the Mapper.
package com.mapreduce_demand3;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class FlowReduce extends Reducer<Text, FlowBean, Text, FlowBean> {

    @Override
    public void reduce(Text key, Iterable<FlowBean> values, Context context) throws IOException, InterruptedException {
        // Instantiate the custom data type and accumulators, initialized to 0
        FlowBean flowBean = new FlowBean();
        Integer upFlow = 0;
        Integer downFlow = 0;
        Integer upCountFlow = 0;
        Integer downCountFlow = 0;
        // Sum all values received from the map phase for this key
        for (FlowBean val : values) {
            upFlow += val.getUpFlow();
            downFlow += val.getDownFlow();
            upCountFlow += val.getUpCountFlow();
            downCountFlow += val.getDownCountFlow();
        }
        // Store the sums in the custom data type
        flowBean.setUpFlow(upFlow);
        flowBean.setDownFlow(downFlow);
        flowBean.setUpCountFlow(upCountFlow);
        flowBean.setDownCountFlow(downCountFlow);
        // Write the result to the context; it is collected for output
        context.write(key, flowBean);
    }
}
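Because this reduce step is a pure per-key sum whose input and output types match the map output, the same class can also be registered as a combiner, so partial sums are computed on the map side before the shuffle. If you want that optimization, add one line to the Driver shown below:

job.setCombinerClass(FlowReduce.class);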
4. Partitioner (optional)
Create a Java class that assigns each result to a partition.
For example, split the results by phoneNum into numbers starting with "135", "136", "137", and all the rest.
package com.mapreduce_demand3;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

public class FlowPartitioner extends Partitioner<Text, FlowBean> {

    @Override
    public int getPartition(Text key, FlowBean value, int numPartitions) {
        // Route each record by phone-number prefix; returned indices must lie in [0, numPartitions)
        String phoneNum = key.toString();
        if (phoneNum.startsWith("135")) {
            return 0;
        } else if (phoneNum.startsWith("136")) {
            return 1;
        } else if (phoneNum.startsWith("137")) {
            return 2;
        } else {
            return 3;
        }
    }
}
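For comparison, when no custom Partitioner is registered, Hadoop falls back to HashPartitioner, whose getPartition is essentially the following:

// Hash the key, clear the sign bit, then take the remainder modulo the reducer count
return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;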
5. Driver
Create the Driver the same way as the Mapper, selecting this project's Mapper and Reducer.
package com.mapreduce_demand3;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class FlowDriver {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // If the number of arguments is not 2, print a usage message and exit.
        if (args.length != 2) {
            System.err.println("Usage: <input> <output>");
            System.exit(-1);
        }
        // If the output directory already exists, delete it first.
        Path mypath = new Path(args[1]);
        FileSystem hdfs = mypath.getFileSystem(conf);
        if (hdfs.exists(mypath)) {
            hdfs.delete(mypath, true);
        }
        Job job = Job.getInstance(conf, "JobName");
        job.setJarByClass(FlowDriver.class);
        job.setMapperClass(FlowMapper.class);
        job.setReducerClass(FlowReduce.class);
        // If a Partitioner is used, register it here; the argument to
        // setNumReduceTasks must match the number of partitions.
        job.setPartitionerClass(FlowPartitioner.class);
        job.setNumReduceTasks(4);
        // Set these to the key/value types emitted by the Reducer.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);
        // Set the input and output paths.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
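Once the project is packaged into a jar, the job can be submitted from the command line; the jar name and HDFS paths below are placeholders:

hadoop jar flow.jar com.mapreduce_demand3.FlowDriver /flow/input /flow/output

With the Partitioner above and four reduce tasks, the output directory will contain four result files, part-r-00000 through part-r-00003, one per partition.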