# 一.MapReduce应用场景

MR能解决什么问题？一般来说，用的最多的应该是日志分析，海量数据排序处理。最近一段时间公司用MR来解决大量日志的离线并行分析问题。

# 三.常用计算模型

## 1.部门表

DEPTNO,DNAME,LOC    // 部门号，部门名称，所在地

10,ACCOUNTING,NEW YORK
20,RESEARCH,DALLAS
30,SALES,CHICAGO
40,OPERATIONS,BOSTON

## 2.员工表

EMPNO,ENAME,JOB,HIREDATE,SAL,COMM,DEPTNO,MGR // 员工号，英文名，职位，聘期，工资，奖金，所属部门，管理者
7369,SMITH,CLERK,1980-12-17 00:00:00.0,800,,20,7902
7499,ALLEN,SALESMAN,1981-02-20 00:00:00.0,1600,300,30,7698
7521,WARD,SALESMAN,1981-02-22 00:00:00.0,1250,500,30,7698
7566,JONES,MANAGER,1981-04-02 00:00:00.0,2975,,20,7839
7654,MARTIN,SALESMAN,1981-09-28 00:00:00.0,1250,1400,30,7698
7698,BLAKE,MANAGER,1981-05-01 00:00:00.0,2850,,30,7839
7782,CLARK,MANAGER,1981-06-09 00:00:00.0,2450,    ,10,7839
7839,KING,PRESIDENT,1981-11-17 00:00:00.0,5000,,10,
7844,TURNER,SALESMAN,1981-09-08 00:00:00.0,1500,0,30,7698
7900,JAMES,CLERK,1981-12-03 00:00:00.0,950,,30,7698
7902,FORD,ANALYST,1981-12-03 00:00:00.0,3000,,20,7566
7934,MILLER,CLERK,1982-01-23 00:00:00.0,1300,,10,7782

## 3.实例化为bean

emp.java
public Emp(String inStr) {
String[] split = inStr.split(",");
this.empno = (split[0].isEmpty()? "" : split[0]);
this.ename = (split[1].isEmpty() ? "" : split[1]);
this.job = (split[2].isEmpty() ? "" : split[2]);
this.hiredate = (split[3].isEmpty() ? "" : split[3]);
this.sal = (split[4].isEmpty() ? "0" : split[4]);
this.comm = (split[5].isEmpty() ? "" : split[5]);
this.deptno = (split[6].isEmpty() ? "" : split[6]);
try {
this.mgr = (split[7].isEmpty() ? "" : split[7]);
} catch (IndexOutOfBoundsException e) {     //防止最后一位为空的情况
this.mgr = "";
}
}


dept.java
public Dept(String string) {
String[] split = string.split(",");
this.deptno = split[0];
this.dname = split[1];
this.loc = split[2];
}


## 4.模型分析

### 4.1 求和

public static class Map_1 extends MapReduceBase implements Mapper<Object, Text, Text, IntWritable> {
public void map(Object key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
try {
Emp emp = new Emp(value.toString());
output.collect(new Text(emp.getDeptno()), new IntWritable(Integer.parseInt(emp.getSal())));  // { k=部门号，v=员工薪资}
} catch (Exception e) {
reporter.getCounter(ErrCount.LINESKIP).increment(1);
WriteErrLine.write("./input/" + this.getClass().getSimpleName() + "err_lines", reporter.getCounter(ErrCount.LINESKIP).getCounter() + " " + value.toString());
}
}
}

public static class Reduce_1 extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
int sum = 0;
while (values.hasNext()) {
sum = sum + values.next().get();
}
output.collect(key, new IntWritable(sum));
}

}

### 4.3 平均值

public static class Map_2 extends MapReduceBase implements Mapper<Object, Text, Text, IntWritable> {
public void map(Object key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
try {
Emp emp = new Emp(value.toString());
output.collect(new Text(emp.getDeptno()), new IntWritable(Integer.parseInt(emp.getSal())));  //{ k=部门号，v=薪资}
} catch (Exception e) {
reporter.getCounter(ErrCount.LINESKIP).increment(1);
WriteErrLine.write("./input/" + this.getClass().getSimpleName() + "err_lines", reporter.getCounter(ErrCount.LINESKIP).getCounter() + " " + value.toString());
}

}
}

public static class Reduce_2 extends MapReduceBase implements Reducer<Text, IntWritable, Text, Text> {
public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
double sum = 0; //部门工资
int count =0 ; //人数
while (values.hasNext()) {
count++;
sum = sum + values.next().get();
}
output.collect(key, new Text( count+" "+sum/count));
}

}

### 4.4 分组排序

	public static class Map_3 extends MapReduceBase implements Mapper<Object, Text, Text, Text> {
public void map(Object key, Text value, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
try {
Emp emp = new Emp(value.toString());
output.collect(new Text(emp.getDeptno()), new Text(emp.getHiredate() + "~" + emp.getEname())); // { k=部门号，v=聘期}
} catch (Exception e) {
reporter.getCounter(ErrCount.LINESKIP).increment(1);
WriteErrLine.write("./input/" + this.getClass().getSimpleName() + "err_lines", reporter.getCounter(ErrCount.LINESKIP).getCounter() + " " + value.toString());
}

}
}

public static class Reduce_3 extends MapReduceBase implements Reducer<Text, Text, Text, Text> {
public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
DateFormat sdf = DateFormat.getDateInstance();
Date minDate = new Date(9999, 12, 30);
Date d;
String[] strings = null;
while (values.hasNext()) {
try {
strings = values.next().toString().split("~"); // 获取名字和日期
d = sdf.parse(strings[0].toString().substring(0, 10));
if (d.before(minDate)) {
minDate = d;
}
} catch (ParseException e) {
e.printStackTrace();
}
}
output.collect(key, new Text(minDate.toLocaleString() + " " + strings[1]));

}

}

### 4.5 多表关联

public static class Map_4 extends MapReduceBase implements Mapper<Object, Text, Text, Text> {
public void map(Object key, Text value, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
try {
String fileName = ((FileSplit) reporter.getInputSplit()).getPath().getName();
if (fileName.equalsIgnoreCase("emp.txt")) {
Emp emp = new Emp(value.toString());
output.collect(new Text(emp.getDeptno()), new Text("A#" + emp.getSal()));
}
if (fileName.equalsIgnoreCase("dept.txt")) {
Dept dept = new Dept(value.toString());
output.collect(new Text(dept.getDeptno()), new Text("B#" + dept.getLoc()));
}
} catch (Exception e) {
reporter.getCounter(ErrCount.LINESKIP).increment(1);
WriteErrLine.write("./input/" + this.getClass().getSimpleName() + "err_lines", reporter.getCounter(ErrCount.LINESKIP).getCounter() + " " + value.toString());
}

}
}

public static class Reduce_4 extends MapReduceBase implements Reducer<Text, Text, Text, Text> {
public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
String deptV;
Vector<String> empList = new Vector<String>(); // 保存EMP表的工资数据
Vector<String> deptList = new Vector<String>(); // 保存DEPT表的位置数据
while (values.hasNext()) {
deptV = values.next().toString();
if (deptV.startsWith("A#")) {
}
if (deptV.startsWith("B#")) {
}
}
double sumSal = 0;
for (String location : deptList) {
for (String salary : empList) {
//每个城市员工工资总和
sumSal = Integer.parseInt(salary) + sumSal;
}
output.collect(new Text(location), new Text(Double.toString(sumSal)));
}
}

}

### 4.6 单表关联

public static class Map_5 extends MapReduceBase implements Mapper<Object, Text, Text, Text> {
public void map(Object key, Text value, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
try {
Emp emp = new Emp(value.toString());
output.collect(new Text(emp.getMgr()), new Text("A#" + emp.getEname() + "~" + emp.getSal()));  // 员工表 { k=上司名，v=员工工资}
output.collect(new Text(emp.getEmpno()), new Text("B#" + emp.getEname() + "~" + emp.getSal()));// “经理表” { k=员工名，v=员工工资}
} catch (Exception e) {
reporter.getCounter(ErrCount.LINESKIP).increment(1);
WriteErrLine.write("./input/" + this.getClass().getSimpleName() + "err_lines", reporter.getCounter(ErrCount.LINESKIP).getCounter() + " " + value.toString());
}
}
}

public static class Reduce_5 extends MapReduceBase implements Reducer<Text, Text, Text, Text> {
public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
String value;
Vector<String> empList = new Vector<String>(); // 员工表
Vector<String> mgrList = new Vector<String>(); // 经理表
while (values.hasNext()) {
value = values.next().toString();
if (value.startsWith("A#")) {
}
if (value.startsWith("B#")) {
}
}
String empName, empSal, mgrSal;

for (String emploee : empList) {
for (String mgr : mgrList) {
String[] empInfo = emploee.split("~");
empName = empInfo[0];
empSal = empInfo[1];
String[] mgrInfo = mgr.split("~");
mgrSal = mgrInfo[1];
if (Integer.parseInt(empSal) > Integer.parseInt(mgrSal)) {
output.collect(key, new Text(empName + " " + empSal));
}
}
}
}

}

### 4.7 TOP N

public static class Map_8 extends MapReduceBase implements Mapper<Object, Text, Text, Text> {
public void map(Object key, Text value, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
try {
Emp emp = new Emp(value.toString());
output.collect(new Text("1"), new Text(emp.getEname() + "~" + emp.getSal()));    // { k=随意字符串或数字，v=员工名字+薪资}
} catch (Exception e) {
reporter.getCounter(ErrCount.LINESKIP).increment(1);
WriteErrLine.write("./input/" + this.getClass().getSimpleName() + "err_lines", reporter.getCounter(ErrCount.LINESKIP).getCounter() + " " + value.toString());
}

}
}

public static class Reduce_8 extends MapReduceBase implements Reducer<Text, Text, Text, Text> {
public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
Map<Integer, String> emp = new TreeMap<Integer, String>();   // TreeMap默认key升序排列，巧妙利用这点可以实现top N
while (values.hasNext()) {
String[] valStrings = values.next().toString().split("~");
emp.put(Integer.parseInt(valStrings[1]), valStrings[0]);
}
int count = 0; // 计数器
for (Iterator<Integer> keySet = emp.keySet().iterator(); keySet.hasNext();) {
if (count < 3) {  //  N =3
Integer current_key = keySet.next();
output.collect(new Text(emp.get(current_key)), new Text(current_key.toString())); // 迭代key，即SAL
count++;
} else {
break;
}
}
}
}


### 4.8 降序排序

public static class Map_9 extends MapReduceBase implements Mapper<Object, Text, Text, Text> {
public void map(Object key, Text value, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
try {
Emp emp = new Emp(value.toString());
int totalSal = Integer.parseInt(emp.getComm()) + Integer.parseInt(emp.getSal());
output.collect(new Text("1"), new Text(emp.getEname() + "~" + totalSal));
} catch (Exception e) {
reporter.getCounter(ErrCount.LINESKIP).increment(1);
WriteErrLine.write("./input/" + this.getClass().getSimpleName() + "err_lines", reporter.getCounter(ErrCount.LINESKIP).getCounter() + " " + value.toString());
}

}
}

public static class Reduce_9 extends MapReduceBase implements Reducer<Text, Text, Text, Text> {
public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
Map<Integer, String> emp = new TreeMap<Integer, String>(
// 重写比较器，使降序排列
new Comparator<Integer>() {
public int compare(Integer o1, Integer o2) {
return o2.compareTo(o1);
}
});
while (values.hasNext()) {
String[] valStrings = values.next().toString().split("~");
emp.put(Integer.parseInt(valStrings[1]), valStrings[0]);
}
for (Iterator<Integer> keySet = emp.keySet().iterator(); keySet.hasNext();) {
Integer current_key = keySet.next();
output.collect(new Text(emp.get(current_key)), new Text(current_key.toString())); // 迭代key，即SAL
}
}
}

# 四.总结

• 本文已收录于以下专栏：

## 回顾——MapReduce计算模型

• yinhenan11
• 2016年12月16日 12:47
• 270

## MapReduce数据并行计算模型介绍

1、MapReduce来龙去脉  MapReduce是一个说难懂也难懂、说好懂也好懂的概念。  说它难懂，是因为，如果你只理论上的去学习、去理解，确实很难懂。  说它好懂，是因为，如果你亲手...
• somilong
• 2016年06月03日 22:26
• 1315

## Hadoop MapReduce 计算模型分析（一）

Hadoop MapReduce 计算模型分析（一） 先简单说一下MapReduce计算模型：        首先这是一个分布式对大数据处理的计算模型。在多个节点上并行处理大数据。在阅读时，你要将...
• holandstone
• 2017年07月12日 16:25
• 279

## MapReduce计算模型--简单层次Top-Down细化

Hadoop MapReduce 计算模型分析（一） 先简单说一下MapReduce计算模型：        首先这是一个分布式对大数据处理的计算模型。在多个节点上并行处理大数据。在阅读时，你要将自己...
• RichyTang
• 2013年08月01日 23:38
• 1411

mapreduce
• q383965374
• 2016年03月29日 16:52
• 3466

• zhangdong2012
• 2016年11月15日 16:10
• 482

• u013800147
• 2015年05月24日 14:36
• 3006

• tiandijun
• 2015年05月08日 09:47
• 1300

• robertleepeak
• 2010年11月10日 22:01
• 13984