通过本文可以熟练掌握在 Reduce 阶段进行分组，并了解 JDK 8 新特性的代码写法。
我们先来看下原始数据
这是一份手工编辑的 txt 文件，数据量不大：
English,liudehua,80
English,lijing,79
English,nezha,85
English,jinzha,60
English,muzha,71
English,houzi,99
English,libai,88
English,hanxin,66
English,zhugeliang,95
Math,liudehua,74
Math,lijing,72
Math,nezha,95
Math,jinzha,61
Math,muzha,37
Math,houzi,37
Math,libai,84
Math,hanxin,89
Math,zhugeliang,93
Computer,liudehua,54
Computer,lijing,73
Computer,nezha,86
Computer,jinzha,96
Computer,muzha,76
Computer,houzi,92
Computer,libai,73
Computer,hanxin,82
Computer,zhugeliang,100
一、MapReduce 统计学生成绩：最高分、最低分、平均分。
1.Mapper阶段代码
package com.studentExam.avgscore;
/**
* $功能描述: AvgMapper
*
* @author :smart-dxw
* @version : 2019/6/19 21:58 v1.0
*/
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class AvgMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    // Reuse output writables across map() calls instead of allocating new
    // objects per record — the standard Hadoop idiom to reduce GC pressure.
    private final Text outKey = new Text();
    private final IntWritable outValue = new IntWritable();

    /**
     * Parses one CSV input line of the form {@code subject,name,score}
     * (e.g. {@code English,liudehua,80}) and emits (name, score).
     *
     * @param key     byte offset of the line within the input split (unused)
     * @param value   one line of input text
     * @param context used to emit the (student name, score) pair
     * @throws IOException          if the framework fails to write the output
     * @throws InterruptedException if the task is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().split(",");
        // Key by student name so all of one student's scores arrive at the
        // same reducer, where max/min/average can be computed per student.
        outKey.set(fields[1]);
        outValue.set(Integer.parseInt(fields[2]));
        context.write(outKey, outValue);
    }
}
2.Reducer阶段代码
package com.studentExam.avgscore;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
impo