MR学生成绩统计
数据:
English,liudehua,80
English,lijing,79
English,nezha,85
English,jinzha,60
English,muzha,71
English,houzi,99
English,libai,88
English,hanxin,66
English,zhugeliang,95
Math,liudehua,74
Math,lijing,72
Math,nezha,95
Math,jinzha,61
Math,muzha,37
Math,houzi,37
Math,libai,84
Math,hanxin,89
Math,zhugeliang,93
Computer,liudehua,54
Computer,lijing,73
Computer,nezha,86
Computer,jinzha,96
Computer,muzha,76
Computer,houzi,92
Computer,libai,73
Computer,hanxin,82
Computer,zhugeliang,100
背景:
学校的学生是一个会产生大量数据的群体,比如每次考试的成绩。
现有一个班级的学生一个月的考试成绩数据。
每行数据的字段依次为:科目,姓名,分数(以英文逗号分隔)
需求:
求出每个学生的最高分,平均成绩(整数(不能使用强制类型转换)),最低分
Map阶段:
package bigdata12.homework10.grade1;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Map stage of the student grade statistics job.
 *
 * <p>Each input line is expected to have the form {@code subject,name,grade}. The mapper
 * re-keys every record by student name so that all grades of one student are delivered to
 * the same reduce call. The emitted value is {@code subject + "\t" + grade}.
 *
 * <p>Lines that cannot be interpreted (fewer than three comma-separated fields, an empty
 * name, or a grade that is not an integer) are skipped and counted in the
 * {@code GradeMap / MALFORMED_LINES} job counter instead of failing the whole task.
 *
 * @author Fantome
 * @date 2019/6/19
 */
public class GradeMap extends Mapper<LongWritable, Text, Text, Text> {
    /** Number of leading comma-separated fields that are consumed: subject, name, grade. */
    private static final int REQUIRED_FIELDS = 3;

    /** Counter group and name used to report skipped input lines. */
    private static final String COUNTER_GROUP = "GradeMap";
    private static final String COUNTER_MALFORMED = "MALFORMED_LINES";

    /** Reused output key (student name); reused to avoid allocating per record. */
    private final Text outKey = new Text();
    /** Reused output value ({@code subject \t grade}). */
    private final Text outValue = new Text();

    /**
     * Re-keys one input record by student name.
     *
     * @param key     offset key supplied by the input format; not used
     * @param value   one input line: {@code subject,name,grade}
     * @param context task context used to emit the record and to update the counter
     * @throws IOException          if writing the output record fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key,
                       Text value,
                       Context context) throws IOException, InterruptedException {
        String[] splits = value.toString().split(",");
        // Blank lines, headers or truncated rows would otherwise raise
        // ArrayIndexOutOfBoundsException below and kill the task.
        if (splits.length < REQUIRED_FIELDS) {
            context.getCounter(COUNTER_GROUP, COUNTER_MALFORMED).increment(1);
            return;
        }

        // Trim so that "English, liudehua, 80" groups with "English,liudehua,80"
        // and the grade stays parseable on the reduce side.
        String subject = splits[0].trim();
        String name = splits[1].trim();
        String grade = splits[2].trim();

        if (name.isEmpty() || !isInteger(grade)) {
            context.getCounter(COUNTER_GROUP, COUNTER_MALFORMED).increment(1);
            return;
        }

        // Aggregate by name on the reduce side.
        outKey.set(name);
        outValue.set(subject + "\t" + grade);
        context.write(outKey, outValue);
    }

    /** Returns whether {@code text} can be parsed by {@link Integer#parseInt(String)}. */
    private static boolean isInteger(String text) {
        try {
            Integer.parseInt(text);
            return true;
        } catch (NumberFormatException ignored) {
            // Not a number: the caller treats the line as malformed.
            return false;
        }
    }
}
Reduce阶段:
package bigdata12.homework10.grade1;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Reduce stage of the student grade statistics job.
 *
 * <p>For each student it receives every {@code subject \t grade} value emitted by the map
 * stage and writes one summary line containing the highest grade (with its subject), the
 * integer average, and the lowest grade (with its subject).
 *
 * @author Fantome
 * @date 2019/6/19
 */
public class GradeReduce extends Reducer<Text, Text, Text, NullWritable> {
    /** Reused output key holding the formatted summary line. */
    private final Text outKey = new Text();

    /**
     * Computes the maximum, integer average and minimum grade of one student.
     *
     * <p>When several subjects share the maximum (or minimum) grade, the subject seen last
     * in {@code infos} is reported, because the comparisons are inclusive.
     *
     * @param name    student name
     * @param infos   this student's records, each formatted as {@code subject \t grade}
     * @param context task context used to emit the summary line
     * @throws IOException           if writing the output record fails
     * @throws InterruptedException  if the task is interrupted while writing
     * @throws NumberFormatException if a grade field is not an integer
     */
    @Override
    protected void reduce(Text name, Iterable<Text> infos, Context context)
            throws IOException, InterruptedException {
        // Start from the extreme int values rather than 0 / 100 so that grades outside
        // the 0..100 range (bonus points, penalties) are still tracked correctly.
        int gradeMax = Integer.MIN_VALUE;
        int gradeMin = Integer.MAX_VALUE;
        // Subject that produced the maximum / minimum grade.
        String subjectMax = "";
        String subjectMin = "";
        // Sum of grades; long so that a very large group cannot overflow int.
        long gradeSum = 0L;
        // Number of grades seen.
        int num = 0;

        for (Text info : infos) {
            // Each value carries the subject and the grade, separated by a tab.
            String[] splits = info.toString().split("\t");
            String subject = splits[0];
            int grade = Integer.parseInt(splits[1].trim());

            // Track the maximum and the subject it belongs to.
            if (grade >= gradeMax) {
                gradeMax = grade;
                subjectMax = subject;
            }
            // Track the minimum and the subject it belongs to.
            if (grade <= gradeMin) {
                gradeMin = grade;
                subjectMin = subject;
            }
            gradeSum += grade;
            num += 1;
        }

        // Nothing to summarise; also avoids dividing by zero below.
        if (num == 0) {
            return;
        }

        // Integer division yields the whole-number average without any cast
        // (truncates toward zero).
        long avg = gradeSum / num;

        // Output: name, subjectMax:gradeMax, avg, subjectMin:gradeMin
        outKey.set(name + "\t"
                + "最大分数科目:" + subjectMax + ":" + gradeMax + "\t"
                + "平均分数:" + avg + "\t"
                + "最小分数科目:" + subjectMin + ":" + gradeMin);
        context.write(outKey, NullWritable.get());
    }
}