数据样例(每行一条记录,字段以英文逗号分隔):
格式说明: xx 表示每次考试的分数,考试次数不固定
课程,姓名,xx,xx,xx,xx
computer,huangxiaoming,85,86,41,75,93,42,85
computer,xuzheng,54,52,86,91,42
computer,huangbo,85,42,96,38
english,zhaobenshan,54,52,86,91,42,85,75
english,liuyifei,85,41,75,21,85,96,14
algorithm,liuyifei,75,85,62,48,54,96,15
computer,huangjiaju,85,75,86,85,85
english,liuyifei,76,95,86,74,68,74,48
english,huangdatou,48,58,67,86,15,33,85
algorithm,huanglei,76,95,86,74,68,74,48
algorithm,huangjiaju,85,75,86,85,85,74,86
computer,huangdatou,48,58,67,86,15,33,85
english,zhouqi,85,86,41,75,93,42,85,75,55,47,22
english,huangbo,85,42,96,38,55,47,22
algorithm,liutao,85,75,85,99,66
computer,huangzitao,85,86,41,75,93,42,85
math,wangbaoqiang,85,86,41,75,93,42,85
computer,liujialing,85,41,75,21,85,96,14,74,86
computer,liuyifei,75,85,62,48,54,96,15
computer,liutao,85,75,85,99,66,88,75,91
computer,huanglei,76,95,86,74,68,74,48
english,liujialing,75,85,62,48,54,96,15
math,huanglei,76,95,86,74,68,74,48
math,huangjiaju,85,75,86,85,85,74,86
math,liutao,48,58,67,86,15,33,85
english,huanglei,85,75,85,99,66,88,75,91
math,xuzheng,54,52,86,91,42,85,75
math,huangxiaoming,85,75,85,99,66,88,75,91
math,liujialing,85,86,41,75,93,42,85,75
english,huangxiaoming,85,86,41,75,93,42,85
algorithm,huangdatou,48,58,67,86,15,33,85
algorithm,huangzitao,85,86,41,75,93,42,85,75
需求:
1.求出每门课程参考学生中平均分第二高的学生的信息:课程、姓名和平均分
2.自定义序列化类、分区、map 和 reduce
实现:
1.自定义 JavaBean 类
/**
 * Composite MapReduce key describing one student's result in one course.
 *
 * <p>The bean is serialized through {@link #write(DataOutput)} and
 * {@link #readFields(DataInput)}, and ordered by {@link #compareTo(Scorebean)}:
 * course ascending, then average score descending, then name ascending.
 *
 * <p>{@link #hashCode()} is derived from the course only, so a hash-based
 * partitioner that relies on the key's hash code sends every record of a course
 * to the same reducer even when more than one reduce task is configured.
 */
public class Scorebean implements WritableComparable<Scorebean> {
    private String course;
    private String name;
    private double avgscore; // average score

    /** No-arg constructor; the fields are filled later via setters or {@link #readFields}. */
    public Scorebean() {
    }

    /**
     * Sets all three fields at once.
     *
     * @param course   course name
     * @param name     student name
     * @param avgscore average score of the student in that course
     */
    public void set(String course, String name, double avgscore) {
        this.course = course;
        this.name = name;
        this.avgscore = avgscore;
    }

    public String getCourse() {
        return course;
    }

    public void setCourse(String course) {
        this.course = course;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public double getAvgscore() {
        return avgscore;
    }

    public void setAvgscore(double avgscore) {
        this.avgscore = avgscore;
    }

    /**
     * Writes course, name and average score, in that order.
     *
     * @param out sink to serialize into
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(course);
        out.writeUTF(name);
        out.writeDouble(avgscore);
    }

    /**
     * Reads the fields back in the same order {@link #write(DataOutput)} emitted them.
     *
     * @param in source to deserialize from
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.course = in.readUTF();
        this.name = in.readUTF();
        this.avgscore = in.readDouble();
    }

    /**
     * Sort order of the key: course ascending, then average score descending.
     * Students with the same course and the same average are ordered by name so
     * that the result does not depend on the order in which records arrive.
     *
     * @param cs the key to compare against
     * @return -1, 0 or 1 for the course comparison; otherwise the sign of the
     *         score (descending) or name (ascending) comparison
     */
    @Override
    public int compareTo(Scorebean cs) {
        int byCourse = course.compareTo(cs.getCourse());
        if (byCourse != 0) {
            return byCourse < 0 ? -1 : 1;
        }
        // Same course: higher average first (arguments swapped for descending order).
        int byScore = Double.compare(cs.getAvgscore(), avgscore);
        if (byScore != 0) {
            return byScore;
        }
        return name.compareTo(cs.getName());
    }

    /** Two beans are equal when course, name and average score all match. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof Scorebean)) {
            return false;
        }
        Scorebean other = (Scorebean) obj;
        return Double.compare(avgscore, other.avgscore) == 0
                && (course == null ? other.course == null : course.equals(other.course))
                && (name == null ? other.name == null : name.equals(other.name));
    }

    /**
     * Hash of the course only. Equal beans still share a hash code, and all
     * records of one course hash identically.
     */
    @Override
    public int hashCode() {
        return course == null ? 0 : course.hashCode();
    }

    /** Tab-separated line: course, name, average score. */
    @Override
    public String toString() {
        return course + "\t" + name + "\t" + avgscore;
    }
}
2.自定义分组类
/**
 * Grouping comparator that treats two {@code Scorebean} keys as belonging to the
 * same reduce group whenever their course is the same, regardless of name or
 * average score.
 */
public class ScoreGroupingComparator extends WritableComparator {

    /** Registers {@code Scorebean} as the key class; {@code true} is passed as the second super-constructor argument. */
    protected ScoreGroupingComparator() {
        super(Scorebean.class, true);
    }

    /**
     * Compares two keys by course only.
     *
     * @param a first key, expected to be a {@code Scorebean}
     * @param b second key, expected to be a {@code Scorebean}
     * @return 0 when both keys have the same course (same group), non-zero otherwise
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        String leftCourse = ((Scorebean) a).getCourse();
        String rightCourse = ((Scorebean) b).getCourse();
        return leftCourse.compareTo(rightCourse);
    }
}
3.Mapper类
/**
 * Mapper that turns one input line {@code course,name,score1,score2,...} into a
 * {@code Scorebean} key carrying the course, the student name and the average
 * score rounded to one decimal place. The value is {@code NullWritable} because
 * everything the reducer needs is already in the key.
 *
 * <p>Lines that cannot be parsed (fewer than three comma-separated fields, e.g.
 * blank or header lines, or a non-numeric score) are skipped and counted under
 * the {@code Mapper_CS / MALFORMED_LINES} counter instead of failing the task.
 */
public class Mapper_CS extends Mapper<LongWritable, Text, Scorebean, NullWritable> {

    /** Course, name and at least one score. */
    private static final int MIN_FIELDS = 3;

    /** Index of the first score column. */
    private static final int FIRST_SCORE_INDEX = 2;

    private static final String COUNTER_GROUP = "Mapper_CS";
    private static final String MALFORMED_COUNTER = "MALFORMED_LINES";

    /** Output key, reused across calls to avoid allocating one bean per record. */
    private final Scorebean keyOut = new Scorebean();

    /**
     * Parses one line and emits its {@code Scorebean}.
     *
     * @param key     input key supplied by the input format (unused)
     * @param value   one line of text
     * @param context used to emit the output pair and to update the counter
     * @throws IOException          if emitting the pair fails
     * @throws InterruptedException if the task is interrupted while emitting
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] splits = value.toString().split(",");
        if (splits.length < MIN_FIELDS) {
            // Without at least one score the average is undefined and splits[1] may not exist.
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        int sum = 0;
        try {
            for (int i = FIRST_SCORE_INDEX; i < splits.length; i++) {
                sum += Integer.parseInt(splits[i].trim());
            }
        } catch (NumberFormatException ignored) {
            // A non-numeric score makes the whole record unusable; count it and move on.
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        int scoreCount = splits.length - FIRST_SCORE_INDEX;
        // Scale by 10 before rounding so the average keeps one decimal place.
        double avgScore = Math.round(sum * 10.0 / scoreCount) / 10D;
        keyOut.set(splits[0].trim(), splits[1].trim(), avgScore);
        context.write(keyOut, NullWritable.get());
    }
}
4.Reducer类
/**
 * Reducer that emits the second record of every group and nothing else.
 *
 * <p>NOTE(review): this relies on the job's sort order placing the highest
 * average first within a course, and on the framework updating {@code key} to
 * the current record while {@code values} is iterated - confirm against the
 * driver configuration.
 */
public class Reducer_CS extends Reducer<Scorebean, NullWritable, Scorebean, NullWritable> {

    /**
     * Skips the first value of the group and writes the key together with the
     * second value. Groups with a single value produce no output.
     *
     * <p>To emit the top two records instead, write the key for the first value
     * as well and stop after the second.
     *
     * @param key     current key of the group
     * @param values  values of the group
     * @param context used to emit the output pair
     * @throws IOException          if emitting the pair fails
     * @throws InterruptedException if the task is interrupted while emitting
     */
    @Override
    protected void reduce(Scorebean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
        boolean leaderSkipped = false;
        for (NullWritable marker : values) {
            if (!leaderSkipped) {
                // First record of the group: remember that it was seen and move on.
                leaderSkipped = true;
                continue;
            }
            // Second record of the group: emit it and stop reading the rest.
            context.write(key, marker);
            return;
        }
    }
}
5.Driver类实现
/**
 * Driver that configures and submits the job: {@code Mapper_CS} produces
 * {@code Scorebean} keys, {@code ScoreGroupingComparator} groups them by course
 * and {@code Reducer_CS} writes the selected record of each group.
 */
public class CSTop02Driver {

    /** Input directory used when no first argument is given. */
    private static final String DEFAULT_INPUT = "D:\\data\\score\\input";

    /** Output directory used when no second argument is given. */
    private static final String DEFAULT_OUTPUT = "D:\\data\\score\\output_top2";

    /**
     * Runs the job and exits with 0 on success, 1 on failure.
     *
     * @param args optional: {@code args[0]} input path, {@code args[1]} output
     *             path; each falls back to its built-in default when absent
     * @throws Exception if job setup, file-system access or job execution fails
     */
    public static void main(String[] args) throws Exception {
        Path inputPath = new Path(args.length > 0 ? args[0] : DEFAULT_INPUT);
        Path outputPath = new Path(args.length > 1 ? args[1] : DEFAULT_OUTPUT);

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, CSTop02Driver.class.getSimpleName());
        job.setJarByClass(CSTop02Driver.class);

        // Mapper and reducer implementations.
        job.setMapperClass(Mapper_CS.class);
        job.setReducerClass(Reducer_CS.class);

        // Key/value types of the map output.
        job.setMapOutputKeyClass(Scorebean.class);
        job.setMapOutputValueClass(NullWritable.class);

        // Key/value types of the final (reduce) output.
        job.setOutputKeyClass(Scorebean.class);
        job.setOutputValueClass(NullWritable.class);

        // Group reduce input by course only.
        job.setGroupingComparatorClass(ScoreGroupingComparator.class);

        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        // Remove a stale output directory so the job can be rerun; resolve the
        // file system from the path itself so it matches where the output lives.
        FileSystem fs = outputPath.getFileSystem(conf);
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }

        // Submit the job and block until it finishes.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}