# MapReduce练习-----学生成绩相关题目

1、统计每门课程的参考人数和课程平均分
2、统计每门课程参考学生的平均分，并且按课程存入不同的结果文件，要求一门课程一个结果文件，并且按平均分从高到低排序，分数保留一位小数。
3、求出每门课程参考学生成绩最高的学生的信息：课程，姓名和平均分。

computer,huangxiaoming,85,86,41,75,93,42,85
computer,xuzheng,54,52,86,91,42
computer,huangbo,85,42,96,38
english,zhaobenshan,54,52,86,91,42,85,75
english,liuyifei,85,41,75,21,85,96,14
algorithm,liuyifei,75,85,62,48,54,96,15
computer,huangjiaju,85,75,86,85,85
english,liuyifei,76,95,86,74,68,74,48
english,huangdatou,48,58,67,86,15,33,85
algorithm,huanglei,76,95,86,74,68,74,48
algorithm,huangjiaju,85,75,86,85,85,74,86
computer,huangdatou,48,58,67,86,15,33,85
english,zhouqi,85,86,41,75,93,42,85,75,55,47,22
english,huangbo,85,42,96,38,55,47,22
algorithm,liutao,85,75,85,99,66
computer,huangzitao,85,86,41,75,93,42,85
math,wangbaoqiang,85,86,41,75,93,42,85
computer,liujialing,85,41,75,21,85,96,14,74,86
computer,liuyifei,75,85,62,48,54,96,15
computer,liutao,85,75,85,99,66,88,75,91
computer,huanglei,76,95,86,74,68,74,48
english,liujialing,75,85,62,48,54,96,15
math,huanglei,76,95,86,74,68,74,48
math,huangjiaju,85,75,86,85,85,74,86
math,liutao,48,58,67,86,15,33,85
english,huanglei,85,75,85,99,66,88,75,91
math,xuzheng,54,52,86,91,42,85,75
math,huangxiaoming,85,75,85,99,66,88,75,91
math,liujialing,85,86,41,75,93,42,85,75
english,huangxiaoming,85,86,41,75,93,42,85
algorithm,huangdatou,48,58,67,86,15,33,85
algorithm,huangzitao,85,86,41,75,93,42,85,75

###### 问题一：统计每门课程的参考人数和课程平均分

import java.io.IOException;

/**
 * Problem 1: for every course, report the number of input records for that
 * course and the average of all scores recorded for it.
 *
 * <p>Input line format: {@code course,student,score1,score2,...}, for example
 * {@code computer,huangxiaoming,85,86,41,75,93,42,85}.
 *
 * <p>NOTE(review): the "participant count" is the number of input lines per
 * course; a student who appears on two lines for the same course is counted
 * twice. Confirm whether distinct students are required.
 */
public class CourseOne {

    /** A usable record has a course, a student name and at least one score. */
    private static final int MIN_FIELDS = 3;

    /**
     * Emits {@code course -> "<examCount>_<scoreSum>"} for each input line, so
     * the reducer can compute a course-wide average over every single exam.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] fields = value.toString().split(",");
            if (fields.length < MIN_FIELDS) {
                // Blank or truncated line: without this guard a blank line would be
                // emitted as an empty-named course with a negative exam count.
                return;
            }

            // Everything from the third field onwards is one exam score.
            long totalTimes = fields.length - 2;
            long sum = 0L;
            for (int i = 2; i < fields.length; i++) {
                sum += Long.parseLong(fields[i]);
            }

            // Key: course name. Value: exam count and score sum joined by '_'.
            context.write(new Text(fields[0]), new Text(totalTimes + "_" + sum));
        }
    }

    /**
     * Receives all {@code "<examCount>_<scoreSum>"} values of one course and
     * writes {@code course -> "<recordCount>\t<average>"}.
     */
    public static class MyReducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Number of records (one per mapper output) seen for this course.
            int count = 0;
            // The mapper produces long values, so accumulate them as long as well.
            long totalScore = 0L;
            long examTimes = 0L;

            for (Text t : values) {
                String[] parts = t.toString().split("_");
                count++;
                examTimes += Long.parseLong(parts[0]);
                totalScore += Long.parseLong(parts[1]);
            }

            // Average over every individual exam of the course.
            float avg = totalScore * 1.0F / examTimes;
            context.write(key, new Text(count + "\t" + avg));
        }
    }

    /**
     * Configures and runs the job; exits with 0 on success and 1 on failure.
     *
     * @param args unused; input and output paths are hard-coded below
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        job.setJarByClass(CourseOne.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path("G:/files/mr/day2/q3/input"));
        FileOutputFormat.setOutputPath(job, new Path("G:/files/mr/day2/q3/output1"));

        boolean isDone = job.waitForCompletion(true);
        System.exit(isDone ? 0 : 1);
    }
}
###### 问题二：统计每门课程参考学生的平均分，并且按课程存入不同的结果文件，要求一门课程一个结果文件，并且按平均分从高到低排序，分数保留一位小数。

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class CourseBean implements WritableComparable<CourseBean>{
private String course; //课程名
private String name; //学生姓名
private float avg; //平均分

public String getCourse() {
return course;
}
public void setCourse(String course) {
this.course = course;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public float getAvg() {
return avg;
}
public void setAvg(float avg) {
this.avg = avg;
}

public CourseBean(String course, String name, float avg) {
super();
this.course = course;
this.name = name;
this.avg = avg;
}

public CourseBean() {

}

/**
* 通过toString方法自定义输出类型
*/
@Override
public String toString() {
return course + "\t" + name + "\t" + avg;
}

/**
* 序列化
*/
@Override
public void write(DataOutput out) throws IOException {
out.writeUTF(course);
out.writeUTF(name);
out.writeFloat(avg);
}

/**
* 反序列化
*/
@Override
public void readFields(DataInput in) throws IOException {
}

//比较规则
@Override
public int compareTo(CourseBean o) {
float flag = o.avg - this.avg;
return flag > 0.0f ? 1:-1;
}

import org.apache.hadoop.io.NullWritable;

/**
 * Routes each {@link CourseBean} to a partition chosen by its course name so
 * that every course can end up in its own reducer output.
 *
 * <p>The four known courses appear to come from the problem-1 result
 * (course, record count, average):
 * <pre>
 * algorithm  6   71.12195
 * computer   10  69.77273
 * english    9   66.35294
 * math       7   73.07843
 * </pre>
 */
public class CoursePartitioner extends Partitioner<CourseBean, NullWritable> {

    /**
     * Maps algorithm, computer and english to slots 0, 1 and 2, and every other
     * course to slot 3.
     *
     * @param key           the record whose course selects the partition
     * @param value         unused
     * @param numPartitions number of partitions configured for the job
     * @return a partition index in {@code [0, numPartitions)}
     */
    @Override
    public int getPartition(CourseBean key, NullWritable value, int numPartitions) {
        final String course = key.getCourse();
        final int slot;
        if ("algorithm".equals(course)) {
            slot = 0;
        } else if ("computer".equals(course)) {
            slot = 1;
        } else if ("english".equals(course)) {
            slot = 2;
        } else {
            slot = 3;
        }
        // Stay inside the valid range when the job runs with fewer than four
        // partitions; with four or more the slot is returned unchanged.
        return slot % numPartitions;
    }

}
MapReduce程序：
import java.io.IOException;
import java.text.DecimalFormat;

/**
 * Problem 2: compute every student's average score per course, write one
 * result file per course, sorted by average from high to low with one decimal
 * place.
 *
 * <p>A custom key bean ({@code CourseBean}) carries the sort order and a custom
 * partitioner ({@code CoursePartitioner}) sends each course to its own reducer.
 */
public class CourseTwo {
    // Not used by the job in this listing; kept so the class's members are unchanged.
    static Text text = new Text();

    /** CoursePartitioner produces partition indices 0..3, so four reducers are required. */
    private static final int REDUCER_COUNT = 4;

    /** A usable record has a course, a student name and at least one score. */
    private static final int MIN_FIELDS = 3;

    /**
     * Parses {@code course,student,score1,score2,...} and emits a
     * {@code CourseBean} holding the student's average rounded to one decimal.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, CourseBean, NullWritable> {
        // Built once per mapper instead of once per record. Locale.ROOT symbols keep
        // '.' as the decimal separator so Float.parseFloat can always read the text back.
        private final DecimalFormat oneDecimal = new DecimalFormat(
                ".0", java.text.DecimalFormatSymbols.getInstance(java.util.Locale.ROOT));

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] fields = value.toString().split(",");
            if (fields.length < MIN_FIELDS) {
                // Blank or truncated line: there is no score to average.
                return;
            }

            long sum = 0L;
            long totalTimes = fields.length - 2;
            for (int i = 2; i < fields.length; i++) {
                sum += Long.parseLong(fields[i]);
            }

            // The student's average in this course, kept to one decimal place.
            float avg = sum * 1.0f / totalTimes;
            float rounded = Float.parseFloat(oneDecimal.format(avg));

            context.write(new CourseBean(fields[0], fields[1], rounded), NullWritable.get());
        }
    }

    /**
     * Writes the key once for every value it receives; partitioning and the
     * key's own ordering have already arranged the records.
     */
    public static class MyReducer extends Reducer<CourseBean, NullWritable, CourseBean, NullWritable> {
        @Override
        protected void reduce(CourseBean key, Iterable<NullWritable> values, Context context)
                throws IOException, InterruptedException {
            for (NullWritable nullWritable : values) {
                context.write(key, nullWritable);
            }
        }
    }

    /**
     * Configures and runs the job; exits with 0 on success and 1 on failure.
     *
     * @param args unused; input and output paths are hard-coded below
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        Job job = Job.getInstance(conf);

        job.setJarByClass(CourseTwo.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        job.setOutputKeyClass(CourseBean.class);
        job.setOutputValueClass(NullWritable.class);

        // Use the custom partitioner, and run one reducer per partition it can
        // return; with a single reducer all courses land in one output file.
        job.setPartitionerClass(CoursePartitioner.class);
        job.setNumReduceTasks(REDUCER_COUNT);

        FileInputFormat.setInputPaths(job, new Path("G:/files/mr/day2/q3/input"));
        FileOutputFormat.setOutputPath(job, new Path("G:/files/mr/day2/q3/output2"));

        boolean isDone = job.waitForCompletion(true);
        System.exit(isDone ? 0 : 1);
    }
}
###### 问题三：求出每门课程参考学生成绩最高的学生的信息：课程，姓名和平均分

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Map output key for problem 3: a student's record in one course, including
 * the average and the highest single score.
 *
 * <p>Natural ordering is by course (descending) and, within a course, by
 * {@code maxScore} from high to low. The field used by the grouping comparator
 * (course) is compared first, so all records of one course are adjacent.
 */
public class CourseBean2 implements WritableComparable<CourseBean2> {
    private String course;
    private String name;
    private float avg;
    private long maxScore;

    public long getMaxScore() {
        return maxScore;
    }

    public void setMaxScore(long maxScore) {
        this.maxScore = maxScore;
    }

    public String getCourse() {
        return course;
    }

    public void setCourse(String course) {
        this.course = course;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public float getAvg() {
        return avg;
    }

    public void setAvg(float avg) {
        this.avg = avg;
    }

    public CourseBean2(String course, String name, float avg, long maxScore) {
        super();
        this.course = course;
        this.name = name;
        this.avg = avg;
        this.maxScore = maxScore;
    }

    /** No-argument constructor; fields are then filled by {@link #readFields}. */
    public CourseBean2() {

    }

    /** Tab-separated course, name, average and highest score. */
    @Override
    public String toString() {
        return course + "\t" + name + "\t" + avg + "\t" + maxScore;
    }

    /** Serializes the fields in the order course, name, avg, maxScore. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(course);
        out.writeUTF(name);
        out.writeFloat(avg);
        out.writeLong(maxScore);
    }

    /**
     * Deserializes the fields in exactly the order {@link #write} emitted them.
     * The original listing left this method empty, so no field was ever
     * restored from the serialized form.
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.course = in.readUTF();
        this.name = in.readUTF();
        this.avg = in.readFloat();
        this.maxScore = in.readLong();
    }

    /**
     * Orders by course first and by highest score second, both descending.
     * Returns 0 when course and highest score are both equal.
     */
    @Override
    public int compareTo(CourseBean2 o) {
        // Arguments are swapped on purpose to obtain descending order.
        int byCourse = o.course.compareTo(this.course);
        if (byCourse != 0) {
            return byCourse;
        }
        return Long.compare(o.maxScore, this.maxScore);
    }

}

import org.apache.hadoop.io.WritableComparable;

/**
 * Custom grouping comparator: two {@code CourseBean2} keys belong to the same
 * group when their course names are equal.
 *
 * <p>Per the original author's note, when no grouping comparator is supplied
 * the fields used in {@code compareTo} decide both grouping and sorting.
 */
public class CourseGroupComparator extends WritableComparator {

    /**
     * Registers {@code CourseBean2} as the key class with the second argument
     * set to {@code true}. The original author added this constructor because
     * {@link #compare} otherwise failed with a NullPointerException.
     */
    public CourseGroupComparator() {
        super(CourseBean2.class, true);
    }

    /**
     * Compares two keys by course name only.
     *
     * @param a first key, expected to be a {@code CourseBean2}
     * @param b second key, expected to be a {@code CourseBean2}
     * @return the result of comparing the two course names
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        final CourseBean2 first = (CourseBean2) a;
        final CourseBean2 second = (CourseBean2) b;
        final String firstCourse = first.getCourse();
        final String secondCourse = second.getCourse();
        return firstCourse.compareTo(secondCourse);
    }
}
MapReduce程序：
import java.io.IOException;
import java.text.DecimalFormat;

/**
 * Computes every student's average score per course, writes one result file
 * per course, sorted by average from high to low with one decimal place.
 *
 * <p>A custom key bean ({@code CourseBean}) carries the sort order and a custom
 * partitioner ({@code CoursePartitioner}) sends each course to its own reducer.
 *
 * <p>NOTE(review): this listing sits under the problem-3 heading but is the
 * problem-2 job again; it uses neither {@code CourseBean2} nor
 * {@code CourseGroupComparator}. Confirm whether the problem-3 driver is missing.
 */
public class CourseTwo {
    // Not used by the job in this listing; kept so the class's members are unchanged.
    static Text text = new Text();

    /** CoursePartitioner produces partition indices 0..3, so four reducers are required. */
    private static final int REDUCER_COUNT = 4;

    /** A usable record has a course, a student name and at least one score. */
    private static final int MIN_FIELDS = 3;

    /**
     * Parses {@code course,student,score1,score2,...} and emits a
     * {@code CourseBean} holding the student's average rounded to one decimal.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, CourseBean, NullWritable> {
        // Built once per mapper instead of once per record. Locale.ROOT symbols keep
        // '.' as the decimal separator so Float.parseFloat can always read the text back.
        private final DecimalFormat oneDecimal = new DecimalFormat(
                ".0", java.text.DecimalFormatSymbols.getInstance(java.util.Locale.ROOT));

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] fields = value.toString().split(",");
            if (fields.length < MIN_FIELDS) {
                // Blank or truncated line: there is no score to average.
                return;
            }

            long sum = 0L;
            long totalTimes = fields.length - 2;
            for (int i = 2; i < fields.length; i++) {
                sum += Long.parseLong(fields[i]);
            }

            // The student's average in this course, kept to one decimal place.
            float avg = sum * 1.0f / totalTimes;
            float rounded = Float.parseFloat(oneDecimal.format(avg));

            context.write(new CourseBean(fields[0], fields[1], rounded), NullWritable.get());
        }
    }

    /**
     * Writes the key once for every value it receives; partitioning and the
     * key's own ordering have already arranged the records.
     */
    public static class MyReducer extends Reducer<CourseBean, NullWritable, CourseBean, NullWritable> {
        @Override
        protected void reduce(CourseBean key, Iterable<NullWritable> values, Context context)
                throws IOException, InterruptedException {
            for (NullWritable nullWritable : values) {
                context.write(key, nullWritable);
            }
        }
    }

    /**
     * Configures and runs the job; exits with 0 on success and 1 on failure.
     *
     * @param args unused; input and output paths are hard-coded below
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        Job job = Job.getInstance(conf);

        job.setJarByClass(CourseTwo.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        job.setOutputKeyClass(CourseBean.class);
        job.setOutputValueClass(NullWritable.class);

        // Use the custom partitioner, and run one reducer per partition it can
        // return; with a single reducer all courses land in one output file.
        job.setPartitionerClass(CoursePartitioner.class);
        job.setNumReduceTasks(REDUCER_COUNT);

        FileInputFormat.setInputPaths(job, new Path("G:/files/mr/day2/q3/input"));
        FileOutputFormat.setOutputPath(job, new Path("G:/files/mr/day2/q3/output2"));

        boolean isDone = job.waitForCompletion(true);
        System.exit(isDone ? 0 : 1);
    }
}

• 评论

• 上一篇
• 下一篇