数据
computer,huangxiaoming,85,86,41,75,93,42,85
computer,xuzheng,54,52,86,91,42
computer,huangbo,85,42,96,38
english,zhaobenshan,54,52,86,91,42,85,75
english,liuyifei,85,41,75,21,85,96,14
algorithm,liuyifei,75,85,62,48,54,96,15
computer,huangjiaju,85,75,86,85,85
english,liuyifei,76,95,86,74,68,74,48
english,huangdatou,48,58,67,86,15,33,85
algorithm,huanglei,76,95,86,74,68,74,48
algorithm,huangjiaju,85,75,86,85,85,74,86
computer,huangdatou,48,58,67,86,15,33,85
english,zhouqi,85,86,41,75,93,42,85,75,55,47,22
english,huangbo,85,42,96,38,55,47,22
algorithm,liutao,85,75,85,99,66
computer,huangzitao,85,86,41,75,93,42,85
math,wangbaoqiang,85,86,41,75,93,42,85
computer,liujialing,85,41,75,21,85,96,14,74,86
computer,liuyifei,75,85,62,48,54,96,15
computer,liutao,85,75,85,99,66,88,75,91
computer,huanglei,76,95,86,74,68,74,48
english,liujialing,75,85,62,48,54,96,15
math,huanglei,76,95,86,74,68,74,48
math,huangjiaju,85,75,86,85,85,74,86
math,liutao,48,58,67,86,15,33,85
english,huanglei,85,75,85,99,66,88,75,91
math,xuzheng,54,52,86,91,42,85,75
math,huangxiaoming,85,75,85,99,66,88,75,91
math,liujialing,85,86,41,75,93,42,85,75
english,huangxiaoming,85,86,41,75,93,42,85
algorithm,huangdatou,48,58,67,86,15,33,85
algorithm,huangzitao,85,86,41,75,93,42,85,75
题目需求
统计每门课程中每位参考学生的平均分,并按课程存入不同的结果文件:要求一门课程对应一个结果文件,每个文件内按平均分从高到低排序,分数保留一位小数
数据解释
数据字段个数不固定:
第一个是课程名称,总共有四个课程。
第二个是学生姓名,后面是每次考试的分数
代码
CourseScore类:
package com.mr3;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
public class CourseScore implements WritableComparable<CourseScore>
{
private String course;
private String name;
private double score;
public String getCourse() {
return course;
}
public void setCourse(String course) {
this.course = course;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public double getScore() {
return score;
}
public void setScore(double score) {
this.score = score;
}
public CourseScore(String course, String name, double score) {
super();
this.course = course;
this.name = name;
this.score = score;
}
public CourseScore()
{
}
public void write(DataOutput out) throws IOException {
// TODO Auto-generated method stub
out.writeUTF(course);
out.writeUTF(name);
out.writeDouble(score);
}
public void readFields(DataInput in) throws IOException {
// TODO Auto-generated method stub
this.course=in.readUTF();
this.name=in.readUTF();
this.score=in.readDouble();
}
public int compareTo(CourseScore cs) {
// TODO Auto-generated method stub
return cs.getScore()-this.getScore() > 0 ? 1 : -1;
}
public String toString() {
return course + "\t" + name + "\t"+ score;
}
}
CourseFour类:
package com.mr3;
import java.io.IOException;
import java.text.DecimalFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * MapReduce driver that computes each student's average score per course and
 * writes one result file per course, sorted by average score from high to low.
 *
 * <p>Input lines have the form {@code course,name,score1,score2,...} with a
 * variable number of scores. The job has no reducer class configured; ordering
 * comes from {@link CourseScore#compareTo(CourseScore)} on the map output key,
 * and the per-course split comes from {@link MyPartition}.
 */
public class CourseFour {

    /** Number of partition slots {@link MyPartition} distinguishes (three named courses + everything else). */
    private static final int COURSE_PARTITIONS = 4;

    /**
     * Parses one input line and emits a {@link CourseScore} key holding the
     * student's average score rounded to one decimal place.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, CourseScore, NullWritable> {

        /**
         * Formatter for "at most one decimal place". Built once per mapper
         * instead of once per record, and pinned to {@code Locale.ROOT} so the
         * decimal separator is always '.' and the text can be parsed back with
         * {@link Double#parseDouble(String)} regardless of the JVM's default locale.
         * The default rounding mode of {@link DecimalFormat} is left unchanged.
         */
        private final DecimalFormat oneDecimal = new DecimalFormat(
                "#.#", java.text.DecimalFormatSymbols.getInstance(java.util.Locale.ROOT));

        /**
         * @param key     input key supplied by the input format (not used)
         * @param value   one comma-separated input line
         * @param context sink for the {@code (CourseScore, NullWritable)} output pair
         * @throws NumberFormatException if a score field is not a valid number
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] fields = value.toString().split(",");
            // A usable record needs course, name and at least one score; blank or
            // truncated lines are skipped instead of failing the task with an
            // ArrayIndexOutOfBoundsException or a 0/0 average.
            if (fields.length < 3) {
                return;
            }

            String course = fields[0].trim();
            String name = fields[1].trim();

            double total = 0;
            for (int i = 2; i < fields.length; i++) {
                total += Double.parseDouble(fields[i]);
            }
            int scoreCount = fields.length - 2;

            double average = Double.parseDouble(oneDecimal.format(total / scoreCount));
            context.write(new CourseScore(course, name, average), NullWritable.get());
        }
    }

    /**
     * Routes each record to a partition chosen by course name so that every
     * course ends up in its own result file.
     *
     * <p>If the job runs with more reduce tasks than there are course slots,
     * the surplus tasks receive no records and produce empty files. If it runs
     * with fewer, the slot index is folded into the available range so that
     * the returned partition is always valid (some courses then share a file).
     */
    public static class MyPartition extends Partitioner<CourseScore, NullWritable> {

        /**
         * @param key           record whose course selects the partition
         * @param value         unused
         * @param numPartitions number of partitions configured for the job
         * @return 0 for "algorithm", 1 for "computer", 2 for "english" (all
         *         case-insensitive), 3 for any other course, each taken modulo
         *         {@code numPartitions}
         */
        @Override
        public int getPartition(CourseScore key, NullWritable value, int numPartitions) {
            String course = key.getCourse();
            int slot;
            // Literal-first comparison keeps this safe if the course is null.
            if ("algorithm".equalsIgnoreCase(course)) {
                slot = 0;
            } else if ("computer".equalsIgnoreCase(course)) {
                slot = 1;
            } else if ("english".equalsIgnoreCase(course)) {
                slot = 2;
            } else {
                // Unconditional fallback: every path must yield a partition.
                slot = 3;
            }
            return slot % numPartitions;
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     */
    public static void main(String[] args)
            throws IllegalArgumentException, IOException, ClassNotFoundException, InterruptedException {
        if (args.length < 2) {
            System.err.println("Usage: CourseFour <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, CourseFour.class.getSimpleName());
        job.setJarByClass(CourseFour.class);

        job.setMapperClass(MyMapper.class);
        job.setPartitionerClass(MyPartition.class);
        // One reduce task per course slot => one result file per course.
        job.setNumReduceTasks(COURSE_PARTITIONS);

        job.setMapOutputKeyClass(CourseScore.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(CourseScore.class);
        job.setOutputValueClass(NullWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Propagate job failure to the caller through the process exit status.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}
结果
根据分区,一共形成了4个文件
part-r-00000:
part-r-00001:
part-r-00002:
part-r-00003:
说明:为了练习采用了博主“中琦2513”的数据