Hadoop学习之MapReduce案例:输出每个班级中成绩前三名的学生
所要处理的数据案例(学号与其余字段之间以制表符\t分隔,其余字段之间以英文逗号分隔,依次为:姓名,年龄,性别,班级,总分):
1500100001 施笑槐,22,女,文科六班,406
1500100002 吕金鹏,24,男,文科六班,440
1500100003 单乐蕊,22,女,理科六班,359
1500100004 葛德曜,24,男,理科三班,421
1500100005 宣谷芹,22,女,理科五班,395
1500100006 边昂雄,21,男,理科二班,314
...
1.Map端
package com.shujia.mr.top3;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Map side of the "top three students per class" job.
 *
 * <p>Each input line is expected to have the form
 * {@code <id>\t<name>,<age>,<gender>,<class>,<score>}. For every well-formed
 * line the mapper emits the class name as the key and a {@link Stu} built from
 * the line as the value, so that all students of one class meet in the same
 * reduce call.
 *
 * <p>Lines that do not match the expected layout (wrong number of fields, or a
 * non-numeric age/score) are skipped and counted in the
 * {@code Top3Mapper / MALFORMED_RECORDS} job counter instead of failing the task.
 */
public class Top3Mapper extends Mapper<LongWritable, Text, Text, Stu> {

    /** Counter group under which skipped records are reported. */
    private static final String COUNTER_GROUP = "Top3Mapper";

    /** Counter name for input lines that could not be parsed. */
    private static final String MALFORMED_COUNTER = "MALFORMED_RECORDS";

    /** Number of comma-separated fields expected after the student id. */
    private static final int EXPECTED_COLUMNS = 5;

    /**
     * Parses one input line and emits {@code (class name, student)}.
     *
     * @param key     the input key supplied by the input format (not used)
     * @param value   one line of input text
     * @param context the task context used to emit output and update counters
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Stu>.Context context) throws IOException, InterruptedException {
        String[] split = value.toString().split("\t");
        if (split.length != 2) {
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        String[] columns = split[1].split(",");
        if (columns.length != EXPECTED_COLUMNS) {
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        final int age;
        final int score;
        try {
            age = Integer.parseInt(columns[1]);
            score = Integer.parseInt(columns[4]);
        } catch (NumberFormatException e) {
            // A single bad record should not abort the whole job; treat it like
            // any other malformed line and make it visible through the counter.
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        String clazz = columns[3];
        Stu stu = new Stu(split[0], columns[0], age, columns[2], clazz, score);
        context.write(new Text(clazz), stu);
    }
}
2.Reduce端
package com.shujia.mr.top3;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
/**
 * Reduce side of the "top three students per class" job.
 *
 * <p>Receives all {@link Stu} values of one class, orders them by score
 * (highest first, ties broken by ascending student id) and writes the best
 * {@link #TOP_N} of them. Each output key is the student's {@code toString()}
 * followed by {@code ","} and the 1-based rank; the output value is
 * {@link NullWritable}.
 */
public class Top3Reducer extends Reducer<Text, Stu, Text, NullWritable> {

    /** Maximum number of students written per class. */
    private static final int TOP_N = 3;

    /**
     * Ranks the students of one class and emits the top entries.
     *
     * @param key     the class name
     * @param values  all students mapped to that class
     * @param context the task context used to emit output
     * @throws IOException          if writing an output record fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<Stu> values, Reducer<Text, Stu, Text, NullWritable>.Context context) throws IOException, InterruptedException {
        List<Stu> stus = new ArrayList<>();
        for (Stu stu : values) {
            // Copy every value: the framework reuses the same value object while
            // iterating, so storing the reference itself would leave the list
            // full of identical entries.
            stus.add(new Stu(stu.id, stu.name, stu.age, stu.gender, stu.clazz, stu.score));
        }

        Collections.sort(stus,
                new Comparator<Stu>() {
                    @Override
                    public int compare(Stu o1, Stu o2) {
                        // Descending by score; Integer.compare avoids the overflow
                        // that plain subtraction can produce.
                        int byScore = Integer.compare(o2.score, o1.score);
                        return byScore != 0 ? byScore : o1.id.compareTo(o2.id);
                    }
                }
        );

        // A class may have fewer than TOP_N students; never index past the end.
        int limit = Math.min(TOP_N, stus.size());
        for (int i = 0; i < limit; i++) {
            context.write(new Text(stus.get(i).toString() + "," + (i + 1)), NullWritable.get());
        }
    }
}
3.main方法
package com.shujia.mr.top3;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import java.io.FileNotFoundException;
import java.io.IOException;
/**
 * Driver for the "top three students per class" MapReduce job.
 *
 * <p>Wires {@link Top3Mapper} and {@link Top3Reducer} together, checks that the
 * input path exists, removes a pre-existing output path and then runs the job.
 */
public class Top3 {

    /** Input path used when no command-line argument is given. */
    private static final String DEFAULT_INPUT = "hadoop/out/reducejoin";

    /** Output path used when fewer than two command-line arguments are given. */
    private static final String DEFAULT_OUTPUT = "hadoop/out/new_top3";

    /**
     * Configures and runs the job, blocking until it finishes.
     *
     * <p>The process exits with status {@code 1} when the job reports failure;
     * on success the method returns normally.
     *
     * @param args optional: {@code args[0]} overrides the input path and
     *             {@code args[1]} overrides the output path; missing entries
     *             fall back to the built-in defaults
     * @throws IOException            if the input path does not exist
     *                                ({@link FileNotFoundException}) or a file
     *                                system / job submission error occurs
     * @throws InterruptedException   if waiting for the job is interrupted
     * @throws ClassNotFoundException if a job class cannot be resolved
     */
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        String inputDir = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
        String outputDir = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT;

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "Top3");
        job.setJarByClass(Top3.class);

        job.setMapperClass(Top3Mapper.class);
        job.setReducerClass(Top3Reducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Stu.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        FileSystem fileSystem = FileSystem.get(job.getConfiguration());
        Path outPath = new Path(outputDir);
        Path inpath = new Path(inputDir);

        if (!fileSystem.exists(inpath)) {
            throw new FileNotFoundException(inpath+"不存在");
        }
        TextInputFormat.addInputPath(job,inpath);

        // The job refuses to start if the output directory already exists,
        // so clear any leftover from a previous run first.
        if (fileSystem.exists(outPath)) {
            System.out.println("路径存在,开始删除");
            fileSystem.delete(outPath,true);
        }
        TextOutputFormat.setOutputPath(job,outPath);

        // Surface a failed job to the caller (shell, scheduler) via the exit
        // status instead of silently returning as if it had succeeded.
        boolean succeeded = job.waitForCompletion(true);
        if (!succeeded) {
            System.exit(1);
        }
    }
}
4.创建的学生类
package com.shujia.mr.top3;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.Serializable;
/**
 * Student record passed from the map side to the reduce side.
 *
 * <p>Implements {@link Writable}; the fields are serialized in the order
 * id, name, age, gender, clazz, score, and read back in the same order.
 * Fields are package-private and accessed directly by classes in this package.
 */
public class Stu implements Writable {

    /** Separator placed between fields in {@link #toString()}. */
    private static final String SEP = ", ";

    String id;
    String name;
    int age;
    String gender;
    String clazz;
    int score;

    /** Creates an empty instance; fields are populated later via {@link #readFields(DataInput)}. */
    public Stu() {
    }

    /**
     * Creates a fully populated student.
     *
     * @param id     student id
     * @param name   student name
     * @param age    age
     * @param gender gender
     * @param clazz  class name
     * @param score  score
     */
    public Stu(String id, String name, int age, String gender, String clazz, int score) {
        this.id = id;
        this.name = name;
        this.age = age;
        this.gender = gender;
        this.clazz = clazz;
        this.score = score;
    }

    /**
     * Returns the six fields joined by {@code ", "} in the order
     * id, name, age, gender, clazz, score.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append(id).append(SEP);
        text.append(name).append(SEP);
        text.append(age).append(SEP);
        text.append(gender).append(SEP);
        text.append(clazz).append(SEP);
        text.append(score);
        return text.toString();
    }

    /**
     * Serializes all fields to {@code out}.
     *
     * @param out the sink to write to
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        // String fields use writeUTF, numeric fields use writeInt;
        // the order here must mirror readFields.
        out.writeUTF(this.id);
        out.writeUTF(this.name);
        out.writeInt(this.age);
        out.writeUTF(this.gender);
        out.writeUTF(this.clazz);
        out.writeInt(this.score);
    }

    /**
     * Restores all fields from {@code in}, in the order used by {@link #write(DataOutput)}.
     *
     * @param in the source to read from
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        id = in.readUTF();
        name = in.readUTF();
        age = in.readInt();
        gender = in.readUTF();
        clazz = in.readUTF();
        score = in.readInt();
    }
}