准备数据
Bob 90 64 92
Alex 64 63 68
Grace 57 86 24
Henry 39 79 78
Adair 88 82 64
Chad 66 74 37
Colin 64 86 74
Eden 71 85 43
Grover 99 86 43
序列化类
序列化时写出什么，反序列化时就必须按相同的顺序和类型读入什么
package serialscore;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Custom Hadoop {@link Writable} carrying a student's name and exam scores.
 *
 * <p>Serialization contract: {@link #readFields} must read back fields in
 * exactly the order and types that {@link #write} emitted them.
 */
public class Score implements Writable {
    // Student name; written first, so it is also read first.
    private String name = "";
    // Exam scores; initialized to an empty array so write() never sees null.
    private int[] scores = new int[0];

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public int[] getScores() {
        return scores;
    }

    public void setScores(int[] scores) {
        // Null-guard so write() can always rely on scores.length.
        this.scores = (scores == null) ? new int[0] : scores;
    }

    /**
     * Serializes this record: name, then array length, then each element.
     * DataOutput has no method for writing a whole array, so the array is
     * decomposed into its length followed by the individual values.
     *
     * @param dataOutput sink supplied by the Hadoop framework
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(this.name);
        dataOutput.writeInt(this.scores.length);
        for (int score : this.scores) {
            dataOutput.writeInt(score);
        }
    }

    /**
     * Deserializes in exactly the order produced by {@link #write}:
     * name, length, then that many score values.
     *
     * @param dataInput source supplied by the Hadoop framework
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.name = dataInput.readUTF();
        int length = dataInput.readInt();
        this.scores = new int[length];
        for (int i = 0; i < length; i++) {
            this.scores[i] = dataInput.readInt();
        }
    }
}
Mapper类
package serialscore;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Parses one input line of the form {@code name score1 score2 ...} and emits
 * (name, Score) pairs keyed by student name.
 */
public class ScoreMapper extends Mapper<LongWritable, Text, Text, Score> {

    // Reused across map() calls to avoid allocating a new Text per record.
    private final Text outKey = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // trim + \s+ tolerates leading/trailing blanks, tabs, and runs of
        // spaces; the original split(" ") produced empty tokens for those.
        String line = value.toString().trim();
        if (line.isEmpty()) {
            // Skip blank lines instead of emitting a bogus empty-name record.
            return;
        }
        String[] fields = line.split("\\s+");

        Score score = new Score();
        score.setName(fields[0]);
        // Everything after the name is a numeric score.
        int[] scores = new int[fields.length - 1];
        for (int i = 1; i < fields.length; i++) {
            scores[i - 1] = Integer.parseInt(fields[i]);
        }
        score.setScores(scores);

        outKey.set(score.getName());
        context.write(outKey, score);
    }
}
Reducer类
package serialscore;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.junit.Test;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.Arrays;
/**
 * Averages all scores seen for one student and emits (name, "xx.xx").
 */
public class ScoreReducer extends Reducer<Text, Score, Text, Text> {

    // One reducer instance runs single-threaded, so a shared formatter is safe;
    // hoisted here to avoid re-creating it on every reduce() call.
    private final DecimalFormat df = new DecimalFormat("0.00");

    @Override
    protected void reduce(Text key, Iterable<Score> values, Context context)
            throws IOException, InterruptedException {
        long sum = 0;       // long avoids overflow on very large inputs
        int count = 0;      // total number of subjects seen
        for (Score s : values) {
            int[] scores = s.getScores();
            sum += Arrays.stream(scores).sum();
            count += scores.length;
        }
        if (count == 0) {
            // No scores for this key: nothing to average, and dividing would
            // throw ArithmeticException.
            return;
        }
        // Cast before dividing: the original sum/count was integer division,
        // which truncated the average before formatting (e.g. 82/3 -> 27.00).
        double average = (double) sum / count;
        context.write(key, new Text(df.format(average)));
    }
}
Driver
package serialscore;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Configures and submits the score-averaging MapReduce job.
 */
public class ScoreDriver {
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(ScoreDriver.class);
        job.setMapperClass(ScoreMapper.class);
        job.setReducerClass(ScoreReducer.class);

        // Map output (Text, Score) differs from the final (Text, Text)
        // output, so both pairs must be declared explicitly.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Score.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path("hdfs://hadoop01:9000/txt/score.txt"));
        // NOTE: the output directory must not already exist or the job fails.
        FileOutputFormat.setOutputPath(job, new Path("hdfs://hadoop01:9000/result/ruanwenfu"));

        // Propagate the job result as the process exit code; the original
        // discarded it, so failures still exited with status 0.
        boolean success = job.waitForCompletion(true);
        System.exit(success ? 0 : 1);
    }
}