Student.java
package com.igeekhome.mapreduce.model;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Value object describing one student: id, name, class, three subject grades and
 * the derived average score. Implements Hadoop's {@link Writable} so it can be
 * passed between map and reduce tasks.
 *
 * <p>Serialization is null-safe: a field that has not been set is written as
 * {@code 0} (numeric fields) or the empty string (text fields). Consequently a
 * {@code null} field does not survive a write/read round trip; it comes back as
 * that default value. The field order and types on the wire are unchanged.
 */
public class Student implements Writable {
    private Integer stuId;
    private String stuName;
    private String stuClass;
    private Integer chineseGrade;
    private Integer mathGrade;
    private Integer englishGrade;
    private Integer averageScore;

    /** No-argument constructor; leaves every field {@code null}. */
    public Student() {
    }

    /**
     * Serializes all fields in a fixed order: stuId, stuName, stuClass,
     * chineseGrade, mathGrade, englishGrade, averageScore.
     *
     * @param out sink to write the fields to
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        // Unset fields are replaced by defaults so that a partially populated
        // instance can still be serialized instead of failing on unboxing.
        out.writeInt(orZero(stuId));
        out.writeUTF(orEmpty(stuName));
        out.writeUTF(orEmpty(stuClass));
        out.writeInt(orZero(chineseGrade));
        out.writeInt(orZero(mathGrade));
        out.writeInt(orZero(englishGrade));
        out.writeInt(orZero(averageScore));
    }

    /**
     * Restores all fields; the read order must mirror {@link #write(DataOutput)}.
     *
     * @param in source to read the fields from
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.stuId = in.readInt();
        this.stuName = in.readUTF();
        this.stuClass = in.readUTF();
        this.chineseGrade = in.readInt();
        this.mathGrade = in.readInt();
        this.englishGrade = in.readInt();
        this.averageScore = in.readInt();
    }

    /**
     * Returns only the average score as text, or {@code "null"} when it has not
     * been set.
     */
    @Override
    public String toString() {
        // String.valueOf(Object) tolerates null, unlike averageScore.toString().
        return String.valueOf(this.averageScore);
    }

    public Integer getStuId() {
        return stuId;
    }

    public void setStuId(Integer stuId) {
        this.stuId = stuId;
    }

    public String getStuName() {
        return stuName;
    }

    public void setStuName(String stuName) {
        this.stuName = stuName;
    }

    public String getStuClass() {
        return stuClass;
    }

    public void setStuClass(String stuClass) {
        this.stuClass = stuClass;
    }

    public Integer getChineseGrade() {
        return chineseGrade;
    }

    public void setChineseGrade(Integer chineseGrade) {
        this.chineseGrade = chineseGrade;
    }

    public Integer getMathGrade() {
        return mathGrade;
    }

    public void setMathGrade(Integer mathGrade) {
        this.mathGrade = mathGrade;
    }

    public Integer getEnglishGrade() {
        return englishGrade;
    }

    public void setEnglishGrade(Integer englishGrade) {
        this.englishGrade = englishGrade;
    }

    public Integer getAverageScore() {
        return averageScore;
    }

    public void setAverageScore(Integer averageScore) {
        this.averageScore = averageScore;
    }

    /**
     * Recomputes the average score from the three subject grades using integer
     * division, so the fractional part is truncated.
     *
     * @throws NullPointerException if any of the three grades has not been set
     */
    public void setAverageScore() {
        averageScore = (this.chineseGrade + this.mathGrade + this.englishGrade) / 3;
    }

    /** Returns the wrapped value, or 0 when the reference is null. */
    private static int orZero(Integer value) {
        return value == null ? 0 : value;
    }

    /** Returns the given text, or the empty string when the reference is null. */
    private static String orEmpty(String text) {
        return text == null ? "" : text;
    }
}
AverageScorePartitioner.java
package com.igeekhome.mapreduce.student;
import com.igeekhome.mapreduce.model.Student;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Partitioner;
/**
 * Routes each student record to a partition chosen by its average score:
 * below 60 goes to band 0, 60 through 80 (inclusive) to band 1, above 80 to band 2.
 *
 * <p>The band is clamped to the number of partitions actually available, so a job
 * configured with fewer than three partitions never receives an out-of-range
 * index; the higher bands then share the last partition.
 */
public class AverageScorePartitioner extends Partitioner<LongWritable, Student> {

    /**
     * @param longWritable      map output key (not used for the decision)
     * @param student           map output value whose average score selects the band
     * @param numberPartitioner total number of partitions for the job
     * @return a partition index in {@code [0, numberPartitioner - 1]}
     */
    @Override
    public int getPartition(LongWritable longWritable, Student student, int numberPartitioner) {
        // Read the average score that decides the band.
        Integer averageScore = student.getAverageScore();

        int partitionNum;
        if (averageScore < 60) {
            partitionNum = 0;
        } else if (averageScore <= 80) {
            partitionNum = 1;
        } else {
            partitionNum = 2;
        }

        // Keep the result inside the valid range when fewer than three
        // partitions exist; with three or more the band is returned unchanged.
        return Math.max(0, Math.min(partitionNum, numberPartitioner - 1));
    }
}
StudentDriver.java
package com.igeekhome.mapreduce.student;
import com.igeekhome.mapreduce.model.Student;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Entry point that configures and submits the student average-score job.
 *
 * <p>Expects two command-line arguments: the input path and the output path.
 */
public class StudentDriver {

    /**
     * Builds the job, runs it to completion and prints whether it succeeded.
     * The process exits with status 2 when arguments are missing and status 1
     * when the job fails.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws IOException            if job setup or submission fails
     * @throws ClassNotFoundException if a configured job class cannot be loaded
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        if (args.length < 2) {
            System.err.println("Usage: StudentDriver <input path> <output path>");
            System.exit(2);
        }

        // 1. Create the configuration and the job.
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        // 2. Locate the job jar through the driver class.
        job.setJarByClass(StudentDriver.class);

        // 3. Register the mapper and reducer.
        job.setMapperClass(StudentMapper.class);
        job.setReducerClass(StudentReducer.class);

        // 4. Mapper output types: the key and the value each have their own setter.
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Student.class);

        // 5. Final (reducer) output types.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Student.class);

        // 6. Input and output locations.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // 7. Custom partitioner with one reduce task per score band.
        job.setPartitionerClass(AverageScorePartitioner.class);
        job.setNumReduceTasks(3);

        // 8. Submit and wait for the result.
        boolean result = job.waitForCompletion(true);
        System.out.println(result?"执行成功":"执行失败");
        if (!result) {
            // Surface the failure to the calling shell or scheduler.
            System.exit(1);
        }
    }
}
StudentMapper.java
package com.igeekhome.mapreduce.student;
import com.igeekhome.mapreduce.model.Student;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Parses one comma-separated line of the form
 * {@code id,name,class,chinese,math,english} into a {@link Student}, computes
 * its average score and emits it keyed by the student id.
 *
 * <p>Blank lines are ignored. Lines with fewer than six fields or with
 * non-numeric id/grade fields are skipped and counted under the
 * {@code StudentMapper / MALFORMED_RECORDS} counter instead of failing the task.
 */
public class StudentMapper extends Mapper<LongWritable, Text,LongWritable, Student> {
    /** Number of comma-separated fields a valid record must contain. */
    private static final int EXPECTED_FIELDS = 6;
    private static final String COUNTER_GROUP = "StudentMapper";
    private static final String MALFORMED_COUNTER = "MALFORMED_RECORDS";

    // Student instance reused as the output value for every record.
    private final Student valueOut = new Student();
    // LongWritable instance reused as the output key for every record.
    private final LongWritable keyOut = new LongWritable();

    /**
     * @param key     input key supplied by the framework (not used)
     * @param value   one line of input text
     * @param context task context used to emit the record and update counters
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        if (line.trim().isEmpty()) {
            // Nothing to parse, e.g. a trailing empty line in the input file.
            return;
        }

        String[] studentData = line.split(",");
        if (studentData.length < EXPECTED_FIELDS) {
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        // Parse every numeric field before touching the reused output object,
        // so a bad record never leaves it half-updated.
        int stuId;
        int chineseGrade;
        int mathGrade;
        int englishGrade;
        try {
            stuId = Integer.parseInt(studentData[0].trim());
            chineseGrade = Integer.parseInt(studentData[3].trim());
            mathGrade = Integer.parseInt(studentData[4].trim());
            englishGrade = Integer.parseInt(studentData[5].trim());
        } catch (NumberFormatException e) {
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        valueOut.setStuId(stuId);
        valueOut.setStuName(studentData[1].trim());
        valueOut.setStuClass(studentData[2].trim());
        valueOut.setChineseGrade(chineseGrade);
        valueOut.setMathGrade(mathGrade);
        valueOut.setEnglishGrade(englishGrade);
        // Derive the average from the three grades just assigned.
        valueOut.setAverageScore();

        // The student id is the output key.
        keyOut.set(stuId);
        context.write(keyOut, valueOut);
    }
}
StudentReducer.java
package com.igeekhome.mapreduce.student;
import com.igeekhome.mapreduce.model.Student;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Emits, for each student id, a {@link Student} carrying the average score taken
 * from the values grouped under that id. When several values share a key, the
 * score of the last one iterated is the one written.
 */
public class StudentReducer extends Reducer<LongWritable, Student,LongWritable, Student> {
    // Single Student instance reused as the output value across reduce calls.
    private Student valueOut = new Student();

    /**
     * @param key     student id
     * @param values  student records grouped under {@code key}
     * @param context task context used to emit the result
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(LongWritable key, Iterable<Student> values, Context context) throws IOException, InterruptedException {
        for (Student current : values) {
            // Copy the score straight onto the output value.
            valueOut.setAverageScore(current.getAverageScore());
        }
        // One output record per key.
        context.write(key, valueOut);
    }
}
students_message.txt (英文逗号)
1001,张三,一班,85,77,96
1002,李斯,一班,45,62,64
1003,夏天,二班,94,89,92
1004,子涵,三班,61,77,76
1005,思齐,二班,75,84,93
1006,莫言,三班,99,85,79
1007,乔和,一班,88,77,66
--输入路径
D:\bigdata\students_message.txt
--输出路径
D:\bigdata\students_message_output
将项目打包为mapreduce_demo.jar并放到/opt/jar文件夹中,然后上传/opt/file/students_message.txt文件到/input文件夹中
在linux虚拟机上执行程序
hadoop jar /opt/jar/mapreduce_demo.jar com.igeekhome.mapreduce.student.StudentDriver /input /output