MyEclipse+MapReduce小作业

数据:
computer,huangxiaoming,85,86,41,75,93,42,85
computer,xuzheng,54,52,86,91,42
computer,huangbo,85,42,96,38
english,zhaobenshan,54,52,86,91,42,85,75
english,liuyifei,85,41,75,21,85,96,14
algorithm,liuyifei,75,85,62,48,54,96,15
computer,huangjiaju,85,75,86,85,85
english,liuyifei,76,95,86,74,68,74,48
english,huangdatou,48,58,67,86,15,33,85
algorithm,huanglei,76,95,86,74,68,74,48
algorithm,huangjiaju,85,75,86,85,85,74,86
computer,huangdatou,48,58,67,86,15,33,85
english,zhouqi,85,86,41,75,93,42,85,75,55,47,22
english,huangbo,85,42,96,38,55,47,22
algorithm,liutao,85,75,85,99,66
computer,huangzitao,85,86,41,75,93,42,85
math,wangbaoqiang,85,86,41,75,93,42,85
computer,liujialing,85,41,75,21,85,96,14,74,86
computer,liuyifei,75,85,62,48,54,96,15
computer,liutao,85,75,85,99,66,88,75,91
computer,huanglei,76,95,86,74,68,74,48
english,liujialing,75,85,62,48,54,96,15
math,huanglei,76,95,86,74,68,74,48
math,huangjiaju,85,75,86,85,85,74,86
math,liutao,48,58,67,86,15,33,85
english,huanglei,85,75,85,99,66,88,75,91
math,xuzheng,54,52,86,91,42,85,75
math,huangxiaoming,85,75,85,99,66,88,75,91
math,liujialing,85,86,41,75,93,42,85,75
english,huangxiaoming,85,86,41,75,93,42,85
algorithm,huangdatou,48,58,67,86,15,33,85
algorithm,huangzitao,85,86,41,75,93,42,85,75
题目:
统计每门课程参考学生的平均分,并且按课程存入不同的结果文件,要求一门课程一个结果文件,并且按平均分从高到低排序,分数保留一位小数。

package 竞赛.studentScore;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Partitioner; 
public class CoursePartitioner extends Partitioner<CourseBean, NullWritable> {

	/**
	 * Routes each record to a fixed reduce task by course name so that every
	 * course ends up in its own output file. Must be used together with
	 * {@code job.setNumReduceTasks(4)}.
	 *
	 * Partition map: algorithm=0, computer=1, english=2, everything else
	 * (i.e. math, or an unexpected course) = 3.
	 */
	@Override
	public int getPartition(CourseBean key, NullWritable value, int numPartitions) {
		String course = key.getCourse();
		if ("algorithm".equals(course)) {
			return 0;
		}
		if ("computer".equals(course)) {
			return 1;
		}
		if ("english".equals(course)) {
			return 2;
		}
		return 3; // math and any unrecognized course
	}
}


package 竞赛.studentScore;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
 public class CourseBean implements WritableComparable<CourseBean> {
	private String course; // course name
	private String name;   // student name
	private float avg;     // per-course average score

	/** No-arg constructor required by Hadoop for deserialization. */
	public CourseBean() {}

	public CourseBean(String course, String name, float avg) {
		this.course = course;
		this.name = name;
		this.avg = avg;
	}

	public String getCourse() {
		return course;
	}
	public void setCourse(String course) {
		this.course = course;
	}
	public String getName() {
		return name;
	}
	public void setName(String name) {
		this.name = name;
	}
	public float getAvg() {
		return avg;
	}
	public void setAvg(float avg) {
		this.avg = avg;
	}

	/** Output line format: course TAB name TAB average. */
	@Override
	public String toString() {
		return course + "\t" + name + "\t" + avg;
	}

	/** Serialization; field order must match {@link #readFields}. */
	@Override
	public void write(DataOutput out) throws IOException {
		out.writeUTF(course);
		out.writeUTF(name);
		out.writeFloat(avg);
	}

	/** Deserialization; field order must match {@link #write}. */
	@Override
	public void readFields(DataInput in) throws IOException {
		course = in.readUTF();
		name = in.readUTF();
		avg = in.readFloat();
	}

	/**
	 * Sorts in DESCENDING order of average score.
	 *
	 * Bug fix: the original {@code (o.avg - this.avg) > 0 ? 1 : -1} never
	 * returned 0 and was not antisymmetric — for equal averages both
	 * {@code a.compareTo(b)} and {@code b.compareTo(a)} returned -1, which
	 * violates the Comparable contract and can break sorting. Float.compare
	 * honors the contract and also handles NaN deterministically.
	 */
	@Override
	public int compareTo(CourseBean o) {
		return Float.compare(o.avg, this.avg);
	}
 }

package 竞赛.studentScore;

import java.io.IOException;
import java.text.DecimalFormat; 
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 
/**
 * 统计每门课程的参考人数和课程平均分
 * 考虑到要需求要根据课程进行分组并对平均值进行排序,这里使用自定义bean的形式来进行处理
 * 因为要将数据根据课程进行分区并写入到不同的文件中,所以这里使用自定义partitioner组件进行分区
 * 要注意的是这时候就要设置reduceTask的个数
 */
/**
 * MapReduce driver: computes each student's average score per course, writes
 * one output file per course (via CoursePartitioner + 4 reduce tasks), sorted
 * by average descending (via CourseBean.compareTo), one decimal place.
 */
public class CourseTwo {
	// NOTE(review): unused by the job; kept only in case external code
	// references it. Candidate for removal.
	static Text text = new Text();

	/** Parses one CSV line and emits (course, name, average) as the key. */
	public static class MyMapper extends Mapper<LongWritable, Text, CourseBean, NullWritable> {
		@Override
		protected void map(LongWritable key, Text value, Context context)
				throws IOException, InterruptedException {
			// Line format: course,name,score1,score2,...  e.g.
			// computer,huangxiaoming,85,86,41,75,93,42,85
			String[] fields = value.toString().split(",");
			if (fields.length < 3) {
				return; // skip malformed/empty lines instead of dividing by zero
			}
			long sum = 0L;
			int scoreCount = fields.length - 2;
			for (int i = 2; i < fields.length; i++) {
				sum += Long.parseLong(fields[i].trim());
			}
			// Round to one decimal place arithmetically. The original used
			// DecimalFormat(".0") followed by Float.parseFloat, which breaks
			// in locales whose decimal separator is ',' (parseFloat throws
			// NumberFormatException) and allocated a formatter per record.
			float avg = Math.round(sum * 10.0 / scoreCount) / 10.0f;
			context.write(new CourseBean(fields[0], fields[1], avg), NullWritable.get());
		}
	}

	/** Keys arrive partitioned by course and sorted by average; pass through. */
	public static class MyReducer extends Reducer<CourseBean, NullWritable, CourseBean, NullWritable> {
		@Override
		protected void reduce(CourseBean key, Iterable<NullWritable> values, Context context)
				throws IOException, InterruptedException {
			// Hadoop updates `key` in place while iterating the grouped
			// values, so writing inside the loop emits every record even if
			// several records compared equal.
			for (NullWritable v : values) {
				context.write(key, v);
			}
		}
	}

	public static void main(String[] args) throws Exception {
		// Paths can now be supplied on the command line (args[0]=input,
		// args[1]=output); the original hard-coded HDFS locations remain the
		// defaults for backward compatibility.
		String input = args.length > 0 ? args[0]
				: "hdfs://192.168.216.128:9000/home/master/hadoop/input/studentsScore.txt";
		String output = args.length > 1 ? args[1]
				: "hdfs://192.168.216.128:9000/home/master/hadoop/output2/";
		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf);
		job.setJarByClass(CourseTwo.class);
		job.setMapperClass(MyMapper.class);
		job.setReducerClass(MyReducer.class);
		job.setMapOutputKeyClass(CourseBean.class);
		job.setMapOutputValueClass(NullWritable.class);
		job.setOutputKeyClass(CourseBean.class);
		job.setOutputValueClass(NullWritable.class);
		// Custom partitioner splits output by course; the reduce-task count
		// must match the number of partitions (4).
		job.setPartitionerClass(CoursePartitioner.class);
		job.setNumReduceTasks(4);
		FileInputFormat.setInputPaths(job, new Path(input));
		FileOutputFormat.setOutputPath(job, new Path(output));
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}

结果:
(原文此处为两张运行结果截图,按课程分为 part-r-00000 ~ part-r-00003 四个输出文件,每个文件内按平均分从高到低排序;图片未能随文本保留。)
