目录
待处理数据内容:
名字 | 语文 | 数学 | 英语 |
lh | 92 | 68 | 70 |
zyt | 94 | 88 | 75 |
ls | 96 | 78 | 78 |
hgw | 90 | 70 | 56 |
yxx | 80 | 88 | 73 |
hz | 90 | 98 | 70 |
xyd | 60 | 88 | 73 |
hj | 90 | 58 | 70 |
cs | 50 | 58 | 11 |
计算每个人的平均成绩
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Computes each student's average score across all subject columns.
 * Input lines are tab-separated: name, then one score per subject.
 * Map-only job: emits "name -> average"; no reducer is configured.
 */
public class AvgDemo {

    /** Mapper: parses one tab-separated line and emits (name, average). */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {

        // Reused output objects to avoid a per-record allocation.
        private final Text k = new Text();
        private final Text v = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] fields = value.toString().split("\t");
            // Need a name plus at least one score column.
            if (fields.length < 2) {
                return;
            }
            // Generalized: sum every score column instead of hard-coding three.
            // The original summed exactly fields[1..3] yet divided by
            // (fields.length - 1), which disagreed for any other column count.
            double sum = 0;
            try {
                for (int i = 1; i < fields.length; i++) {
                    sum += Integer.parseInt(fields[i].trim());
                }
            } catch (NumberFormatException e) {
                // Skip header or malformed lines instead of failing the task.
                return;
            }
            double avg = sum / (fields.length - 1);
            k.set(fields[0]);
            v.set(avg + "");
            context.write(k, v);
        }
    }

    /**
     * Job driver.
     * @param args args[0] = input path, args[1] = output path
     */
    public static void main(String[] args) {
        try {
            // 1. Configuration and job creation.
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf, "model01");
            job.setJarByClass(AvgDemo.class);
            // 2. Map settings (map-only job: no reducer class is set).
            job.setMapperClass(MyMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            // 3. Submit and exit with the job's status.
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (IOException | ClassNotFoundException | InterruptedException e) {
            e.printStackTrace();
        }
    }
}
求每个学科的平均成绩
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
每个学科的平均成绩?
语文 数学 英语
76 89 90
* @author lyd
*
*/
public class AvgDemo02 {
//自定义myMapper
public static class MyMapper extends Mapper<LongWritable, Text, Text, Text>{
//只在map方法运行之前执行一次。(仅执行一次)
@Override
protected void setup(Context context)
throws IOException, InterruptedException {
}
Text k = new Text();
Text v = new Text();
@Override
protected void map(LongWritable key, Text value,Context context)
throws IOException, InterruptedException {
String line = value.toString();
String scores [] = line.split("\t");
String chinese = scores[1];
String math = scores[2];
String english = scores[3];
k.set("_");
v.set(chinese+"_"+math+"_"+english);
context.write(k, v);
}
//map方法运行完后执行一次(仅执行一次)
@Override
protected void cleanup(Context context)
throws IOException, InterruptedException {
}
}
//自定义myReducer
public static class MyReducer extends Reducer<Text, Text, Text, Text>{
//在reduce方法执行之前执行一次。(仅一次)
@Override
protected void setup(Context context)
throws IOException, InterruptedException {
context.write(new Text("语文"+"\t"+"数学"+"\t"+"英语"), new Text(""));
}
@Override
protected void reduce(Text key, Iterable<Text> value,Context context)
throws IOException, InterruptedException {
int counter = 0;
double c = 0;
double m = 0;
double e = 0;
for (Text t : value) {
String scores [] = t.toString().split("_");
c += Double.parseDouble(scores[0]);
m += Double.parseDouble(scores[1]);
e += Double.parseDouble(scores[2]);
counter ++;
}
context.write(new Text(c/counter+"\t"+m/counter+"\t"+e/counter), new Text(""));
}
//在reduce方法执行之后执行一次。(仅一次)
@Override
protected void cleanup(Context context)
throws IOException, InterruptedException {
}
}
/**
* job的驱动方法
* @param args
*/
public static void main(String[] args) {
try {
//1、获取Conf
Configuration conf = new Configuration();
//2、创建job
Job job = Job.getInstance(conf, "model01");
//3、设置运行job的class
job.setJarByClass(AvgDemo02.class);
//4、设置map相关属性
job.setMapperClass(MyMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
//5、设置reduce相关属性
job.setReducerClass(MyReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
FileOutputFormat.setOutputPath(job, new Path(args[1]));
//6、提交运行job
int isok = job.waitForCompletion(true) ? 0 : 1;
//退出
System.exit(isok);
} catch (IOException | ClassNotFoundException | InterruptedException e) {
e.printStackTrace();
}
}
}
总平均分每个分数段的人数以及百分比
分数段 | 人数 | 占总数的百分比 |
<60 | 1 | 8% |
60-70 | 2 | 16% |
70-80 | 5 | 33% |
80-90 | 2 | 16% |
90-100 | 3 | 28% |
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
<60 1 8%
60-70 2 16%
70-80 5 33%
80-90 2 16%
90-100 3 28%
* @author lyd
*
*/
/**
 * Buckets each student's average score into grade bands and reports, per band,
 * the head count and its percentage of all students.
 */
public class AvgDemo03 {

    /** Mapper: computes the per-student average and emits (band, "1"). */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {

        private final Text k = new Text();
        private final Text v = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] scores = value.toString().split("\t");
            double avg = (Double.parseDouble(scores[1])
                    + Double.parseDouble(scores[2])
                    + Double.parseDouble(scores[3])) / (scores.length - 1);
            // Pick the grade band for this average.
            String band;
            if (avg < 60) {
                band = "<60";
            } else if (avg < 70) {
                band = "60-70";
            } else if (avg < 80) {
                band = "70-80";
            } else if (avg < 90) {
                band = "80-90";
            } else if (avg <= 100) {
                band = "90-100";
            } else {
                // Bug fix: the original called context.write() outside the
                // if-chain, so an average outside every band re-emitted the
                // previous record's stale key/value. Skip such records instead.
                return;
            }
            k.set(band);
            v.set("1");
            context.write(k, v);
        }
    }

    /**
     * Reducer: counts students per band, then emits all rows with percentages
     * in cleanup(). NOTE: state spans reduce() calls, so the totals are only
     * correct when the job runs with a single reducer (the default here).
     */
    public static class MyReducer extends Reducer<Text, Text, Text, Text> {

        // Total number of students seen across every reduce() call.
        private int totalPerson = 0;
        // One "band_count" entry per reduce() call, replayed in cleanup().
        private final List<String> li = new ArrayList<String>();

        @Override
        protected void setup(Context context)
                throws IOException, InterruptedException {
            // Header row, written once before the data rows.
            context.write(new Text("分数段"), new Text("人数" + "\t" + "百分比"));
        }

        @Override
        protected void reduce(Text key, Iterable<Text> value, Context context)
                throws IOException, InterruptedException {
            // The mapper only emits the five known bands, so every value in
            // this group counts toward this band. (The original per-key
            // if/else chain incremented the same counter in every branch and
            // was pure redundancy.)
            int i = 0;
            for (Text t : value) {
                i++;
                totalPerson++;
            }
            li.add(key.toString() + "_" + i);
        }

        @Override
        protected void cleanup(Context context)
                throws IOException, InterruptedException {
            // Percentages need the grand total, which is only known after all
            // reduce() calls — hence the deferred output.
            for (String s : li) {
                String[] l = s.split("_");
                context.write(new Text(l[0]),
                        new Text(l[1] + "\t" + Double.parseDouble(l[1]) / totalPerson * 100 + "%"));
            }
        }
    }

    /**
     * Job driver.
     * @param args args[0] = input path, args[1] = output path
     */
    public static void main(String[] args) {
        try {
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf, "model01");
            job.setJarByClass(AvgDemo03.class);
            // Map side.
            job.setMapperClass(MyMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            FileInputFormat.addInputPath(job, new Path(args[0]));
            // Reduce side.
            job.setReducerClass(MyReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            // Submit and exit with the job's status.
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (IOException | ClassNotFoundException | InterruptedException e) {
            e.printStackTrace();
        }
    }
}
将三门课程中任意一门不及格的学生过滤出来
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
将三门课程中任意一门不及格的学生过滤出来?
* @author lyd
*
*/
/**
 * Filters out every student who failed (scored below 60 in) at least one of
 * the three subjects. Map-only job: matching input lines pass through verbatim.
 */
public class GrepDemo {

    /**
     * Mapper: keeps any tab-separated input line in which at least one of the
     * three subject scores is below the passing mark of 60.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] cols = value.toString().split("\t");
            boolean failedAny = Double.parseDouble(cols[1]) < 60
                    || Double.parseDouble(cols[2]) < 60
                    || Double.parseDouble(cols[3]) < 60;
            if (failedAny) {
                // Emit the whole original line as the key; empty value.
                context.write(value, new Text(""));
            }
        }
    }

    /**
     * Job driver.
     * @param args args[0] = input path, args[1] = output path
     */
    public static void main(String[] args) {
        try {
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf, "model01");
            job.setJarByClass(GrepDemo.class);
            // Map-only job: no reducer class is configured.
            job.setMapperClass(MyMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            // Submit and exit with the job's status.
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (IOException | ClassNotFoundException | InterruptedException e) {
            e.printStackTrace();
        }
    }
}
统计成材率
每一门成绩都大于60分的人数/总人数
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
统计成材率?每一门成绩都大于60分的人数/总人数
成材率 88%
留级率 12%
* @author lyd
*
*/
/**
 * Computes the "success" rate — students passing every subject (>= 60)
 * divided by the total number of students — plus the complementary
 * repeat-year rate.
 */
public class SuccessDemo {

    /** Mapper: classifies each student as "up" (all subjects >= 60) or "down". */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] scores = value.toString().split("\t");
            boolean passedAll = Double.parseDouble(scores[1]) >= 60
                    && Double.parseDouble(scores[2]) >= 60
                    && Double.parseDouble(scores[3]) >= 60;
            context.write(new Text(passedAll ? "up" : "down"), new Text("1"));
        }
    }

    /**
     * Reducer: tallies "up" vs. total and emits the two rates in cleanup().
     * State spans reduce() calls, so the job must run with a single reducer
     * (the default) for the totals to be correct.
     */
    public static class MyReducer extends Reducer<Text, Text, Text, Text> {

        private int totalPerson = 0; // all students seen
        private int u = 0;           // students who passed every subject

        @Override
        protected void setup(Context context)
                throws IOException, InterruptedException {
            // NOTE(review): this header looks copy-pasted from the score-band
            // job (AvgDemo03); kept as-is to preserve the existing output.
            context.write(new Text("分数段"), new Text("人数" + "\t" + "百分比"));
        }

        @Override
        protected void reduce(Text key, Iterable<Text> value, Context context)
                throws IOException, InterruptedException {
            for (Text t : value) {
                if (key.toString().equals("up")) {
                    u++;
                }
                totalPerson++;
            }
        }

        @Override
        protected void cleanup(Context context)
                throws IOException, InterruptedException {
            // Guard against empty input, which would otherwise print "NaN%".
            if (totalPerson > 0) {
                context.write(new Text("成才率"), new Text(u * 100.0 / totalPerson + "%"));
                context.write(new Text("留级率"),
                        new Text((totalPerson - u) * 100.0 / totalPerson + "%"));
            }
        }
    }

    /**
     * Job driver.
     * @param args args[0] = input path, args[1] = output path
     */
    public static void main(String[] args) {
        try {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", "hdfs://hadoop01:9000");
            Job job = Job.getInstance(conf, "model01");
            job.setJarByClass(SuccessDemo.class);
            // Map side.
            job.setMapperClass(MyMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            FileInputFormat.addInputPath(job, new Path(args[0]));
            // Reduce side.
            job.setReducerClass(MyReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            // Delete the output directory if it already exists.
            FileSystem fs = FileSystem.get(conf);
            Path out = new Path(args[1]);
            if (fs.exists(out)) {
                // Bug fix: the original checked args[1] but deleted args[2],
                // so the real output directory was never cleaned and the job
                // would fail (or an unrelated path could be removed).
                fs.delete(out, true);
            }
            FileOutputFormat.setOutputPath(job, out);
            // Submit and exit with the job's status.
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (IOException | ClassNotFoundException | InterruptedException e) {
            e.printStackTrace();
        }
    }
}