package org.mapreduce;
import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/*
* @time 2016-05-10 14:14:17
* @author 朱海川
*/
//计算学生的平均成绩
public class AverageScore {
public static class AverageScoreMap extends Mapper<LongWritable, Text, Text, IntWritable> {
    /*
     * Parses score records of the form "<student-name> <score>" and emits
     * <name, score> pairs. With the default TextInputFormat, LineRecordReader
     * delivers one line per map() call (key = byte offset of the line,
     * value = the line's text), so the extra "\n" split below is defensive only.
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context)
            throws IOException, InterruptedException {
        // Split the input into lines first (normally a single line per call).
        StringTokenizer lines = new StringTokenizer(value.toString(), "\n");
        while (lines.hasMoreTokens()) {
            // StringTokenizer's default delimiters are space, \t, \n and \r,
            // which splits "name score" into its two columns.
            StringTokenizer fields = new StringTokenizer(lines.nextToken());
            if (!fields.hasMoreTokens()) {
                continue; // blank line: skip instead of throwing NoSuchElementException
            }
            String studentName = fields.nextToken();
            if (!fields.hasMoreTokens()) {
                continue; // malformed line with no score column: skip it
            }
            String studentScore = fields.nextToken();
            try {
                // Emit <name, score>; the shuffle groups all of a student's
                // scores onto one reducer call (mapper output = reducer input).
                context.write(new Text(studentName), new IntWritable(Integer.parseInt(studentScore)));
            } catch (NumberFormatException ignored) {
                // Non-numeric score: drop the record rather than failing the task.
            }
        }
    }
}
public static class AverageScoreReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
    /*
     * Receives one student name together with every score the mappers emitted
     * for that name (same-key pairs are routed to the same reducer call), and
     * writes the student's average score. The division is truncating integer
     * division, so the average is rounded toward zero.
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values,
            Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
        int total = 0;
        int subjects = 0;
        for (IntWritable score : values) {
            total += score.get();
            subjects++; // one value per subject
        }
        context.write(key, new IntWritable(total / subjects));
    }
}
/**
 * Builds the average-score job: the mapper emits {@code <name, score>} pairs
 * and the reducer writes each student's integer average.
 *
 * @param inPath  input directory containing the score files
 * @param outPath output directory (must not already exist)
 * @return a fully configured, not-yet-submitted {@link Job}
 * @throws IOException if the job cannot be created
 */
public static Job getJob(Path inPath, Path outPath) throws IOException {
    Configuration configuration = new Configuration();
    // Job.getInstance(...) replaces the deprecated new Job(Configuration) constructor.
    Job job = Job.getInstance(configuration, "average-score");
    job.setJarByClass(AverageScore.class);
    job.setMapperClass(AverageScoreMap.class);
    job.setReducerClass(AverageScoreReduce.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    // Declare the final (reducer) output types explicitly instead of relying
    // on the framework defaults.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, inPath);
    FileOutputFormat.setOutputPath(job, outPath);
    return job;
}
//原始数据:
/*china:
曹操 92
孙权 88
刘备 94
诸葛亮 100
郭嘉 57
*/
/* math
曹操 100
孙权 95
刘备 90
诸葛亮 66
郭嘉 88
*/
/**
 * Entry point. Optional arguments: {@code args[0]} = input path,
 * {@code args[1]} = output path; when absent, the original hard-coded HDFS
 * locations are used so existing invocations keep working.
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Path inPath = new Path(args.length > 0 ? args[0] : "hdfs://localhost:9000/averagescore/");
    Path outPath = new Path(args.length > 1 ? args[1] : "hdfs://localhost:9000/result");
    Job job = getJob(inPath, outPath);
    long startTime = System.currentTimeMillis();
    boolean succeeded = job.waitForCompletion(true);
    if (succeeded) {
        System.out.println("job finished in:" + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    }
    // Propagate the job result to the shell; the original exited 0 even when
    // the job failed, which hides failures from calling scripts.
    System.exit(succeeded ? 0 : 1);
}
}
// mapreduce 平均成绩
// 最新推荐文章于 2024-05-11 14:39:48 发布
// NOTE(review): the two lines above are blog-scrape residue, not Java; they are
// commented out so the file compiles.