MapReduce: Computing Average Scores

package org.mapreduce;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/*
 * @time 2016-05-10 14:14:17
 * @author 朱海川
 */

//Compute each student's average score
public class AverageScore {
	
	public static class AverageScoreMap extends Mapper<LongWritable, Text, Text, IntWritable>{

		/*
		The data a Mapper processes comes from the InputSplits produced by the InputFormat; each InputSplit is handled by one Mapper.
		The default InputFormat for text files is TextInputFormat, which divides the input into InputSplits (by byte ranges, typically one per HDFS block)
		and uses a LineRecordReader to parse each InputSplit into <key, value> pairs: key is the byte offset of the line within the file,
		value is the text of that line.
		*/
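		/*
		For example, with the sample "china" file shown near the end of this class,
		LineRecordReader would deliver pairs like (byte offsets are illustrative):
		  <0,  "曹操 92">
		  <10, "孙权 88">
		and each such pair becomes one call to map().
		*/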
		@Override
		protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context)
				throws IOException, InterruptedException {
			//First split the incoming value by line. With TextInputFormat each value is already
			//a single line of text, so this loop normally runs exactly once.
			StringTokenizer stringTokenizer = new StringTokenizer(value.toString(), "\n");
			//Process each line separately
			while(stringTokenizer.hasMoreElements()){
				//Split the line on whitespace.
				//StringTokenizer's default delimiters are: space, tab (\t), newline (\n) and carriage return (\r)
				StringTokenizer lineStringTokenizer = new StringTokenizer(stringTokenizer.nextToken());
				
				String studentName = lineStringTokenizer.nextToken();//the student's name
				String studentScore = lineStringTokenizer.nextToken();//the student's score
				
				Text studentNameText = new Text(studentName);
				int studentScoreInt = Integer.parseInt(studentScore);
				//Emit <name, score>; the mapper's output becomes the reducer's input
				context.write(studentNameText, new IntWritable(studentScoreInt));
			}
		}
		
	}
	
	
	public static class AverageScoreReduce extends Reducer<Text, IntWritable, Text, IntWritable>{

		/*
		Map output is distributed to the reducers by the partitioner; once a reducer finishes its reduce operation, the result is written out through the OutputFormat.
		The mapper's final output is a set of <key, value> pairs, which are merged on the way to the reducers: pairs with the same key are sent to the same reducer.
		The reducer's input is a key plus an Iterable over all of that key's values, along with the reducer's Context.
		*/
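		/*
		For example, with the sample data shown below, one reducer invocation would be
		  reduce("曹操", [92, 100])  ->  writes <"曹操", (92 + 100) / 2> = <"曹操", 96>
		(the order of values inside the Iterable is not guaranteed).
		*/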
		@Override
		protected void reduce(Text key, Iterable<IntWritable> values,
				Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
			int sum = 0, count = 0;
			Iterator<IntWritable> iterator = values.iterator();
			while(iterator.hasNext()){
				sum += iterator.next().get();
				count++; //count the number of subjects
			}
			int average = sum / count; //integer average: the fractional part is truncated
			context.write(key, new IntWritable(average));
		}
		
	}
	
	public static Job getJob(Path inPath, Path outPath) throws IOException{
		
		Configuration configuration = new Configuration();
		//new Job(Configuration) is deprecated in the new API; use the factory method instead
		Job job = Job.getInstance(configuration);
		
		job.setJarByClass(AverageScore.class);
		job.setMapperClass(AverageScoreMap.class);
		job.setReducerClass(AverageScoreReduce.class);
		
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);
		//also declare the reducer's (final) output types
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);
		
		FileInputFormat.addInputPath(job, inPath);
		FileOutputFormat.setOutputPath(job, outPath);
		return job;
	}
	//Sample input data:
	
	/* china:
	曹操 92
	孙权 88
	刘备 94
	诸葛亮 100
	郭嘉 57
	*/
	
	/* math
	曹操 100
	孙权 95
	刘备 90
	诸葛亮 66
	郭嘉 88
	*/
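	/* With this input, the expected output would be (integer division truncates the
	   fractional part; keys appear in Text's byte-order sort, tab-separated by default):
	刘备	92
	孙权	91
	曹操	96
	诸葛亮	83
	郭嘉	72
	*/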
	public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
		Path inPath = new Path("hdfs://localhost:9000/averagescore/");
		//note: the output path must not already exist, or the job will fail at submission
		Path outPath = new Path("hdfs://localhost:9000/result");
		Job job = getJob(inPath, outPath);
		long startTime = System.currentTimeMillis();
		if (job.waitForCompletion(true)) {
			System.out.println("job finished in:"+(System.currentTimeMillis() - startTime)/1000.0 +" seconds");
		}
	}

}
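A minimal sketch of how the job could be built and run (the jar name is an assumption; the HDFS paths match the ones hard-coded in main, and the "china"/"math" files hold the sample data above):

hdfs dfs -mkdir -p /averagescore
hdfs dfs -put china math /averagescore/
hadoop jar averagescore.jar org.mapreduce.AverageScore
hdfs dfs -cat /result/part-r-00000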
