package org.mapreduce;
import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/*
* @time 2016-05-10 14:14:17
* @author 朱海川
*/
//计算学生的平均成绩
public class AverageScore {
public static class AverageScoreMap extends Mapper<LongWritable, Text, Text, IntWritable> {
    /*
     * Parses score records of the form "<student-name> <score>" and emits
     * <name, score> pairs. With the default TextInputFormat, LineRecordReader
     * delivers one line per map() call (key = byte offset of the line,
     * value = the line's text), so the extra "\n" split below is defensive only.
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context)
            throws IOException, InterruptedException {
        // Split the input into lines first (normally a single line per call).
        StringTokenizer lines = new StringTokenizer(value.toString(), "\n");
        while (lines.hasMoreTokens()) {
            // StringTokenizer's default delimiters are space, \t, \n and \r,
            // which splits "name score" into its two columns.
            StringTokenizer fields = new StringTokenizer(lines.nextToken());
            if (!fields.hasMoreTokens()) {
                continue; // blank line: skip instead of throwing NoSuchElementException
            }
            String studentName = fields.nextToken();
            if (!fields.hasMoreTokens()) {
                continue; // malformed line with no score column: skip it
            }
            String studentScore = fields.nextToken();
            try {
                // Emit <name, score>; the shuffle groups all of a student's
                // scores onto one reducer call (mapper output = reducer input).
                context.write(new Text(studentName), new IntWritable(Integer.parseInt(studentScore)));
            } catch (NumberFormatException ignored) {
                // Non-numeric score: drop the record rather than failing the task.
            }
        }
    }
}
public static class AverageScoreReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
    /*
     * Receives one student name together with every score the mappers emitted
     * for that name (same-key pairs are routed to the same reducer call), and
     * writes the student's average score. The division is truncating integer
     * division, so the average is rounded toward zero.
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values,
            Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
        int total = 0;
        int subjects = 0;
        for (IntWritable score : values) {
            total += score.get();
            subjects++; // one value per subject
        }
        context.write(key, new IntWritable(total / subjects));
    }
}
/**
 * Builds the average-score job: the mapper emits {@code <name, score>} pairs
 * and the reducer writes each student's integer average.
 *
 * @param inPath  input directory containing the score files
 * @param outPath output directory (must not already exist)
 * @return a fully configured, not-yet-submitted {@link Job}
 * @throws IOException if the job cannot be created
 */
public static Job getJob(Path inPath, Path outPath) throws IOException {
    Configuration configuration = new Configuration();
    // Job.getInstance(...) replaces the deprecated new Job(Configuration) constructor.
    Job job = Job.getInstance(configuration, "average-score");
    job.setJarByClass(AverageScore.class);
    job.setMapperClass(AverageScoreMap.class);
    job.setReducerClass(AverageScoreReduce.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    // Declare the final (reducer) output types explicitly instead of relying
    // on the framework defaults.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, inPath);
    FileOutputFormat.setOutputPath(job, outPath);
    return job;
}
//原始数据:
/*china:
曹操 92
孙权 88
刘备 94
诸葛亮 100
郭嘉 57
*/
/* math
曹操 100
孙权 95
刘备 90
诸葛亮 66
郭嘉 88
*/
/**
 * Entry point. Optional arguments: {@code args[0]} = input path,
 * {@code args[1]} = output path; when absent, the original hard-coded HDFS
 * locations are used so existing invocations keep working.
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Path inPath = new Path(args.length > 0 ? args[0] : "hdfs://localhost:9000/averagescore/");
    Path outPath = new Path(args.length > 1 ? args[1] : "hdfs://localhost:9000/result");
    Job job = getJob(inPath, outPath);
    long startTime = System.currentTimeMillis();
    boolean succeeded = job.waitForCompletion(true);
    if (succeeded) {
        System.out.println("job finished in:" + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    }
    // Propagate the job result to the shell; the original exited 0 even when
    // the job failed, which hides failures from calling scripts.
    System.exit(succeeded ? 0 : 1);
}
}
// mapreduce 平均成绩
// 最新推荐文章于 2024-05-11 14:39:48 发布
// NOTE(review): the two lines above are blog-scrape residue, not Java; they are
// commented out so the file compiles.