Without a Combiner
0. Project structure
Goal: compute the average score for each student.
1. AvgDriver.java
package hadoop_test.avg_demo_03;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class AvgDriver {
    /* Sample input data:
    tom 69
    tom 88
    tom 78
    jary 109
    jary 90
    jary 81
    jary 35
    rose 23
    rose 100
    rose 230
    */
    public static void main(String[] args) throws Exception {
        System.setProperty("HADOOP_USER_NAME", "root");
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(AvgDriver.class);
        job.setMapperClass(AvgMapper.class);
        job.setReducerClass(AvgReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        FileInputFormat.setInputPaths(job, new Path("/hadoop_test/avg/avg.txt"));
        FileOutputFormat.setOutputPath(job, new Path("/hadoop_test/avg/result"));
        job.waitForCompletion(true);
    }
}
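For reference, with the sample data above (and the integer-division fix noted in the reducer below), the output in /hadoop_test/avg/result should contain one tab-separated line per student, in sorted key order under the default single reducer:

jary	78.75
rose	117.66666666666667
tom	78.33333333333333

(jary: 315 / 4, rose: 353 / 3, tom: 235 / 3.)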
2. AvgMapper
package hadoop_test.avg_demo_03;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class AvgMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Each input line looks like: "tom 69"
        String line = value.toString();
        String[] fields = line.split(" ");
        // First field is the student name, used as the output key, e.g. "tom"
        String outKey = fields[0];
        // Second field is the score; parse the text into a decimal int, e.g. 69
        int outValue = Integer.parseInt(fields[1]);
        context.write(new Text(outKey), new IntWritable(outValue));
    }
}
3. AvgReducer
package hadoop_test.avg_demo_03;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class AvgReducer extends Reducer<Text, IntWritable, Text, DoubleWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int num = 0;   // number of exams taken
        int count = 0; // sum of all scores
        for (IntWritable value : values) {
            count += value.get();
            num += 1;
        }
        // Cast before dividing; int / int would silently truncate the fraction
        double re = (double) count / num;
        context.write(key, new DoubleWritable(re));
    }
}
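One detail worth highlighting: in the original code the division was performed between two ints, so the fractional part was truncated before the result was assigned to the floating-point variable. The cast above fixes this. A minimal standalone illustration (the class name is just for demonstration):

public class IntDivisionDemo {
    public static void main(String[] args) {
        int count = 235; // tom's total score from the sample data
        int num = 3;     // tom's number of exams
        double truncated = count / num;        // int / int evaluates to 78, then widens to 78.0
        double exact = (double) count / num;   // cast first, so the division runs in double
        System.out.println(truncated + " vs " + exact); // prints: 78.0 vs 78.33333333333333
    }
}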
With a Combiner
0. Project structure
1. AvgDriver
package hadoop_test.avg_hmk_03;
import hadoop_test.Utils_hadoop;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class AvgDriver {
    /* Sample input data:
    tom 69
    tom 88
    tom 78
    jary 109
    jary 90
    jary 81
    jary 35
    rose 23
    rose 100
    rose 230
    */
    public static void main(String[] args) throws Exception {
        System.setProperty("HADOOP_USER_NAME", "root");
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(AvgDriver.class);
        job.setMapperClass(AvgMapper.class);
        job.setCombinerClass(AvgCombine.class);
        job.setReducerClass(AvgReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class); // note: the map output value is Text here, not IntWritable
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        FileInputFormat.setInputPaths(job, new Path("/hadoop_test/avg/avg.txt"));
        // Remove a leftover output directory so the job does not fail on startup
        if (Utils_hadoop.testExist(conf, "/hadoop_test/avg/homework_result")) {
            Utils_hadoop.rmDir(conf, "/hadoop_test/avg/homework_result");
        }
        FileOutputFormat.setOutputPath(job, new Path("/hadoop_test/avg/homework_result"));
        job.waitForCompletion(true);
    }
}
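Utils_hadoop is a project-local helper whose source is not shown in this post. Assuming it simply wraps the standard org.apache.hadoop.fs.FileSystem API, testExist and rmDir could look roughly like the following sketch (not the project's actual code):

package hadoop_test;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class Utils_hadoop {
    // Returns true if the given path exists on the file system named in conf.
    public static boolean testExist(Configuration conf, String path) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        return fs.exists(new Path(path));
    }
    // Recursively deletes the given directory, so the next job run can recreate it.
    public static void rmDir(Configuration conf, String path) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        fs.delete(new Path(path), true); // true = delete recursively
    }
}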
2. AvgMapper
package hadoop_test.avg_hmk_03;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class AvgMapper extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Each input line looks like: "tom 69"
        String line = value.toString();
        String[] fields = line.split(" ");
        // First field is the student name, used as the output key, e.g. "tom"
        String outKey = fields[0];
        // Second field is the score; parse the text into a decimal int, e.g. 69
        int outValue = Integer.parseInt(fields[1]);
        System.out.println(outKey + ":" + outValue); // debug output
        context.write(new Text(outKey), new Text(String.valueOf(outValue)));
    }
}
Note: the map output value is changed to Text because the target combine format is "name : totalScore_examCount". The combiner's input types must match the mapper's output types (they share the same Context signature), so the map output value must also be Text.
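A related caveat: Hadoop treats the combiner as an optional optimization and may run it zero, one, or more times, so a reducer should not rely on the combiner having run. Because this mapper emits a bare score such as 69, the reducer's split("_") parsing below would fail on any record that no combiner touched. One defensive variant (a sketch, not the original code) is to make every stage speak the same sum_count format: the mapper emits each score with a count of 1, and the combiner reuses the reducer's parsing logic:

// In AvgMapper.map: emit "score_1" so the record format is identical
// whether or not the combiner runs.
context.write(new Text(outKey), new Text(outValue + "_1"));
// AvgCombine must then parse "sum_count" values exactly as AvgReducer
// does below, summing both components before re-emitting "sum_count".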
3. AvgCombine
package hadoop_test.avg_hmk_03;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class AvgCombine extends Reducer<Text, Text, Text, Text> {
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        int num = 0;   // number of exams taken
        int count = 0; // sum of all scores
        for (Text value : values) {
            count += Integer.parseInt(value.toString());
            num += 1;
        }
        System.out.println(key + ":" + count + "_" + num); // debug output
        // Emit "sum_count" (e.g. "tom 235_3") rather than a partial average:
        // averages of partial averages do not, in general, equal the overall average
        context.write(key, new Text(count + "_" + num));
    }
}
The Combiner extends Reducer and acts as a local reduce run after the sort step of the shuffle phase; its logic mirrors the Reducer above. The main difference is where its data comes from and goes: it consumes the output of a single map task, and its output is fed into the shuffle toward the reducers.
The framework groups all values that share a key into an Iterable<>, and the for loop folds them together by accumulating the sum and the count.
Note: an average must be computed as the total sum divided by the total count. Computing partial averages first and then averaging those does not, in general, reproduce the true average, as the worked example below shows.
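A quick worked example with tom's scores: split {69, 88, 78} into the partitions {69, 88} and {78}. The partial averages are 78.5 and 78.0, and averaging those gives 78.25, but the true average is 235 / 3 ≈ 78.33. Carrying (sum, count) pairs instead, as the combiner above does, yields (157, 2) and (78, 1), which merge to (235, 3) and reproduce the exact average.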
4. AvgReducer
package hadoop_test.avg_hmk_03;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class AvgReducer extends Reducer<Text, Text, Text, DoubleWritable> {
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        int num = 0;   // total number of exams taken
        int count = 0; // total sum of scores
        for (Text value : values) {
            // Each value arrives from the combiner as "sum_count", e.g. "235_3"
            String[] parts = value.toString().split("_");
            count += Integer.parseInt(parts[0]);
            num += Integer.parseInt(parts[1]);
        }
        // Cast before dividing; int / int would silently truncate the fraction
        double re = (double) count / num;
        context.write(key, new DoubleWritable(re));
    }
}