使用MapReduce求平均数,并使用MapReduce自带的计数器(Counter)进行计数
求平均数主要是使用了Combiner,同时在mapper端通过计数器统计了匹配和不匹配的记录数,最后在控制台输出。
Combiner在map阶段结束以后运行:对每个分区来说,数据可以被任意分成若干组,每组有几条数据经过combiner方法都不影响最终的统计结果,这要求combiner的运算满足结合律和交换律。即对一个分区来说,可以进行随意分组,组与组之间的结果可以再次合并。注意:直接对平均数再求平均并不满足这一性质,因此需要同时携带count,按count加权后再合并。
1、自定义Writable
/**
 * Hadoop {@code Writable} value that pairs a record count with the average
 * temperature computed over those records.
 *
 * <p>The wire format is the count as a {@code long} followed by the average
 * as a {@code double}; {@link #readFields} reads them back in the same order.
 */
public class AverageTuple implements Writable {

    /** Number of records the average was computed from. */
    private long count;

    /** Average temperature over {@link #count} records. */
    private double averageTemp;

    /** Returns the number of records represented by this tuple. */
    public long getCount() {
        return this.count;
    }

    /** Sets the number of records represented by this tuple. */
    public void setCount(long count) {
        this.count = count;
    }

    /** Returns the average temperature held by this tuple. */
    public double getAverageTemp() {
        return this.averageTemp;
    }

    /** Sets the average temperature held by this tuple. */
    public void setAverageTemp(double averageTemp) {
        this.averageTemp = averageTemp;
    }

    /**
     * Serializes this tuple: the count first, then the average.
     *
     * @param dataOutput sink the two fields are written to
     * @throws IOException if writing to {@code dataOutput} fails
     */
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeLong(this.count);
        dataOutput.writeDouble(this.averageTemp);
    }

    /**
     * Restores this tuple from the format produced by {@link #write}.
     *
     * @param dataInput source the two fields are read from
     * @throws IOException if reading from {@code dataInput} fails
     */
    public void readFields(DataInput dataInput) throws IOException {
        this.count = dataInput.readLong();
        this.averageTemp = dataInput.readDouble();
    }

    /** Renders both fields in a single-line, human-readable form. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("AverageCompute.AverageTuple{");
        text.append("count=").append(this.count);
        text.append(", averageTemp=").append(this.averageTemp);
        text.append('}');
        return text.toString();
    }
}
2、mapper方法
在mapper端统计计数
context.getCounter("Total","not-match").increment(1); 解释:将名为Total的计数器组下的not-match计数器加1,作业结束后即可读出该计数器累计的数目。
/**
 * Mapper that turns each input line of the form {@code "<key> <number>"}
 * (fields separated by a single space) into the pair
 * {@code (key, AverageTuple{count=1, averageTemp=number})}.
 *
 * <p>Lines that parse successfully increment the {@code Total/match} counter.
 * Lines with fewer than two fields, or whose second field is not a number,
 * increment {@code Total/not-match} and are skipped instead of failing the task.
 */
public class AverageMapper extends Mapper<LongWritable,Text,Text,AverageTuple> {
    // Output objects are reused across map() calls to avoid per-record allocation.
    Text mapKey = new Text();
    AverageTuple averageTuple = new AverageTuple();

    /**
     * Parses one input line and emits its key with a single-record tuple.
     *
     * @param key     input key supplied by the framework (not used)
     * @param value   one line of input text
     * @param context task context used for counters and output
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Split once instead of once per field.
        String[] fields = value.toString().split(" ");
        if (fields.length < 2) {
            context.getCounter("Total", "not-match").increment(1);
            return;
        }

        double temperature;
        try {
            temperature = Double.parseDouble(fields[1]);
        } catch (NumberFormatException ignored) {
            // A non-numeric value is a malformed record: count it and move on.
            context.getCounter("Total", "not-match").increment(1);
            return;
        }

        mapKey.set(fields[0]);
        averageTuple.setCount(1);
        averageTuple.setAverageTemp(temperature);
        context.getCounter("Total", "match").increment(1);
        context.write(mapKey, averageTuple);
    }
}
3、reducer
/**
 * Reducer (also usable as a combiner) that merges partial averages for a key
 * into one {@code AverageTuple}.
 *
 * <p>Each incoming tuple is treated as {@code count} records with mean
 * {@code averageTemp}. The merged mean is the count-weighted mean of the
 * inputs, so the result is the same however the framework groups the tuples.
 */
public class AverageReducer extends Reducer<Text,AverageTuple,Text,AverageTuple> {
    // Output objects are reused across reduce() calls to avoid per-key allocation.
    Text uid = new Text();
    AverageTuple reduceValue = new AverageTuple();

    /**
     * Merges all tuples of one key and emits the combined count and average.
     *
     * @param key     the grouping key
     * @param values  partial tuples for {@code key}
     * @param context task context used for output
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    public void reduce(Text key, Iterable<AverageTuple> values, Context context) throws IOException, InterruptedException {
        // long/double accumulators: int accumulators would narrow the sum on
        // every "+=" and make the final division an integer division,
        // silently truncating the average.
        long count = 0L;
        double sum = 0.0;
        for (AverageTuple average : values) {
            count += average.getCount();
            sum += average.getAverageTemp() * average.getCount();
        }

        reduceValue.setCount(count);
        // Avoid emitting NaN if every incoming tuple carried a zero count.
        reduceValue.setAverageTemp(count == 0L ? 0.0 : sum / count);
        uid.set(key);
        context.write(uid, reduceValue);
    }
}
4、main方法:将reducer同时设置为Combiner
job.setCombinerClass(AverageReducer.class);
/**
 * Driver for the average-computation job: wires up the mapper, the reducer
 * (also registered as the combiner), and the input/output paths, then runs
 * the job and prints the counters of the {@code Total} group on success.
 */
public class AverageMain {
    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
     * @throws IOException            if job setup or submission fails
     * @throws ClassNotFoundException if a job class cannot be loaded
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        if (args.length < 2) {
            System.err.println("Usage: AverageMain <input path> <output path>");
            System.exit(2);
        }

        // Build the job object.
        Job job = Job.getInstance(new Configuration());
        // Must be the class that contains the main method.
        job.setJarByClass(AverageMain.class);

        // Mapper settings.
        job.setMapperClass(AverageMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(AverageTuple.class);

        // Reducer settings; the reducer doubles as the combiner.
        job.setReducerClass(AverageReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(AverageTuple.class);
        job.setCombinerClass(AverageReducer.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Submit the job and wait for it to finish.
        int code = job.waitForCompletion(true) ? 0 : 1;
        if (code == 0) {
            for (Counter counter : job.getCounters().getGroup("Total")) {
                System.out.println(counter.getDisplayName() + "是" + counter.getValue());
            }
        } else {
            // Report the failure to the calling shell or scheduler.
            System.exit(code);
        }
    }
}