求最大值、最小值并统计总数(对计数求和)
注意事项:
(1)、自己定义一个输出类型MinMaxCountTuple 。
(2)、使用Combiner提前聚合。
1、自定义Writable
注意:(1)、定义private成员变量,写get和set方法
(2)、写readFields和write方法,以及toString方法(也就是输出的格式)
package MaxMinCount;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Value type holding a minimum, a maximum and a count.
 *
 * <p>The three fields are written as consecutive {@code long}s in the order
 * min, max, count, and {@link #readFields(DataInput)} reads them back in that
 * same order.
 */
public class MinMaxCountTuple implements Writable {
    private long min;
    private long max;
    private long count;

    /**
     * Serializes the fields as three longs: min, max, count.
     *
     * @param dataOutput sink the fields are written to
     * @throws IOException if writing to {@code dataOutput} fails
     */
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeLong(this.min);
        dataOutput.writeLong(this.max);
        dataOutput.writeLong(this.count);
    }

    /**
     * Overwrites the fields with three longs read in the order min, max, count.
     *
     * @param dataInput source the fields are read from
     * @throws IOException if reading from {@code dataInput} fails
     */
    public void readFields(DataInput dataInput) throws IOException {
        this.min = dataInput.readLong();
        this.max = dataInput.readLong();
        this.count = dataInput.readLong();
    }

    /** @return the stored minimum */
    public long getMin() {
        return this.min;
    }

    /** @param min the minimum to store */
    public void setMin(long min) {
        this.min = min;
    }

    /** @return the stored maximum */
    public long getMax() {
        return this.max;
    }

    /** @param max the maximum to store */
    public void setMax(long max) {
        this.max = max;
    }

    /** @return the stored count */
    public long getCount() {
        return this.count;
    }

    /** @param count the count to store */
    public void setCount(long count) {
        this.count = count;
    }

    /**
     * Renders the tuple as {@code MinMaxCountTuple{min=..., max=..., count=...}}.
     *
     * @return the textual form of this tuple
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("MinMaxCountTuple{");
        text.append("min=").append(this.min);
        text.append(", max=").append(this.max);
        text.append(", count=").append(this.count);
        text.append('}');
        return text.toString();
    }
}
2、输入数据
4 3
5 1
6 2
7 9
8 4
9 8
10 3
10 1
11 2
12 9
23 4
24 8
36 3
3、mapper方法
注意(1)、继承Mapper类,写好泛型类型
(2)、定义要输出的序列化格式字段为成员变量
(3)、Text类型的值要先调用toString()转成String再进行处理
/**
 * Maps each input line of the form {@code "<key> <number>"} to
 * {@code (key, tuple)} where the tuple has min = max = number and count = 1.
 *
 * <p>Fields may be separated by any run of whitespace. Lines that are blank,
 * have fewer than two fields, or whose second field is not a valid long are
 * skipped and tallied in the {@code MaxMinCount/MALFORMED_LINES} counter
 * instead of failing the task.
 */
public class MaxMinCountMapper extends Mapper<LongWritable, Text, Text, MinMaxCountTuple> {
    /** Counter group and name used to report skipped input lines. */
    private static final String COUNTER_GROUP = "MaxMinCount";
    private static final String MALFORMED_LINES = "MALFORMED_LINES";

    // Compiled once: String.split with a regex would recompile it for every record.
    private static final java.util.regex.Pattern FIELD_SEPARATOR =
            java.util.regex.Pattern.compile("\\s+");

    // Reused across map() calls so no new objects are allocated per record.
    Text outkey = new Text();
    MinMaxCountTuple minMaxCountTuple = new MinMaxCountTuple();

    /**
     * Emits one {@code (key, tuple)} pair for a well-formed line.
     *
     * @param key     input key supplied by the framework (not used)
     * @param value   one line of input text
     * @param context sink for the output pair and the malformed-line counter
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if writing the output pair is interrupted
     */
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Convert and split exactly once; trim so surrounding whitespace does not
        // produce an empty leading field.
        String[] fields = FIELD_SEPARATOR.split(value.toString().trim());
        if (fields.length < 2) {
            context.getCounter(COUNTER_GROUP, MALFORMED_LINES).increment(1);
            return;
        }

        long number;
        try {
            number = Long.parseLong(fields[1]);
        } catch (NumberFormatException ignored) {
            // A single bad record should not abort the whole job; it is counted instead.
            context.getCounter(COUNTER_GROUP, MALFORMED_LINES).increment(1);
            return;
        }

        // A single observation is both the minimum and the maximum seen so far.
        minMaxCountTuple.setMin(number);
        minMaxCountTuple.setMax(number);
        minMaxCountTuple.setCount(1);
        outkey.set(fields[0]);
        context.write(outkey, minMaxCountTuple);
    }
}
4、reducer方法
注意:(1)、同一个分区里的数据由同一个Reducer处理,其中相同key的所有value会进入同一次reduce方法调用
/**
 * Collapses all tuples received for one key into a single tuple holding the
 * smallest min, the largest max and the sum of the counts.
 */
public class MaxMinCountReducer extends Reducer<Text, MinMaxCountTuple, Text, MinMaxCountTuple> {
    Text key = new Text();
    // Reused for every reduce() call; all three fields are overwritten before each write.
    MinMaxCountTuple result = new MinMaxCountTuple();

    /**
     * Aggregates {@code values} and writes one {@code (textKey, result)} pair.
     *
     * @param textKey key shared by all tuples in {@code values}
     * @param values  tuples to merge
     * @param context sink for the merged pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if writing the output pair is interrupted
     */
    public void reduce(Text textKey, Iterable<MinMaxCountTuple> values, Context context)
            throws IOException, InterruptedException {
        // Start from the identity elements so the first tuple always replaces them.
        long smallest = Long.MAX_VALUE;
        long largest = Long.MIN_VALUE;
        long total = 0;

        for (MinMaxCountTuple tuple : values) {
            smallest = Math.min(smallest, tuple.getMin());
            largest = Math.max(largest, tuple.getMax());
            total += tuple.getCount();
        }

        result.setMin(smallest);
        result.setMax(largest);
        result.setCount(total);

        key = textKey;
        context.write(key, result);
    }
}
5、main方法
/**
 * Driver that configures and runs the min/max/count job.
 *
 * <p>Usage: {@code MaxMinCountMain <input path> <output path>}. The process
 * exits with 0 when the job succeeds, 1 when it fails, and 2 when the
 * required arguments are missing.
 */
public class MaxMinCountMain {
    /**
     * Builds the job from the command-line paths, runs it and exits with its status.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws IOException            if the job cannot be created or submitted
     * @throws ClassNotFoundException if a configured job class cannot be loaded
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        if (args.length < 2) {
            System.err.println("Usage: MaxMinCountMain <input path> <output path>");
            System.exit(2);
        }

        // Build the job object.
        Job job = Job.getInstance(new Configuration());
        // Must be the class that contains main(), so the job jar can be located.
        job.setJarByClass(MaxMinCountMain.class);

        // Mapper settings.
        job.setMapperClass(MaxMinCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(MinMaxCountTuple.class);

        // Reducer settings.
        job.setReducerClass(MaxMinCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(MinMaxCountTuple.class);

        // The reducer doubles as the combiner: its input and output types match,
        // and min, max and count-sum give the same result when applied to partial results.
        job.setCombinerClass(MaxMinCountReducer.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Submit the job and report failure through the exit code rather than
        // discarding the result.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}