需求:求一个文件中金额前三的数据
分析:首先实现序列化,重写compareTo方法,再根据TopN在reduce中实现
求TOPN的文件:
AA,5
BB,10
CC,20
DD,20
EE,15
package com.ruozedata.bigdata.hadoop.mapreduce.TopN;
import com.ruozedata.bigdata.hadoop.utils.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.util.TreeMap;
/**
 * MapReduce job that writes the {@link #topn} records with the largest amounts found in a
 * comma-separated {@code name,amount} input file.
 *
 * <p>The mapper emits the amount as the map output key, wrapped in {@link RuozedataInt}, whose
 * ordering is reversed. The reducer therefore receives keys from the largest amount to the
 * smallest and only has to write the first {@link #topn} records it is handed.
 */
public class TopNAppV2 {

    /**
     * Configures and runs the job, then exits with status 0 on success and 1 on failure.
     *
     * @param args command-line arguments (unused; input and output paths are fixed)
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        // Obtain the job object.
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);

        String input = "data/top.data";
        String output = "out/";
        // NOTE(review): presumably removes a previous output directory so the job can be
        // rerun -- confirm against FileUtils.deleteTarget.
        FileUtils.deleteTarget(output, configuration);

        // Jar that contains the job classes.
        job.setJarByClass(TopNAppV2.class);

        // Custom mapper and reducer.
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        // The reducer keeps one running count per reducer instance, so the result is a global
        // top-N only when every record passes through the same reducer. Pin the reducer count
        // instead of relying on the cluster/default configuration.
        job.setNumReduceTasks(1);

        // Key and value types emitted by the map phase.
        job.setMapOutputKeyClass(RuozedataInt.class);
        job.setMapOutputValueClass(Text.class);

        // Key and value types emitted by the reduce phase.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(RuozedataInt.class);

        // Input and output paths.
        FileInputFormat.setInputPaths(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));

        // Submit the job and wait for it to finish.
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }

    /** Number of records written to the output. */
    public static final int topn = 3;

    /**
     * An {@link IntWritable} whose natural ordering is reversed, so that larger values sort
     * first when instances are compared through {@link #compareTo(IntWritable)}.
     */
    public static class RuozedataInt extends IntWritable {

        /** Creates an instance with the default value; needed for reflective instantiation. */
        public RuozedataInt() {
        }

        /**
         * Creates an instance holding the given value.
         *
         * @param value the amount to wrap
         */
        public RuozedataInt(int value) {
            super(value);
        }

        /**
         * Compares in descending numeric order.
         *
         * @param o the value to compare against
         * @return a negative number if this value is larger than {@code o}, zero if equal, a
         *     positive number if it is smaller
         */
        @Override
        public int compareTo(IntWritable o) {
            // Operands are swapped to reverse the order; Integer.compare avoids depending on
            // the range of the superclass result when negating.
            return Integer.compare(o.get(), get());
        }
    }

    /**
     * Parses each {@code name,amount} line and emits {@code (amount, name)} so that the shuffle
     * sorts records by amount.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, RuozedataInt, Text> {

        /**
         * Emits one {@code (amount, name)} pair per well-formed line. Blank lines, lines without
         * a comma, and lines whose amount is not an integer are skipped and counted in the
         * {@code TopNAppV2 / MALFORMED_LINES} job counter instead of failing the task.
         *
         * @param key input key supplied by the input format (unused)
         * @param value one line of input text
         * @param context job context used to emit output and update counters
         * @throws IOException if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            if (line.trim().isEmpty()) {
                // Blank lines (for example a trailing newline at end of file) carry no record.
                return;
            }

            String[] splits = line.split(",");
            if (splits.length < 2) {
                context.getCounter("TopNAppV2", "MALFORMED_LINES").increment(1);
                return;
            }

            int price;
            try {
                // Tolerate whitespace around the amount, e.g. "AA, 5".
                price = Integer.parseInt(splits[1].trim());
            } catch (NumberFormatException e) {
                context.getCounter("TopNAppV2", "MALFORMED_LINES").increment(1);
                return;
            }

            String produce = splits[0];
            context.write(new RuozedataInt(price), new Text(produce));
        }
    }

    /**
     * Writes the first {@link #topn} records it receives as {@code (name, amount)} and ignores
     * the rest. Because keys arrive in descending amount order, those are the largest amounts
     * seen by this reducer instance.
     */
    public static class MyReducer extends Reducer<RuozedataInt, Text, Text, RuozedataInt> {

        /** Number of records written so far by this reducer instance. */
        int index = 0;

        /**
         * Writes values for the given amount until {@link #topn} records have been written in
         * total across all calls.
         *
         * @param key the amount shared by all {@code values}
         * @param values the names that have this amount
         * @param context job context used to emit output
         * @throws IOException if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(RuozedataInt key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            for (Text value : values) {
                if (index >= topn) {
                    // Limit reached: stop counting so the counter cannot overflow on very
                    // large inputs and no further values are scanned for this key.
                    return;
                }
                context.write(value, key);
                index++;
            }
        }
    }
}