/**
* MyMRRunProcess.java
* com.hnxy.mr
* Copyright (c) 2019, 子墨版权所有.
* @author ZIMO
* @Date 2019年9月4日
*/
/**
 * Word-count TOP5 job: counts tab-separated tokens in the input files and
 * emits the five most frequent words (descending by count).
 *
 * <p>NOTE(review): the top-5 selection happens in the reducer's {@code cleanup()},
 * which is only globally correct with a single reducer (the default). With multiple
 * reducers each one would emit its own local top 5 — confirm reducer count if this
 * job is ever reconfigured.
 */
public class MyMRRunProcess extends Configured implements Tool {
	/**
	 * MAP CLASS: splits each input line on tabs and emits (token, 1).
	 */
	private static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
		// Reused output key/value objects (standard Hadoop pattern to avoid per-record allocation).
		private Text outkey = new Text();
		private IntWritable outval = new IntWritable(1);
		private String[] strs = null;

		@Override
		protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context)
				throws IOException, InterruptedException {
			// Tokenize the line; String.split never returns null, and even for an
			// empty line it returns a 1-element array, so no null/empty guard is needed.
			strs = value.toString().split("\t");
			for (String s : strs) {
				outkey.set(s);
				context.write(outkey, outval);
			}
		}
	}

	/**
	 * REDUCE CLASS: sums the counts per word, buffers all (word, total) pairs
	 * in memory, then emits the top 5 by count in {@code cleanup()}.
	 *
	 * <p>NOTE(review): buffering every distinct word in a HashMap assumes the
	 * vocabulary fits in reducer memory — fine for the intended small (&lt;400MB)
	 * inputs, but not scalable beyond that.
	 */
	private static class MyReduce extends Reducer<Text, IntWritable, Text, LongWritable> {
		// Reused output objects and the per-word running total.
		private LongWritable outval = new LongWritable();
		private Text outkey = new Text();
		private Long tmp = 0L;
		// Accumulates word -> total count across all reduce() calls; consumed in cleanup().
		private Map<String, Long> map = new HashMap<String, Long>();

		@Override
		protected void reduce(Text key, Iterable<IntWritable> values,
				Reducer<Text, IntWritable, Text, LongWritable>.Context context) throws IOException, InterruptedException {
			// Sum all partial counts for this word.
			tmp = 0L;
			for (IntWritable i : values) {
				tmp += i.get();
			}
			// Do not emit here; stash the total so cleanup() can rank all words.
			// (key is reused by Hadoop between calls, so copy it via toString().)
			map.put(key.toString(), tmp);
		}

		/**
		 * Runs once after all reduce() calls: sorts the accumulated counts
		 * in descending order and emits at most the top 5 entries.
		 */
		@Override
		protected void cleanup(Reducer<Text, IntWritable, Text, LongWritable>.Context context)
				throws IOException, InterruptedException {
			// Step 1: move the map entries into a list so they can be sorted.
			List<Map.Entry<String, Long>> list = new ArrayList<Map.Entry<String, Long>>(map.entrySet());
			// Step 2: sort by value, descending.
			Collections.sort(list, new Comparator<Map.Entry<String, Long>>() {
				@Override
				public int compare(Entry<String, Long> o1, Entry<String, Long> o2) {
					return o2.getValue().compareTo(o1.getValue());
				}
			});
			// Step 3: emit the top entries. Bound by list.size() as well — the
			// original unconditional i < 5 threw IndexOutOfBoundsException
			// whenever the input had fewer than 5 distinct words.
			int limit = Math.min(5, list.size());
			for (int i = 0; i < limit; i++) {
				outkey.set(list.get(i).getKey());
				outval.set(list.get(i).getValue());
				context.write(outkey, outval);
			}
		}
	}

	/**
	 * JOB RUNNER METHOD: configures and submits the MapReduce job.
	 *
	 * @param args args[0] = input path, args[1] = output path (deleted first if it exists)
	 * @return 0 on job success, -1 on failure
	 * (non-Javadoc)
	 * @see org.apache.hadoop.util.Tool#run(java.lang.String[])
	 */
	@Override
	public int run(String[] args) throws Exception {
		// Method return value: 0 = success, -1 = failure.
		int count = -1;
		// Configuration injected by ToolRunner (includes -D overrides).
		Configuration conf = this.getConf();
		// Create this job.
		Job job = Job.getInstance(conf, "MRTP");
		// Phase 1: input/output setup.
		job.setJarByClass(MyMRRunProcess.class);
		Path in = new Path(args[0]);
		Path out = new Path(args[1]);
		// Delete the output path if it already exists (Hadoop refuses to overwrite).
		FileSystem fs = FileSystem.get(conf);
		if (fs.exists(out)) {
			fs.delete(out, true);
			System.out.println("The old path is deleted!");
		}
		// Input/output formats.
		job.setInputFormatClass(TextInputFormat.class);
		job.setOutputFormatClass(TextOutputFormat.class);
		// Input/output directories.
		FileInputFormat.addInputPath(job, in);
		FileOutputFormat.setOutputPath(job, out);
		// Phase 2: map/reduce classes and key/value types. Map output types differ
		// from job output types (IntWritable vs LongWritable), so both must be set.
		job.setMapperClass(MyMapper.class);
		job.setReducerClass(MyReduce.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(LongWritable.class);
		// Phase 3: submit and wait.
		count = job.waitForCompletion(true) ? 0 : -1;
		return count;
	}

	/**
	 * PROGRAM MAIN METHOD
	 * @param args VIP — forwarded to run(): args[0] = input path, args[1] = output path
	 */
	public static void main(String[] args) {
		try {
			// Run via ToolRunner so generic Hadoop options (-D, -fs, ...) are parsed.
			int result = ToolRunner.run(new MyMRRunProcess(), args);
			String msg = result == 0 ? "JOB OK!" : "JOB FAIL!";
			System.out.println(msg);
			System.exit(result);
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
}
// Blog-post metadata (kept as a comment so the file compiles):
// 2019-09-04 [MapReduce] Small data (under 400MB): find the TOP 5
// Latest recommended article published 2021-04-25 17:00:15