MapReduce的自定义分组(示例:按制表符切分单词并统计次数,本例未设置分组比较器,使用框架默认分组)

  1 package com.mengyao.hadoop.mapreduce;
  2 
  3 import java.io.IOException;
  4 import java.text.SimpleDateFormat;
  5 import java.util.Date;
  6 
  7 import org.apache.hadoop.conf.Configuration;
  8 import org.apache.hadoop.conf.Configured;
  9 import org.apache.hadoop.fs.Path;
 10 import org.apache.hadoop.io.LongWritable;
 11 import org.apache.hadoop.io.Text;
 12 import org.apache.hadoop.mapreduce.Job;
 13 import org.apache.hadoop.mapreduce.Mapper;
 14 import org.apache.hadoop.mapreduce.Reducer;
 15 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 16 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
 17 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 18 import org.apache.hadoop.util.Tool;
 19 import org.apache.hadoop.util.ToolRunner;
 20 
 21 
 22 public class MyGroupApp extends Configured implements Tool {
 23 
 24     static class MyGroupMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
 25         
 26         private Text k = null;
 27         private LongWritable v = null;
 28         
 29         @Override
 30         protected void setup(
 31                 Mapper<LongWritable, Text, Text, LongWritable>.Context context)
 32                 throws IOException, InterruptedException {
 33             k = new Text();
 34             v = new LongWritable(1L);
 35         }
 36 
 37         @Override
 38         protected void map(LongWritable key, Text value, Context context)
 39                 throws IOException, InterruptedException {
 40             final String[] words = value.toString().split("\t");
 41             for (String word : words) {
 42                 k.set(word);
 43                 context.write(k, v);
 44             }
 45         }
 46     }
 47     
 48     static class MyGroupReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
 49         @Override
 50         protected void reduce(Text key, Iterable<LongWritable> value, Context context)
 51                 throws IOException, InterruptedException {
 52             long count = 0L;
 53             for (LongWritable item : value) {
 54                 count += item.get();
 55             }
 56             context.write(key, new LongWritable(count));
 57         }
 58     }
 59     
 60     @Override
 61     public int run(String[] arg0) throws Exception {
 62         Configuration conf = getConf();
 63         conf.set("mapreduce.job.jvm.numtasks", "-1");        
 64         conf.set("mapreduce.map.speculative", "false");        
 65         conf.set("mapreduce.reduce.speculative", "false");    
 66         conf.set("mapreduce.map.maxattempts", "4");            
 67         conf.set("mapreduce.reduce.maxattempts", "4");        
 68         conf.set("mapreduce.map.skip.maxrecords", "0");        
 69         Job job = Job.getInstance(conf, MyGroupApp.class.getSimpleName());
 70         job.setJarByClass(MyGroupApp.class);
 71         job.setInputFormatClass(TextInputFormat.class);
 72         
 73         FileInputFormat.addInputPath(job, new Path(arg0[0]));
 74         FileOutputFormat.setOutputPath(job, new Path(arg0[1]));
 75         
 76         job.setMapperClass(MyGroupMapper.class);
 77         job.setMapOutputKeyClass(Text.class);
 78         job.setMapOutputValueClass(LongWritable.class);
 79         
 80         job.setReducerClass(MyGroupReducer.class);
 81         job.setOutputKeyClass(Text.class);
 82         job.setOutputValueClass(LongWritable.class);
 83         
 84         return job.waitForCompletion(true)?0:1;
 85     }
 86     
 87     
 88     public static int createJob(String[] args) {
 89         Configuration conf = new Configuration();
 90         int status = 1;
 91         try {
 92             status = ToolRunner.run(conf, new MyGroupApp(), args);
 93         } catch (Exception e) {
 94             e.printStackTrace();
 95             throw new RuntimeException(e);
 96         }
 97         
 98         return status;
 99     }
100     
101     public static void main(String[] args) throws Exception {
102         //此处用ant直接编译打包上传运行,先行赋值
103         args = new String[]{"/testdata/words", "/job/mapreduce/"+WordCountApp.class.getSimpleName()+"_"+new SimpleDateFormat("yyyyMMddhhMMss").format(new Date())};
104         if (args.length != 2) {
105             System.out.println("Usage: "+WordCountApp.class.getSimpleName()+" <in> <out>");
106             System.exit(2);
107         } else {
108             int status = createJob(args);
109             System.exit(status);
110         }
111 
112     }
113 
114 }

 

转载于:https://www.cnblogs.com/mengyao/archive/2013/02/07/4865584.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值