package com.analyzer.search_task;
import java.io.IOException;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.ToolRunner;
public class SearchBrand extends Base{
public static class FileMapper extends Mapper<LongWritable, Text, Text, Text> {

    // Reused output objects — avoids one allocation per emitted record
    // (standard Hadoop mapper idiom; the framework serializes on write()).
    private final Text outKey = new Text();
    private final Text outValue = new Text();

    /**
     * Splits each input line on '|'. The first field containing '$' is taken
     * as the brand key (e.g. "samsung$..."); every field containing '@' is
     * emitted as (that field, firstField + "|" + brandKey).
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        // Lines shorter than 2 chars cannot carry a usable record.
        if (line.length() < 2) {
            return;
        }
        String[] fields = line.split("\\|");
        if (fields.length < 1) {
            return;
        }
        // Locate the brand-key field; "null" is the sentinel when absent
        // (kept as a literal to preserve the original output format).
        String brandKey = "null";
        for (String field : fields) {
            if (field != null && field.contains("$")) {
                brandKey = field.trim();
                break;
            }
        }
        // Emit one record per '@'-bearing field.
        for (String field : fields) {
            if (field != null && field.contains("@")) {
                outKey.set(field.trim());
                outValue.set(fields[0] + "|" + brandKey);
                context.write(outKey, outValue);
            }
        }
    }
}
public static class ActionReducer extends Reducer<Text, Text, NullWritable, Text> {

    /**
     * Multi-named outputs: "key" for records that parsed into at least two
     * '|'-fields and are not brand info; "unkey" for everything else.
     * Must be closed in cleanup() or nothing is flushed to the output files.
     */
    private MultipleOutputs<NullWritable, Text> mos;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        mos = new MultipleOutputs<NullWritable, Text>(context);
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        // close() is mandatory — MultipleOutputs buffers; skipping it loses output.
        if (mos != null) {
            mos.close();
            mos = null;
        }
    }

    /**
     * For each value: blank or brand-info strings go to "unkey"; otherwise the
     * string is split on '|' and, when it has a second field, the record
     * key + "|" + secondField goes to "key". Counters track total/succeed/fail.
     */
    @Override
    protected void reduce(Text key, Iterable<Text> iter, Context context)
            throws IOException, InterruptedException {
        if (iter == null) {
            return;
        }
        context.getCounter(CounterRecorder.TOTAL).increment(1);
        for (Text item : iter) {
            if (item == null) {
                continue;
            }
            String str = item.toString();
            if (StringUtils.isBlank(str) || isBrand(str)) {
                writeUnmatched(context, key, str);
                continue;
            }
            String[] array = str.split("\\|", -1);
            if (array.length > 1) { // drop the keyword part, keep field [1]
                mos.write("key", NullWritable.get(), new Text(key.toString() + "|" + array[1]));
                context.getCounter(CounterRecorder.SUCCEED).increment(1);
            } else {
                writeUnmatched(context, key, str);
            }
        }
    }

    /** Routes an unusable record to the "unkey" output and bumps the FAILE counter. */
    private void writeUnmatched(Context context, Text key, String str)
            throws IOException, InterruptedException {
        context.getCounter(CounterRecorder.FAILE).increment(1);
        mos.write("unkey", NullWritable.get(), new Text(key.toString() + "--" + str));
    }

    /**
     * Returns true when the string is brand information.
     *
     * @param str e.g. "sm|samsung$..." — field[1] starting with field[0]
     *            (with "sm" normalized to "samsung") marks a brand record.
     * @return whether the record is brand info
     */
    private boolean isBrand(String str) {
        String[] datas = str.split("\\|");
        if (datas.length > 1) {
            String prefix = "sm".equals(datas[0]) ? "samsung" : datas[0];
            if (datas[1] != null && datas[1].startsWith(prefix)) {
                return true;
            }
        }
        return false;
    }
}
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    Job job = Job.getInstance(conf);
    job.setJobName("SearSbandTask_T");
    job.setJarByClass(SearchBrand.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(FileMapper.class);
    job.setReducerClass(ActionReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // -------- named multi-outputs --------
    // Records matched by key.
    MultipleOutputs.addNamedOutput(job, "key", TextOutputFormat.class, NullWritable.class, Text.class);
    // Everything that could not be matched.
    MultipleOutputs.addNamedOutput(job, "unkey", TextOutputFormat.class, NullWritable.class, Text.class);
    // Suppress the empty default part-r-00000 files.
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    // -------- input / output paths --------
    Path inPath = new Path(args[0]);
    Path outPath = new Path(args[1]);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outPath)) {
        // BUGFIX: was fs.deleteOnExit(outPath) + fs.close(). deleteOnExit only
        // deletes when the FileSystem closes, and closing here shuts the
        // JVM-cached FileSystem instance the job's output committer still
        // needs ("Filesystem closed" failures). Delete immediately instead,
        // and leave the cached instance open.
        fs.delete(outPath, true);
    }
    FileInputFormat.addInputPath(job, inPath);
    FileOutputFormat.setOutputPath(job, outPath);

    // -------- run and report counters --------
    boolean isCompletion = job.waitForCompletion(true);
    if (isCompletion) {
        System.out.println("Total num:" + job.getCounters().findCounter(CounterRecorder.TOTAL).getValue());
        System.out.println("key num:" + job.getCounters().findCounter(CounterRecorder.SUCCEED).getValue());
        System.out.println("unkey num:" + job.getCounters().findCounter(CounterRecorder.FAILE).getValue());
    }
    return isCompletion ? 0 : 1;
}
/**
 * Launches the SearchBrand job through ToolRunner.
 *
 * @param con  Hadoop configuration to run with
 * @param args input path at [0], output path at [1]
 * @return 0 on success, non-zero on failure
 */
public static int startTask(Configuration con, String[] args) throws Exception {
    SearchBrand task = new SearchBrand();
    return ToolRunner.run(con, task, args);
}
/**
 * Entry point. Uses the command-line paths when provided; otherwise falls
 * back to the original hard-coded HDFS defaults. Exits with the job status.
 */
public static void main(String[] args) throws Exception {
    // BUGFIX: the original discarded args entirely and ignored the job's
    // exit code. Keep the old defaults for backward compatibility.
    if (args == null || args.length < 2) {
        args = new String[]{"/user/out_temp/UA-r-00000", "/user/sband"};
    }
    Configuration con = new Configuration();
    System.exit(startTask(con, args));
}
}
// Hadoop MultipleOutputs example (article footer from the original source,
// last recommended 2024-04-18 17:15:09 — commented out so the file compiles).