合并文档

报恩的猫

于 2014-05-16 18:46:01 发布

阅读量441

点赞数

分类专栏： Hadoop

Hadoop 专栏收录该内容

43 篇文章 0 订阅

订阅专栏

资源文件file1

hadoop

test

hello

word

资源文件file2

happy

birthday

this

test

最终的结果

hadoop

test

hello

word

happy

birthday

this

test

分析：将两个文件合并成一个文件，是一个很简单的案例。我们可以将value设为空，这样在输出的时候只需直接输出key就可以了。Map阶段将两个文件的每一行设为key，value设为空；Reduce阶段只需将Map阶段整理好的数据输出就可以了。注意：shuffle阶段会按key对数据进行排序，因此实际输出的行顺序是按key排序后的顺序，不一定与上面列出的顺序一致。

实现：

 
         package  
         com.bwzy.hadoop; 
        
         import  
         java.io.IOException; 
        
         import  
         java.util.StringTokenizer; 
        
         import  
         org.apache.hadoop.conf.Configured; 
        
         import  
         org.apache.hadoop.fs.Path; 
        
         import  
         org.apache.hadoop.io.IntWritable; 
        
         import  
         org.apache.hadoop.io.LongWritable; 
        
         import  
         org.apache.hadoop.io.Text; 
        
         import  
         org.apache.hadoop.mapreduce.Job; 
        
         import  
         org.apache.hadoop.mapreduce.Mapper; 
        
         import  
         org.apache.hadoop.mapreduce.Reducer; 
        
         import  
         org.apache.hadoop.mapreduce.Mapper.Context; 
        
         import  
         org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 
        
         import  
         org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 
        
         import  
         org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 
        
         import  
         org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 
        
         import  
         org.apache.hadoop.util.Tool; 
        
         import  
         org.apache.hadoop.util.ToolRunner; 
        
         import  
         com.bwzy.hadoop.WordCount.Map; 
        
         import  
         com.bwzy.hadoop.WordCount.Reduce; 
        
         public  
         class  
         HeBing  
         extends  
         Configured  
         implements  
         Tool { 
        
         public  
         static  
         class  
         Map 
        
         extends  
         Mapper<LongWritable, Text, Text, Text> { 
        
         public  
         void  
         map(LongWritable key, Text value, Context context) { 
        
         String line = value.toString(); 
        
         try  
         { 
        
         context.write( 
         new  
         Text(line),  
         new  
         Text( 
         "" 
         )); 
        
         }  
         catch  
         (IOException e) { 
        
         e.printStackTrace(); 
        
         }  
         catch  
         (InterruptedException e) { 
        
         e.printStackTrace(); 
        
         } 
        
         } 
        
         } 
        
         public  
         static  
         class  
         Reduce  
         extends 
        
         Reducer<Text, Text, Text, Text> { 
        
         public  
         void  
         reduce(Text key, Iterable<Text> values, 
        
         Context context)  
         throws  
         IOException, InterruptedException { 
        
         context.write(key,  
         new  
         Text( 
         "" 
         )); 
        
         } 
        
         } 
        
         @Override 
        
         public  
         int  
         run(String[] arg0)  
         throws  
         Exception { 
        
         Job job =  
         new  
         Job(getConf()); 
        
         job.setJobName( 
         "HeBing" 
         ); 
        
         job.setOutputKeyClass(Text. 
         class 
         ); 
        
         job.setOutputValueClass(Text. 
         class 
         ); 
        
         job.setMapperClass(Map. 
         class 
         ); 
        
         job.setCombinerClass(Reduce. 
         class 
         ); 
        
         job.setReducerClass(Reduce. 
         class 
         ); 
        
         job.setInputFormatClass(TextInputFormat. 
         class 
         ); 
        
         job.setOutputFormatClass(TextOutputFormat. 
         class 
         ); 
        
         FileInputFormat.setInputPaths(job,  
         new  
         Path(arg0[ 
         0 
         ])); 
        
         FileOutputFormat.setOutputPath(job,  
         new  
         Path(arg0[ 
         1 
         ])); 
        
         boolean  
         success = job.waitForCompletion( 
         true 
         ); 
        
         return  
         success ?  
         0  
         :  
         1 
         ; 
        
         } 
        
         public  
         static  
         void  
         main(String[] args)  
         throws  
         Exception { 
        
         int  
         ret = ToolRunner.run( 
         new  
         HeBing(), args); 
        
         System.exit(ret); 
        
         } 
        
         }