Hadoop MapReduce程序的模板框架

最新推荐文章于 2023-05-05 11:24:50 发布

GarfieldEr007

最新推荐文章于 2023-05-05 11:24:50 发布

阅读量2.1k

点赞数

分类专栏： Hadoop 文章标签： Hadoop MapReduce 程序模板框架

本文链接：https://blog.csdn.net/GarfieldEr007/article/details/50907273

版权

Hadoop 专栏收录该内容

123 篇文章 4 订阅

订阅专栏

这里给出两个Hadoop MapReduce程序的模板框架。每个模板都包含基本的import语句、继承自Mapper基类和Reducer基类的内部类（其中的map()方法和reduce()方法留空，等待实现），以及作业（job）的驱动代码，即配置作业名、Mapper类、Reducer类、Combiner类的类名以及输入输出路径等等。

写MR程序时，程序员需要实现相应的map()函数和reduce()函数。

一、

/*
 * MapReduce程序模板，一些必要的语句
 * 写MR程序时，复制该文件，修改类名，实现相应的map、reduce函数等 
 */

import java.io.IOException; 
import java.util.StringTokenizer; // 分隔字符串
import org.apache.hadoop.conf.Configured; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.io.IntWritable; // 相当于int类型
import org.apache.hadoop.io.LongWritable; // 相当于long类型
import org.apache.hadoop.io.Text; // 相当于String类型
import org.apache.hadoop.mapreduce.Job; 
import org.apache.hadoop.mapreduce.Mapper; 
import org.apache.hadoop.mapreduce.Reducer; 
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 
import org.apache.hadoop.util.Tool; 
import org.apache.hadoop.util.ToolRunner; 

public class HadoopMRTemplate extends Configured implements Tool{
    
	public static class MapTemplate extends Mapper<LongWritable, Text, Text, IntWritable> { 
		// TODO: some preprocessing operations before map() function
		
		public void map(LongWritable   key, Text value, Context context) 
				throws IOException, InterruptedException {
			// map函数中参数key是偏移量，value是每一行的内容
			// TODO: implements map() function
            
        } // map( )
     } // class MapTemplate


    public static class ReduceTemplate extends Reducer<Text, IntWritable, Text, IntWritable> { //实现reduce函数
        // TODO: some preprocessing operations before reduce() function
    	
    	public void reduce(Text key, Iterable<IntWritable> values, Context context)
        	throws IOException, InterruptedException {
    		// TODO: implements reduce() function
            
            
        } // reduce( )        
    } // class ReduceTemplate
    
    public int run(String[] args) throws Exception {
        Job job = new Job(getConf()); 
        
        job.setJobName("HadoopMRTemplate"); // 作业名
        job.setOutputKeyClass(Text.class); // 类名.class生成class对象
        job.setOutputValueClass(IntWritable.class);
        job.setMapperClass(MapTemplate.class); 
        job.setReducerClass(ReduceTemplate.class); 
        job.setInputFormatClass(TextInputFormat.class); 
        job.setOutputFormatClass(TextOutputFormat.class); 
        
        FileInputFormat.setInputPaths(job, new Path(args[0])); // 作业的输入路径
        FileOutputFormat.setOutputPath(job, new Path(args[1])); // 作业的输出路径
         
        return (job.waitForCompletion(true)? 0 : 1); 
    } //run()
    
    public static void main(String[] args) throws Exception { // 调用ToolRunner.run( )
        int exitCode = ToolRunner.run(new HadoopMRTemplate(), args); 
        System.exit(exitCode); 
    } //main()
} // class HadoopMRTemplate

二、

/*
 * MapReduce程序模板，一些必要的语句
 * 写MR程序时，复制该文件，修改类名，实现相应的map、reduce函数等 
 */

import java.io.IOException;
import java.util.StringTokenizer; // 分隔字符串

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable; // 相当于int类型
import org.apache.hadoop.io.Text; // 相当于String类型
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class HadoopMRTemplate2 {

  public static class TokenizerMapper 
       extends Mapper<Object, Text, Text, IntWritable>{
        // TODO: some preprocessing operations before map() function
      
    public void map(Object key, Text value, Context context
                    ) throws IOException, InterruptedException {
    	// map函数中参数key是偏移量，value是每一行的内容
    	// TODO: implements map() function
      
    } //map()
  } // class TokenizerMapper
  
  public static class IntSumReducer 
       extends Reducer<Text,IntWritable,Text,IntWritable> {
	// TODO: some preprocessing operations before map() function

    public void reduce(Text key, Iterable<IntWritable> values, 
                       Context context
                       ) throws IOException, InterruptedException {
    	// TODO: implements reduce() function
      
    } // reduce()
  } // class IntSumReducer

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration(); 
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
      System.err.println("Usage: wordcount <in> <out>");
      System.exit(2);
    }
    Job job = new Job(conf, "HadoopMRTemplate2"); // 作业名
    
    job.setJarByClass(HadoopMRTemplate2.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    
    FileInputFormat.addInputPath(job, new Path(otherArgs[0])); // 作业的输入路径
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); // 作业的输出路径
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  } //main()
} // class HadoopMRTempalte2

GarfieldEr007

关注

0
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
Hadoop MapReduce程序的模板框架

这里放了两个Hadoop MapReduce程序的模板框架，包括一些基本的包import语句、Mapper基类、Reducer基类、map()方法、reduce()方法，后面还有一些作业job的驱动程序，具体说是配置作业名、配置Mapper类、Reducer类、Combiner类的类名等等。一、/* * MapReduce程序模板，一些必要的语句 * 写MR程序时，复制该文件，修改类名
复制链接

扫一扫