wordcount

最新推荐文章于 2024-10-03 20:14:40 发布

xbdbbss

最新推荐文章于 2024-10-03 20:14:40 发布

阅读量114

点赞数 1

文章标签：前端

本文链接：https://blog.csdn.net/xbdbbss/article/details/137874017

版权

public class WcMap extends Mapper<LongWritable, Text, Text, LongWritable>{
    //重写map这个方法
    //mapreduce框架每读一行数据就调用一次该方法
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        //具体业务逻辑就写在这个方法体中，而且我们业务要处理的数据已经被框架传递进来，在方法的参数中key-value
        //key是这一行数据的起始偏移量，value是这一行的文本内容
    }
}

public class WcReduce extends Reducer<Text, LongWritable, Text, LongWritable>{

    //继承Reducer之后重写reduce方法
    //第一个参数是key，第二个参数是集合。
    //框架在map处理完成之后，将所有key-value对缓存起来，进行分组，然后传递一个组<key,valus{}>，调用一次reduce方法
    protected void reduce(Text key, Iterable<LongWritable> values,Context context) 
            throws IOException, InterruptedException {
            
    }
}

/**
 * Driver that configures the word-count job and submits it.
 *
 * <p>Wires {@link WcMap} and {@link WcReduce} into a {@code Job}, points it at the
 * fixed HDFS input and output paths below, and waits for it to finish.
 */
public class WcRunner {

    /**
     * Builds, submits and waits for the word-count job, then terminates the JVM with
     * exit status {@code 0} if the job succeeded and {@code 1} otherwise, so that
     * calling scripts can detect a failed run.
     *
     * @param args command-line arguments (not used)
     * @throws IOException            if the job cannot be created or submitted
     * @throws ClassNotFoundException propagated from {@code Job.waitForCompletion}
     * @throws InterruptedException   if the wait for job completion is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Create the configuration
        Configuration conf = new Configuration();
        // Obtain a job instance
        Job job = Job.getInstance(conf);

        // Tell the framework which jar contains the classes used by this job
        job.setJarByClass(WcRunner.class);

        // Mapper and reducer classes used by this job
        job.setMapperClass(WcMap.class);
        job.setReducerClass(WcReduce.class);

        // Key/value types of the reducer output
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // Key/value types of the mapper output
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // Location of the input data to process
        FileInputFormat.setInputPaths(job, new Path("hdfs://master:9000/user/cg/input"));

        // Location where the results are written
        FileOutputFormat.setOutputPath(job, new Path("hdfs://master:9000/user/cg/output"));

        // Submit the job to the cluster, wait for it, and report its outcome
        // through the process exit status instead of discarding it.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}