//Map phase
package com.mr;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
//Create a CountMap class extending Mapper, declaring the four generic types: input key, input value, output key, output value
public class CountMap extends Mapper<LongWritable, Text, Text, IntWritable> {
//Override the map method
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
//Take one line of data and convert it to a String
String word = value.toString();
//Split the line on the delimiter into an array of words
String[] split = word.split(" ");
//Walk the array and emit the pair <word, 1> for every word that appears
for (String w : split) {
context.write(new Text(w), new IntWritable(1));
}
}
}
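//Note (an addition, not in the original): split(" ") assumes words are separated by exactly
//one space; runs of spaces or empty lines yield empty tokens. A regex split such as
//value.toString().split("\\s+") would be a more robust alternative.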
//Hadoop wraps the basic Java types in its own set of serializable data types:
//long    -----> LongWritable
//String  -----> Text
//Integer -----> IntWritable
//null    -----> NullWritable
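//A minimal standalone sketch (the class name WritableDemo is illustrative, not part of the
//job) showing how values move between plain Java types and their Writable wrappers:
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
public class WritableDemo {
public static void main(String[] args) {
IntWritable one = new IntWritable(1);//wrap a Java int
int n = one.get();//unwrap back to a primitive int
Text word = new Text("hello");//wrap a Java String
String s = word.toString();//unwrap back to a String
System.out.println(s + " -> " + n);//prints: hello -> 1
}
}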
package com.mr;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
//Define a CountReadius class that extends Reducer
public class CountReadius extends Reducer<Text, IntWritable,Text,IntWritable> {
//Override the reduce method
@Override
protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
//Define a counter
int count = 0;
//Walk the group of values for this key and add up the 1s
for (IntWritable v : values) {
count += v.get();//sum the value itself, not just the number of elements
}
//Emit the final result
context.write(key, new IntWritable(count));
}
}
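//For example (made-up data): the key "hello" arriving with the value group [1, 1] produces
//the single output pair <hello, 2>.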
package com.mr;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
//Main class that runs the MR program
public class CountDriver {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
//Use a Job to package up the settings for this MR run
Configuration conf = new Configuration();
conf.set("mapreduce.framework.name","local");
conf.set("yarn.resorcemanager.hostname","192.168.72.110");
conf.set("fs.deafutFS","hdfs://192.168.72.110:9000/");
Job job = Job.getInstance(conf);
//Set the main class for this MR job's jar
job.setJarByClass(CountDriver.class);
//Set the Mapper and Reducer classes this job uses
job.setMapperClass(CountMap.class);
job.setReducerClass(CountReadius.class);
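//Optional (an addition, not in the original): because the reduce logic is a plain sum, the
//same class can also act as a combiner to pre-aggregate <word,1> pairs on the map side and
//shrink shuffle traffic:
//job.setCombinerClass(CountReadius.class);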
//Set the output key/value types of the map phase
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
//Set the final output key/value types of this MR job
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
//Specify the component this job uses to read the source data: the source is a text file on HDFS, so use TextInputFormat
//job.setInputFormatClass(TextInputFormat.class);
//Specify the component this job uses to write its output: we write to files on HDFS, so use TextOutputFormat
//job.setOutputFormatClass(TextOutputFormat.class);
//Set the input and output paths
FileInputFormat.setInputPaths(job,new Path("E:\\hadoop\\wordcount.txt"));
FileOutputFormat.setOutputPath(job,new Path("E:\\hadoop\\output"));
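//Note (an addition, not in the original): the output directory must not exist before the job
//runs; if it does, the job fails with a FileAlreadyExistsException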
//job.submit() would submit the task and return to the client immediately, without tracking
//progress; waitForCompletion below also submits, so one call is enough
//job.submit();
//Submit the program and monitor/print its progress
boolean result = job.waitForCompletion(true);
System.exit(result ? 0 : 1);
}
}
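//A worked example with made-up data: if E:\hadoop\wordcount.txt contains the single line
//  hello world hello
//the map phase emits <hello,1> <world,1> <hello,1>, the shuffle stage groups them into
//<hello,[1,1]> and <world,[1]>, and the reduce phase writes (tab-separated)
//  hello 2
//  world 1
//to a part-r-00000 file under E:\hadoop\output.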