MapReduce初次编程详解（1）：单词统计

最新推荐文章于 2024-05-09 22:58:28 发布

今天莲莲掉头发了吗

最新推荐文章于 2024-05-09 22:58:28 发布

阅读量284

点赞数

文章标签： mapreduce 大数据 hadoop

本文链接：https://blog.csdn.net/qq_40797864/article/details/106367715

版权

在这里插入图片描述

/**
 * Entry point that configures and submits the word-count MapReduce job.
 *
 * <p>Usage: {@code WordCountDriver [inputPath [outputPath]]}. Either argument
 * may be omitted, in which case the corresponding default HDFS location below
 * is used, so running without arguments behaves as before.
 */
public class WordCountDriver {

    /** Input location used when no first command-line argument is supplied. */
    private static final String DEFAULT_INPUT_PATH = "hdfs://localhost:9000/input";

    /** Output location used when no second command-line argument is supplied. */
    private static final String DEFAULT_OUTPUT_PATH = "hdfs://localhost:9000/output";

    /**
     * Builds the job, wires up mapper/reducer and key/value types, and runs it.
     * The JVM exits with status 0 when the job reports success and 1 otherwise.
     *
     * @param args optional {@code [inputPath, outputPath]} overriding the defaults
     * @throws Exception if the job cannot be created, submitted, or monitored
     */
    public static void main(String[] args) throws Exception {
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH;

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf); // new job backed by the loaded configuration

        // Identifies the jar to ship by the class that contains main().
        job.setJarByClass(WordCountDriver.class);

        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);

        // Key/value types emitted by the map phase.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Key/value types emitted by the reduce phase (the final job output).
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Line-oriented text input; set explicitly here for clarity.
        job.setInputFormatClass(TextInputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        // Submit the job and wait for the result; 'true' requests progress output.
        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }

}

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Map phase of the word-count job: turns each input line into
 * {@code (word, 1)} pairs.
 *
 * <p>Type parameters of {@code Mapper}, in order:
 * <ul>
 *   <li>{@code LongWritable} - input key type</li>
 *   <li>{@code Text} - input value type (one line of text)</li>
 *   <li>{@code Text} - output key type (a single word)</li>
 *   <li>{@code IntWritable} - output value type (the count, always 1 here)</li>
 * </ul>
 * Hadoop's writable wrappers are used instead of plain Java types because the
 * framework has to serialize keys and values.
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    /** The count emitted for every word occurrence; never modified, so it is shared. */
    private static final IntWritable ONE = new IntWritable(1);

    /** Output key holder, refilled for each word instead of allocating a new Text. */
    private final Text word = new Text();

    /**
     * Splits one line on single spaces and emits {@code (word, 1)} for every
     * non-empty token. Empty tokens, which {@code split(" ")} produces for
     * blank lines and for leading or consecutive spaces, are skipped so they
     * are not counted as words.
     *
     * @param key     input key for this record (unused)
     * @param value   the line of text to tokenize
     * @param context collector that receives the emitted pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Convert Hadoop's Text to a Java String so it can be tokenized.
        String line = value.toString();

        for (String token : line.split(" ")) {
            if (token.isEmpty()) {
                continue; // artifact of blank lines / repeated spaces, not a word
            }
            word.set(token);
            // Emitted types must match the Mapper's declared output types.
            context.write(word, ONE);
        }
    }
}

/**
 * Reduce phase of the word-count job: adds up the counts collected for a word
 * and emits {@code (word, total)}.
 *
 * <p>Type parameters of {@code Reducer}, in order: input key ({@code Text}),
 * input value ({@code IntWritable}), output key ({@code Text}) and output
 * value ({@code IntWritable}).
 */
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    /**
     * Emits the word together with the sum of all counts received for it.
     *
     * @param key     the word being aggregated
     * @param values  the counts grouped under {@code key}
     * @param context collector that receives the {@code (word, total)} pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Wrap the plain int total in IntWritable to match the declared output type.
        IntWritable total = new IntWritable(sumCounts(values));
        Text outputKey = new Text(key);
        context.write(outputKey, total);
    }

    /** Returns the arithmetic sum of every count in {@code counts}. */
    private static int sumCounts(Iterable<IntWritable> counts) {
        int sum = 0;
        for (IntWritable single : counts) {
            sum = sum + single.get();
        }
        return sum;
    }

}

结果图：

String 与 char 的区别

今天莲莲掉头发了吗

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
MapReduce初次编程详解（1）：单词统计

public class WordCountDriver { public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = Job.getInstance(conf);//创建新的作业，读mapreduce的配置 job.setJarByClass(Word.
复制链接

扫一扫