package com.juruo.wordcount;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.FileInputStream;
import java.io.IOException;
import static org.apache.hadoop.mapreduce.Job.getInstance;
public class MyWordCount {
static class MyMapper extends Mapper<LongWritable,Text,Text,LongWritable> {
//Mapper<KEYIN,VALUEIN,KEYOUT,VALUEOUT>
//KEYIN:框架要传送给咱们写的map方法的输入参数key的数据类型
//默认情况下,框架要传入的key是框架从待处理数据(文本文件)中,读取的某一行数据的偏移量
//所以数据类型是Long(对于海量数据来说int不够用)
//VALUEIN:框架要传送给咱们自己写的map方法的输入参数value的数据类型
【MR】windows本地运行wordcount
最新推荐文章于 2022-08-29 08:18:34 发布