import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {
    // Needed for local runs and remote submissions
    /* static {
        System.setProperty("hadoop.home.dir", "E:\\x3\\hadoop-2.9.2");
    } */
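    // On Windows, hadoop.home.dir must point at a directory whose bin\ folder
    // contains winutils.exe, otherwise local runs fail at startup.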
    public static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
        /**
         * @param key     byte offset of the current line in the input split
         * @param value   the text of the current line
         * @param context the task context used to emit output
         * @throws IOException
         * @throws InterruptedException
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // 1. Split the line into an array of words
            String line = value.toString();
            String[] split = line.split(" ");
            // 2. Map each word to a <word, 1> pair
            for (String word : split) {
                // 3. Write the pair to the in-memory (shuffle) buffer
                context.write(new Text(word), new LongWritable(1));
            }
        }
    }
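    // A common refinement (not in the original code): reuse the output objects
    // instead of allocating a new Text/LongWritable per record, e.g.
    //
    //   private final Text outKey = new Text();
    //   private static final LongWritable ONE = new LongWritable(1);
    //   ...
    //   outKey.set(word);
    //   context.write(outKey, ONE);
    //
    // This is safe because the framework serializes the pair during write().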
    public static class MyReduce extends Reducer<Text, LongWritable, Text, LongWritable> {
        /**
         * @param key     a single word
         * @param values  all counts emitted for that word, e.g. [1, 1, 1]
         * @param context the task context used to emit output
         * @throws IOException
         * @throws InterruptedException
         */
        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
            long sum = 0L;
            // 1. Iterate over the counts grouped under this key
            for (LongWritable value : values) {
                // Accumulate the total
                sum += value.get();
            }
            // Emit <word, total>
            context.write(key, new LongWritable(sum));
        }
    }
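    // Worked example of the data flow (illustrative input, not from the
    // original): for a line "hello world hello", MyMapper emits
    // <hello,1> <world,1> <hello,1>; the shuffle groups these into
    // <hello,[1,1]> and <world,[1]>; MyReduce then writes:
    //   hello   2
    //   world   1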
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Initialize a job
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "word-count");
        /***************** 3. Required when running from a packaged jar on the cluster: start *****************/
        job.setJarByClass(WordCount.class);
        /***************** 3. Required when running from a packaged jar on the cluster: end *******************/
        // 1. Input files; FileInputFormat also computes the input splits
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // 2. Parallel map phase
        job.setMapperClass(MyMapper.class);
        // These can be omitted when the map output key/value types match the
        // reduce (job) output key/value types:
        //job.setMapOutputKeyClass(Text.class);
        //job.setMapOutputValueClass(LongWritable.class);
        // 3. Shuffle phase (handled by the framework)
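        // Optional tweak (not in the original): because summation is both
        // associative and commutative, MyReduce can also serve as a combiner
        // to pre-aggregate map output and shrink shuffle traffic:
        //   job.setCombinerClass(MyReduce.class);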
        // 4. Reduce phase
        job.setReducerClass(MyReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        // 5. Output directory (must not already exist)
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // 6. Submit the job and block until it completes
        boolean result = job.waitForCompletion(true);
        System.out.println(result);
        // Propagate success/failure to the shell
        System.exit(result ? 0 : 1);
    }
}
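// Typical cluster submission (jar name and paths are placeholders):
//   hadoop jar wordcount.jar WordCount <hdfs-input-path> <hdfs-output-path>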