//Map phase
package com.mr;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
//Create a CountMap class extending Mapper, declaring the four generic types: input key, input value, output key, output value
public class CountMap extends Mapper<LongWritable, Text, Text, IntWritable> {
//Override the map method
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
//Take one line of data and convert it to a String
String word = value.toString();
//Split the line on the delimiter into an array of words
String[] split = word.split(" ");
//Walk the array and emit the pair <word, 1> for every word that appears
for (String w : split) {
context.write(new Text(w), new IntWritable(1));
}
}
}
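//Note (an addition, not in the original): split(" ") assumes words are separated by exactly
//one space; runs of spaces or empty lines yield empty tokens. A regex split such as
//value.toString().split("\\s+") would be a more robust alternative.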
//Hadoop wraps the basic Java types in its own set of serializable data types:
//long    -----> LongWritable
//String  -----> Text
//Integer -----> IntWritable
//null    -----> NullWritable
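//A minimal standalone sketch (the class name WritableDemo is illustrative, not part of the
//job) showing how values move between plain Java types and their Writable wrappers:
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
public class WritableDemo {
public static void main(String[] args) {
IntWritable one = new IntWritable(1);//wrap a Java int
int n = one.get();//unwrap back to a primitive int
Text word = new Text("hello");//wrap a Java String
String s = word.toString();//unwrap back to a String
System.out.println(s + " -> " + n);//prints: hello -> 1
}
}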
package com.mr;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
//Define a CountReadius class that extends Reducer
public class CountReadius extends Reducer<Text, IntWritable,Text,IntWritable> {
//Override the reduce method
@Override
protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
//Define a counter
int count = 0;
//Walk the group of values for this key and add up the 1s
for (IntWritable v : values) {
count += v.get();//sum the value itself, not just the number of elements
}
//Emit the final result
context.write(key, new IntWritable(count));
}
}
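//For example (made-up data): the key "hello" arriving with the value group [1, 1] produces
//the single output pair <hello, 2>.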
package com.mr;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
//Main class that runs the MR program
public class CountDriver {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
//Use a Job to package up the settings for this MR run
Configuration conf = new Configuration();
conf.set("mapreduce.framework.name","local");
conf.set("yarn.resorcemanager.hostname","192.168.72.110");
conf.set("fs.deafutFS","hdfs://192.168.72.110:9000/");
Job job = Job.getInstance(conf);
//Set the main class for this MR job's jar
job.setJarByClass(CountDriver.class);
//Set the Mapper and Reducer classes this job uses
job.setMapperClass(CountMap.class);
job.setReducerClass(CountReadius.class);
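//Optional (an addition, not in the original): because the reduce logic is a plain sum, the
//same class can also act as a combiner to pre-aggregate <word,1> pairs on the map side and
//shrink shuffle traffic:
//job.setCombinerClass(CountReadius.class);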
//Set the output key/value types of the map phase
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
//Set the final output key/value types of this MR job
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
//Specify the component this job uses to read the source data: the source is a text file on HDFS, so use TextInputFormat
//job.setInputFormatClass(TextInputFormat.class);
//Specify the component this job uses to write its output: we write to files on HDFS, so use TextOutputFormat
//job.setOutputFormatClass(TextOutputFormat.class);
//Set the input and output paths
FileInputFormat.setInputPaths(job,new Path("E:\\hadoop\\wordcount.txt"));
FileOutputFormat.setOutputPath(job,new Path("E:\\hadoop\\output"));
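//Note (an addition, not in the original): the output directory must not exist before the job
//runs; if it does, the job fails with a FileAlreadyExistsException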
//job.submit() would submit the task and return to the client immediately, without tracking
//progress; waitForCompletion below also submits, so one call is enough
//job.submit();
//Submit the program and monitor/print its progress
boolean result = job.waitForCompletion(true);
System.exit(result ? 0 : 1);
}
}
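//A worked example with made-up data: if E:\hadoop\wordcount.txt contains the single line
//  hello world hello
//the map phase emits <hello,1> <world,1> <hello,1>, the shuffle stage groups them into
//<hello,[1,1]> and <world,[1]>, and the reduce phase writes (tab-separated)
//  hello 2
//  world 1
//to a part-r-00000 file under E:\hadoop\output.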