Map processing: public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable>
Reduce processing: public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable>
Job configuration: public class WordCountDriver
job.setMapperClass(WordCountMapper.class);   // register the Mapper class to use
job.setReducerClass(WordCountReducer.class); // register the Reducer class to use
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    // Break each input line apart into individual words
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Input line, e.g. "hello world love work"
        String line = value.toString();
        // Split the line on spaces
        String[] words = line.split(" ");
        // Emit <word, 1> for each word to the reducer side, e.g. <hello, 1>
        for (String w : words) {
            context.write(new Text(w), new IntWritable(1));
        }
    }
}
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Count how many times this word occurred
        int sum = 0;
        // Accumulate the 1s, then write out the total
        for (IntWritable v : values) {
            sum += v.get();
        }
        context.write(key, new IntWritable(sum));
    }
}
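For example, given the input "hello world hello", the shuffle phase groups the map output by key, so reduce is invoked once per word with <hello, [1, 1]> and <world, [1]>, and it writes out <hello, 2> and <world, 1>.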
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {
    public static void main(String[] args)
            throws IllegalArgumentException, IOException, ClassNotFoundException, InterruptedException {
        // Run as the root user
        System.setProperty("HADOOP_USER_NAME", "root");
        // Create the job
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // Locate the jar via the driver class
        job.setJarByClass(WordCountDriver.class);
        // Register the Mapper class to use
        job.setMapperClass(WordCountMapper.class);
        // Register the Reducer class to use
        job.setReducerClass(WordCountReducer.class);
        // Output key/value types of the Mapper stage
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // Output key/value types of the Reducer stage
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Input path and file name
        FileInputFormat.setInputPaths(job, new Path("c:\\in\\aa.txt"));
        // Output path
        FileOutputFormat.setOutputPath(job, new Path("c:\\out"));
        // Submit the job and wait for it to finish
        boolean rs = job.waitForCompletion(true);
        // Exit
        System.exit(rs ? 0 : 1);
    }
}
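Note that the output directory (c:\\out here) must not exist before the job is submitted; FileOutputFormat fails the job with a FileAlreadyExistsException if it does, so delete the directory between runs.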
After the jar package is built, run it on the virtual machine.
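The driver above hardcodes local Windows paths; when the job runs on the Hadoop cluster, these would typically be replaced with HDFS paths. A minimal sketch of the launch command, assuming the jar was exported as wordcount.jar (a hypothetical name) and WordCountDriver is in the default package:

hadoop jar wordcount.jar WordCountDriver

The results can then be inspected in the output directory, where the reducer writes files named part-r-00000 and so on.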