MapReduce统计单词个数

测试数据

q w e t y u s hy d g h s g s e w f qw er a fs ds as
da ds sd df gf h g sds we sds sa fd sd sd as df f a w
we ew d fg s gf d h x f e f d sd r sd t ds
sd df f g x w t yu d c s t d d e
q w e t y u s hy d g h s g s e w f qw er a fs ds as
da ds sd df gf h g sds we sds sa fd sd sd as df f a w
we ew d fg s gf d h x f e f d sd r sd t ds
sd df f g x w t yu d c s t d d e
q w e t y u s hy d g h s g s e w f qw er a fs ds as
da ds sd df gf h g sds we sds sa fd sd sd as df f a w
we ew d fg s gf d h x f e f d sd r sd t ds
sd df f g x w t yu d c s t d d e
q w e t y u s hy d g h s g s e w f qw er a fs ds as
da ds sd df gf h g sds we sds sa fd sd sd as df f a w
we ew d fg s gf d h x f e f d sd r sd t ds
sd df f g x w t yu d c s t d d e
q w e t y u s hy d g h s g s e w f qw er a fs ds as
da ds sd df gf h g sds we sds sa fd sd sd as df f a w
we ew d fg s gf d h x f e f d sd r sd t ds
sd df f g x w t yu d c s t d d e
q w e t y u s hy d g h s g s e w f qw er a fs ds as
da ds sd df gf h g sds we sds sa fd sd sd as df f a w
we ew d fg s gf d h x f e f d sd r sd t ds
sd df f g x w t yu d c s t d d e
q w e t y u s hy d g h s g s e w f qw er a fs ds as
da ds sd df gf h g sds we sds sa fd sd sd as df f a w
we ew d fg s gf d h x f e f d sd r sd t ds
sd df f g x w t yu d c s t d d e

1.自定义Mapper

package com.wc;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;
import java.util.StringTokenizer;

public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    /** Reused output key holding the current word (avoids a per-record allocation). */
    private final Text mapOutputKey = new Text();
    /** Reused output value; set to 1 for every word occurrence emitted. */
    private final IntWritable mapOutputValue = new IntWritable();

    /**
     * Tokenizes one input line on whitespace and emits an intermediate
     * (word, 1) pair for each token found.
     *
     * @param key     byte offset of this line within the input split
     * @param value   the text content of one input line
     * @param context Hadoop context used to write intermediate pairs
     * @throws IOException          if emitting a record fails
     * @throws InterruptedException if the task is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // StringTokenizer splits on runs of whitespace by default.
        StringTokenizer tokens = new StringTokenizer(value.toString());
        while (tokens.hasMoreTokens()) {
            mapOutputKey.set(tokens.nextToken());
            mapOutputValue.set(1);
            context.write(mapOutputKey, mapOutputValue);
        }
    }
}

2.自定义Reducer

package com.wc;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    /** Reused output value holding the total count for the current word. */
    private final IntWritable outputValue = new IntWritable();

    /**
     * Sums all partial counts received for a single word and writes the
     * final (word, total) pair.
     *
     * @param key     the word being aggregated
     * @param values  all partial counts emitted by the mappers for this word
     * @param context Hadoop context used to write the final result
     * @throws IOException          if writing the result fails
     * @throws InterruptedException if the task is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int total = 0;
        for (IntWritable partial : values) {
            total += partial.get();
        }
        outputValue.set(total);
        context.write(key, outputValue);
    }
}

3.自定义Driver

package com.wc;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class WordCountDriver {

    /** Default sample input directory, used only when no CLI arguments are given. */
    private static final String DEFAULT_INPUT = "src/main/resources/input2/";
    /** Default output directory, used only when no CLI arguments are given. */
    private static final String DEFAULT_OUTPUT = "src/main/resources/output/";

    /**
     * Configures and submits the word-count job, then exits with status 0 on
     * success or 1 on failure.
     *
     * @param args optional: args[0] = input path, args[1] = output path;
     *             when fewer than two arguments are supplied, the bundled
     *             sample paths are used instead
     * @throws IOException            if job setup or filesystem access fails
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Bug fix: the original overwrote args unconditionally, so paths passed
        // on the command line were silently ignored. Fall back to the sample
        // paths only when the caller did not provide both paths.
        if (args.length < 2) {
            args = new String[]{DEFAULT_INPUT, DEFAULT_OUTPUT};
        }

        // Job configuration (expects core-site.xml on the classpath, e.g. under resources).
        Configuration cfg = new Configuration();
        Job job = Job.getInstance(cfg, WordCountDriver.class.getSimpleName());
        job.setJarByClass(WordCountDriver.class);

        // Mapper and its intermediate (key, value) types.
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Reducer and the job's final output types.
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Input and output locations.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        Path outputPath = new Path(args[1]);
        FileOutputFormat.setOutputPath(job, outputPath);

        // Hadoop refuses to start a job whose output directory already exists,
        // so remove any leftover directory from a previous run first.
        FileSystem fs = FileSystem.get(cfg);
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
            System.out.println("存在此输出路径,已删除!!!");
        }

        // Submit the job and block until it finishes, printing progress.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }

}

效果图

在这里插入图片描述

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

缘不易

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值