WordCount过程实现的两种方式

第一种 MapReduce实现WordCount过程
思路示意图
在这里插入图片描述
先写Mapper

package cnkgcmr;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
//import parquet.column.values.dictionary.DictionaryValuesWriter;
import java.io.IOException;

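// Example input line: "hdfs mapreduce yarn common hadoop"; the framework reads the file line by line, keyed by each line's offset.
// Example map output for that line: hdfs 1
// Because the input key is the line's offset, KEYIN is LongWritable.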
/**
 * Word-count mapper: for every word on an input line, emits the pair {@code (word, 1)}.
 *
 * <p>Type parameters of the parent {@link Mapper}: the input key is a {@link LongWritable}
 * (not used by this mapper), the input value is one line of {@link Text}, and the output is
 * a {@link Text} word paired with an {@link IntWritable} count of 1.
 *
 * <p>The previous declaration carried an unused class type parameter ({@code <IntegerWriter>});
 * it has been removed so that {@code WCMapper} is no longer a raw type wherever it is referenced.
 */
public class WCMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    /** Output key, reused across calls instead of allocating a new Text per word. */
    private final Text k = new Text();
    /** Constant output value: every occurrence of a word counts as one. */
    private final IntWritable v = new IntWritable(1);

    /**
     * Splits the line on single spaces and writes {@code (word, 1)} for each non-empty token.
     *
     * @param key     input key supplied by the framework (unused)
     * @param value   one line of input text
     * @param context sink for the emitted {@code (word, 1)} pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // 1. Split the line into words.
        String[] words = value.toString().split(" ");
        // 2. Emit each word with a count of 1.
        for (String word : words) {
            // Blank lines and leading/repeated spaces produce empty tokens; do not count "" as a word.
            if (word.isEmpty()) {
                continue;
            }
            k.set(word);
            context.write(k, v);
        }
    }
}

再写Reducer

package cnkgcmr;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Word-count reducer: adds up all counts received for one word and emits {@code (word, total)}.
 */
public class WCReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    /** Output value, reused across calls; overwritten with the total for each key. */
    IntWritable v = new IntWritable();

    /**
     * Sums the values grouped under {@code key} and writes the key with that sum.
     *
     * @param key     the word being reduced
     * @param values  the counts emitted for this word
     * @param context sink for the {@code (word, total)} pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int total = 0;
        for (IntWritable count : values) {
            total = total + count.get();
        }

        v.set(total);
        context.write(key, v);
    }
}

最后再写Driver

package cnkgcmr;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver for the word-count job: wires {@code WCMapper} and {@code WCReducer} into a
 * {@link Job}, sets the input/output paths, submits the job and exits with its status.
 *
 * <p>Usage: {@code WCDriver [<input path> <output path>]}. When fewer than two arguments are
 * given, the built-in default paths are used.
 */
public class WCDriver {
    /** Input file used when no paths are passed on the command line. */
    private static final String DEFAULT_INPUT = "D:\\INTELLIJ\\test01\\data\\wc.txt";
    /** Output directory used when no paths are passed on the command line. */
    private static final String DEFAULT_OUTPUT = "D:\\INTELLIJ\\test01\\data\\output2";

    /**
     * Configures and runs the job, then terminates the JVM with exit code 0 on success
     * and 1 on failure.
     *
     * @param args optional {@code [inputPath, outputPath]}; defaults apply when fewer than two are given
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        boolean pathsGiven = args.length >= 2;
        String inputPath = pathsGiven ? args[0] : DEFAULT_INPUT;
        String outputPath = pathsGiven ? args[1] : DEFAULT_OUTPUT;

        // 1. Load the configuration and create the job.
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);

        // 2. Set the jar by class; this is normally the driver class itself.
        job.setJarByClass(WCDriver.class);

        // 3. Mapper and its output types.
        job.setMapperClass(WCMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // 4. Reducer and the job's final output types.
        job.setReducerClass(WCReducer.class);
        job.setOutputKeyClass(Text.class);
        // Fix: this used to call setMapOutputValueClass a second time, which left the
        // final output value class undeclared.
        job.setOutputValueClass(IntWritable.class);

        // 5. Input and output paths. The output path must not already exist.
        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        // 6. Submit the job and wait for it to finish.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}

输入数据(wc.txt)如下:
hdfs mapreduce yarn common hadoop
hdfs mapreduce yarn common hadoop
hdfs mapreduce yarn common hadoop
hdfs mapreduce yarn common hadoop
hdfs mapreduce yarn common hadoop

最终运行得到的结果:
common 5
hadoop 5
hdfs 5
mapreduce 5
yarn 5

第二种 hivesql语句实现

-- Word count in HiveQL: split a comma-separated string, turn each element into a row, count per word.
SELECT
    word,
    COUNT(*) AS cnt
FROM (
    -- split() produces an array of words; explode() emits one row per array element.
    SELECT explode(split("aa,bb,cc,cc,ww,aa,aa,bb", ",")) AS word
) t
GROUP BY word;
-- 运行效果
+-------+------+--+
| word  | cnt  |
+-------+------+--+
| aa    | 3    |
| bb    | 2    |
| cc    | 2    |
| ww    | 1    |
+-------+------+--+
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值