A Super-Detailed MapReduce WordCount: Counting the Users with the Most Weibo Comments

Parse each line of JSON with fastjson

List<Map<String,Object>> parses = (List<Map<String,Object>>) JSON.parse(value.toString());
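
For reference, each input line is expected to be a JSON array of comment objects carrying a userId field. A hypothetical line (every field except userId is made up for illustration) might look like:

[{"userId":"1001","content":"nice post"},{"userId":"1002","content":"+1"},{"userId":"1001","content":"again"}]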

Extract the userId

for (Map<String, Object> pars : parses) {
    String new_value = (String) pars.get("userId");
    context.write(new IntWritable(1), new Text(new_value));
}

Complete Mapper code

package anu.mapereduce;

import com.alibaba.fastjson.JSON;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;
import java.util.List;
import java.util.Map;

/**
 *  yucheng_gu
 */
public class MainMapper extends Mapper<LongWritable, Text, IntWritable, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Each input line is a JSON array; parse it into a list of comment objects.
        List<Map<String, Object>> parses = (List<Map<String, Object>>) JSON.parse(value.toString());
        for (Map<String, Object> pars : parses) {
            // Emit a constant key (1) with the userId, so every userId
            // arrives at the same reduce call for global counting.
            String new_value = (String) pars.get("userId");
            context.write(new IntWritable(1), new Text(new_value));
        }
    }
}
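
Note that this Mapper writes the constant key 1 for every record, so all userIds are funneled into a single reduce call, where both the counting and the sorting happen. For comparison, here is a sketch of the more conventional WordCount layout, which emits the userId itself as the key and lets the shuffle do the grouping (an alternative, not the code used in this post; imports as in the Mapper above):

// Alternative sketch: emit (userId, 1) so the framework groups by user.
public class UserIdMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        List<Map<String, Object>> parses =
                (List<Map<String, Object>>) JSON.parse(value.toString());
        for (Map<String, Object> pars : parses) {
            // One (userId, 1) pair per comment; the reducer only needs to sum.
            context.write(new Text((String) pars.get("userId")), ONE);
        }
    }
}

With that layout the reducer simply sums its values, and the driver would set setMapOutputKeyClass(Text.class) and setMapOutputValueClass(IntWritable.class) instead.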

In the reduce stage, count how many times each user appears

Map<String, Integer> navs = new HashMap<>();
for (Text value : values) {
    Integer integer = navs.get(value.toString());
    if (integer == null) {
        navs.put(value.toString(), 1);
    } else {
        navs.put(value.toString(), integer + 1);
    }
}
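
On Java 8+, the same get-or-initialize logic can be written in one call with Map.merge; a minimal equivalent of the loop above:

for (Text value : values) {
    navs.merge(value.toString(), 1, Integer::sum);
}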

Sort all users by their comment counts

List<String> llas = new ArrayList<>();
for (String keys_l : navs.keySet()) {
    Integer is_v = 0;
    String nname = "null";
    // Copy the users that have not been picked yet.
    Map<String, Integer> new_navs = new HashMap<>();
    for (String keyaa : navs.keySet()) {
        if (!llas.contains(keyaa)) {
            new_navs.put(keyaa, navs.get(keyaa));
        }
    }
    // Find the remaining user with the highest count.
    for (String keys : new_navs.keySet()) {
        if (new_navs.get(keys) > is_v) {
            is_v = new_navs.get(keys);
            nname = keys;
        }
    }
    llas.add(nname);
}
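
This block is effectively a selection sort: each pass copies the not-yet-picked users into new_navs, scans them for the maximum, and appends that user to llas, which costs O(n²) overall. A shorter alternative sketch that sorts the entries once by count in descending order (Java 8+, also needs import java.util.stream.Collectors):

List<String> llas = navs.entrySet().stream()
        .sorted(Map.Entry.<String, Integer>comparingByValue().reversed())
        .map(Map.Entry::getKey)
        .collect(Collectors.toList());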

Emit the results

for (String lla : llas) {
    context.write(new Text(lla), new IntWritable(navs.get(lla)));
}

Complete Reducer code

package anu.mapereduce;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.*;

public class MainReduce extends Reducer<IntWritable, Text, Text, IntWritable> {

    @Override
    protected void reduce(IntWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        // Count how many times each userId appears.
        Map<String, Integer> navs = new HashMap<>();
        for (Text value : values) {
            Integer integer = navs.get(value.toString());
            if (integer == null) {
                navs.put(value.toString(), 1);
            } else {
                navs.put(value.toString(), integer + 1);
            }
        }
        // Selection sort: repeatedly pick the not-yet-chosen user
        // with the highest count and append it to llas.
        List<String> llas = new ArrayList<>();
        for (String keys_l : navs.keySet()) {
            Integer is_v = 0;
            String nname = "null";
            Map<String, Integer> new_navs = new HashMap<>();
            for (String keyaa : navs.keySet()) {
                if (!llas.contains(keyaa)) {
                    new_navs.put(keyaa, navs.get(keyaa));
                }
            }
            for (String keys : new_navs.keySet()) {
                if (new_navs.get(keys) > is_v) {
                    is_v = new_navs.get(keys);
                    nname = keys;
                }
            }
            llas.add(nname);
        }
        // Emit users in descending order of comment count.
        for (String lla : llas) {
            context.write(new Text(lla), new IntWritable(navs.get(lla)));
        }
    }
}

For easier debugging, the job runs in local mode rather than on a cluster; on Windows this typically means pointing hadoop.home.dir at a local Hadoop directory that contains winutils.exe. You can look up the setup details on Baidu or elsewhere online.

Complete code of the WordCountRunner driver class

package anu.mapereduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import java.io.IOException;

/**
 *  yucheng_gu
 */
public class WordCountRunner {
    public static void main(String[] args) throws InterruptedException, IOException, ClassNotFoundException {
        // Point hadoop.home.dir at the local Hadoop installation (needed for local mode on Windows).
        System.setProperty("hadoop.home.dir", "D:\\LocalServer\\hadoop-2.9.2");
        Configuration configuration = new Configuration();
        // Create a Job object; "MyWordCount" is the job name.
        Job myWordCount = Job.getInstance(configuration, "MyWordCount");
        // Configure the eight steps of the job.
        // Step 1: specify the input format and the source path.
        myWordCount.setInputFormatClass(TextInputFormat.class);
        //TextInputFormat.addInputPath(myWordCount, new Path(args[0]));
        // Step 2: specify the map class.
        myWordCount.setMapperClass(MainMapper.class);
        // Key type (k2) emitted by the map stage.
        myWordCount.setMapOutputKeyClass(IntWritable.class);
        // Value type (v2) emitted by the map stage.
        myWordCount.setMapOutputValueClass(Text.class);
        // Steps 3-6 use the defaults and need no configuration for now.
        // Step 7: specify the reduce class.
        myWordCount.setReducerClass(MainReduce.class);
        // Key type (k3) emitted by the reduce stage.
        myWordCount.setOutputKeyClass(Text.class);
        // Value type (v3) emitted by the reduce stage.
        myWordCount.setOutputValueClass(IntWritable.class);
        // Step 8: specify the output format.
        myWordCount.setOutputFormatClass(TextOutputFormat.class);
        // Input file and output directory (the output directory must not exist yet).
        FileInputFormat.setInputPaths(myWordCount,
                new Path("D:\\javaproject\\20210722_GOUP_11_GYC\\MapperReuceDemo01\\src\\main\\resources\\datas.json"));
        FileOutputFormat.setOutputPath(myWordCount,
                new Path("D:\\javaproject\\20210722_GOUP_11_GYC\\MapperReuceDemo01\\src\\main\\resources\\input"));
        // Wait for the job to finish and exit with its status.
        boolean b = myWordCount.waitForCompletion(true);
        System.exit(b ? 0 : 1);
    }
}
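
The commented-out addInputPath line hints at a more portable setup: pass the paths as program arguments instead of hardcoding them (the argument order here is an assumption):

// Hypothetical: read the input file and output directory from the command line.
FileInputFormat.setInputPaths(myWordCount, new Path(args[0]));
FileOutputFormat.setOutputPath(myWordCount, new Path(args[1]));

Also note that the output directory must not exist before the job runs, or Hadoop will refuse to start it.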

Output:
(screenshot of the job output, not reproduced here)
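
With the hypothetical input line shown earlier, the result file (part-r-00000) would contain tab-separated userId/count pairs in descending order of count, e.g.:

1001	2
1002	1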

This is my first blog post as a beginner, so please go easy on me!
