Storm 分组

1.shuffle :随机分组

2.field分组
        按照指定field的值进行hash处理,
        field值相同的tuple,一定进入到同一个bolt任务(task)。

     该分组容易产生数据倾斜问题,通过使用二次聚合避免此类问题。

3.使用二次聚合避免倾斜。

App类:

package com.mao.storm.group.shuffle;


import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;

/**
 * Entry point that builds the word-count topology and runs it for a fixed
 * time in an in-process {@link LocalCluster}.
 *
 * <p>Wiring, as declared below:
 * <pre>
 *   wcSpout --shuffle--> split-bolt --shuffle--> count-Bolt1 --fields("word")--> count-Bolt2
 * </pre>
 * count-Bolt1 is the first aggregation stage and count-Bolt2 the second; the
 * second stage uses fields grouping so that every partial count for a given
 * word reaches the same task.
 */
public class App {

    /**
     * Builds the topology, submits it to a local cluster, lets it run for
     * 20 seconds and then shuts the cluster down.
     *
     * @param args unused
     * @throws Exception if the topology cannot be submitted or the waiting
     *                   thread is interrupted
     */
    public static void main(String[] args) throws Exception {

        TopologyBuilder builder = new TopologyBuilder();

        // Spout (presumably emits lines of text; its source is not part of this file).
        builder.setSpout("wcSpout", new WordCountSpout()).setNumTasks(2);

        // Split bolt: receives spout output via shuffle grouping.
        builder.setBolt("split-bolt", new SplitBolt(), 1).shuffleGrouping("wcSpout").setNumTasks(1);

        // First-stage counter: fed by shuffle grouping from the split bolt.
        builder.setBolt("count-Bolt1", new WordCountBolt(), 1).shuffleGrouping("split-bolt").setNumTasks(1);

        // Second-stage counter: fields grouping on "word" routes all partial
        // counts of the same word to the same task.
        builder.setBolt("count-Bolt2", new WordCountBolt(), 3).fieldsGrouping("count-Bolt1", new Fields("word")).setNumTasks(3);

        Config config = new Config();
        config.setNumWorkers(2);
        config.setDebug(true);

        LocalCluster cluster = new LocalCluster();
        try {
            cluster.submitTopology("wcShuffle", config, builder.createTopology());

            // Let the topology run for a while before tearing it down.
            Thread.sleep(20000);
        } finally {
            // Always stop the in-process cluster, even when submission fails
            // or the sleep is interrupted; otherwise it is never shut down.
            cluster.shutdown();
        }
        System.out.println("over");

    }
}

SplitBolt类:

package com.mao.storm.group.shuffle;

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.IRichBolt;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

import java.util.Map;

/**
 * Bolt that splits the string in field 0 of each input tuple on single spaces
 * and emits one ("word", 1) tuple per non-empty token.
 */
public class SplitBolt implements IRichBolt {

    private TopologyContext context;
    private OutputCollector collector;

    /**
     * Keeps the context and the collector used later by {@link #execute(Tuple)}.
     *
     * @param map       topology configuration (unused)
     * @param context   topology context
     * @param collector collector used to emit and ack tuples
     */
    public void prepare(Map map, TopologyContext context, OutputCollector collector) {
        this.context = context;
        this.collector = collector;
    }

    /**
     * Splits the incoming line and emits a count of 1 for every word, then
     * acks the input tuple.
     *
     * @param tuple input tuple whose field 0 is read as the line of text
     */
    public void execute(Tuple tuple) {
        String line = tuple.getString(0);
        if (line != null) {
            for (String s : line.split(" ")) {
                // Consecutive spaces or an empty line yield empty tokens;
                // they are not words and must not be counted.
                if (s.length() > 0) {
                    collector.emit(new Values(s, 1));
                }
            }
        }
        // An IRichBolt has to ack (or fail) every tuple it receives; without
        // this, tuples from a reliable spout would only ever time out.
        // Emits above stay unanchored, exactly as before.
        collector.ack(tuple);
    }

    /** Nothing to release. */
    public void cleanup() {

    }

    /**
     * Declares the output schema: ("word", "count").
     *
     * @param outputFieldsDeclarer declarer supplied by the framework
     */
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        outputFieldsDeclarer.declare(new Fields("word","count"));
    }

    /**
     * @return {@code null}; no component-specific configuration is supplied
     */
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
WordCountBolt类:
package com.mao.storm.group.shuffle;

import com.mao.storm.util.Util;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.IRichBolt;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

import java.util.HashMap;
import java.util.Map;

/**
 * Counting bolt used for two-stage aggregation to mitigate data skew.
 *
 * <p>Counts are accumulated in memory and flushed downstream as
 * ("word", "count") tuples at most once per {@code duration} milliseconds.
 * The flush is only evaluated when a tuple arrives.
 *
 * <p>The original (truncated) note said that the first and second aggregation
 * stages are connected by fields grouping so the final totals are complete,
 * and that the first stage is connected to the preceding split step with
 * another grouping; in App that grouping is shuffle.
 *
 * <p>NOTE(review): App wires this same class as both stages. Nothing subscribes
 * to the final stage in the visible topology, and every flush clears the map,
 * so {@link #cleanup()} prints only what accumulated since the last flush,
 * not running totals. Confirm whether the last stage should keep its totals.
 */
public class WordCountBolt implements IRichBolt {
    private TopologyContext context;
    private OutputCollector collector;
    // Partial counts accumulated since the last flush.
    private Map<String,Integer> map1;
    // System.currentTimeMillis() value of the last flush (set in prepare()).
    private long lastEmitTime = 0;
    // Minimum interval between two flushes, in milliseconds.
    private long duration = 5000;

    /**
     * Keeps the context and collector, creates the count map and starts the
     * flush window.
     *
     * @param map       topology configuration (unused)
     * @param context   topology context
     * @param collector collector used to emit and ack tuples
     */
    public void prepare(Map map, TopologyContext context, OutputCollector collector) {
        this.context = context;
        this.collector = collector;
        map1 = new HashMap<String, Integer>();
        // Start the window now. With the former initial value of 0 the very
        // first tuple always triggered an immediate flush of a single word.
        lastEmitTime = System.currentTimeMillis();
    }


    /**
     * Adds the incoming count to the running count of its word, flushes all
     * accumulated counts when the flush interval has elapsed, and acks the
     * input tuple.
     *
     * @param tuple input tuple; field 0 is read as the word, field 1 as its count
     */
    public void execute(Tuple tuple) {
        String word = tuple.getString(0);
        Integer count = tuple.getInteger(1);
        Util.sendToLocalhost(this, word);

        // Single lookup instead of containsKey + get.
        Integer previous = map1.get(word);
        if (previous == null){
            map1.put(word,count);
        }else {
            map1.put(word,previous + count);
        }

        long nowTime = System.currentTimeMillis();
        if (nowTime - lastEmitTime >duration){
            for (Map.Entry<String,Integer> entry : map1.entrySet()){
                collector.emit(new Values(entry.getKey(),entry.getValue()));
            }
            // Flushed counts are handed to the next stage; start a new window.
            map1.clear();
            lastEmitTime = nowTime;
        }

        // An IRichBolt has to ack (or fail) every tuple it receives.
        collector.ack(tuple);
    }

    /** Prints whatever counts are still held, i.e. those not flushed yet. */
    public void cleanup() {
        for (Map.Entry<String,Integer> entry : map1.entrySet()){
            System.out.println("wordCountNums:"+entry.getKey()+" : "+entry.getValue());
        }
    }

    /**
     * Declares the output schema: ("word", "count").
     *
     * @param outputFieldsDeclarer declarer supplied by the framework
     */
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        outputFieldsDeclarer.declare(new Fields("word","count"));
    }

    /**
     * @return {@code null}; no component-specific configuration is supplied
     */
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
WordCountSpout类:(注:下面贴出的代码与上文的SplitBolt类完全相同,原文并未给出WordCountSpout的实现)
package com.mao.storm.group.shuffle;

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.IRichBolt;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

import java.util.Map;

/**
 * Bolt that splits the string in field 0 of each input tuple on single spaces
 * and emits one ("word", 1) tuple per non-empty token.
 */
public class SplitBolt implements IRichBolt {

    private TopologyContext context;
    private OutputCollector collector;

    /**
     * Keeps the context and the collector used later by {@link #execute(Tuple)}.
     *
     * @param map       topology configuration (unused)
     * @param context   topology context
     * @param collector collector used to emit and ack tuples
     */
    public void prepare(Map map, TopologyContext context, OutputCollector collector) {
        this.context = context;
        this.collector = collector;
    }

    /**
     * Splits the incoming line and emits a count of 1 for every word, then
     * acks the input tuple.
     *
     * @param tuple input tuple whose field 0 is read as the line of text
     */
    public void execute(Tuple tuple) {
        String line = tuple.getString(0);
        if (line != null) {
            for (String s : line.split(" ")) {
                // Consecutive spaces or an empty line yield empty tokens;
                // they are not words and must not be counted.
                if (s.length() > 0) {
                    collector.emit(new Values(s, 1));
                }
            }
        }
        // An IRichBolt has to ack (or fail) every tuple it receives; without
        // this, tuples from a reliable spout would only ever time out.
        // Emits above stay unanchored, exactly as before.
        collector.ack(tuple);
    }

    /** Nothing to release. */
    public void cleanup() {

    }

    /**
     * Declares the output schema: ("word", "count").
     *
     * @param outputFieldsDeclarer declarer supplied by the framework
     */
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        outputFieldsDeclarer.declare(new Fields("word","count"));
    }

    /**
     * @return {@code null}; no component-specific configuration is supplied
     */
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值