Storm word counting

Create the data stream with SentenceSpout

package wang.doug.river.storm.store;


import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Map;
import java.util.Random;

public class SentenceSpout extends BaseRichSpout {
    private static final Logger logger = LoggerFactory.getLogger(SentenceSpout.class);

    public SentenceSpout() {
        logger.info("gouzao");
    }

    SpoutOutputCollector _collector;
    Random _rand;
    int _index = 0;
    String[] sentences = new String[]{
            "hello you",
            "hello me"
    };


    /**
     * Initialization; runs only once per spout task.
     * @param conf      topology configuration
     * @param context   topology context
     * @param collector collector used to emit tuples
     */
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        logger.info("open...");
        _collector = collector;
        _rand = new Random();
    }

    /**
     * Called by Storm in a loop to emit tuples; runs in every spout task,
     * according to the configured parallelism.
     */
    public void nextTuple() {
        if (_index >= sentences.length) { // all sentences emitted; nothing left to do
            return;
        }
        logger.info("nextTuple...");

//        final String sentence = sentences[_rand.nextInt(sentences.length)];

//       logger.info("Emitting tuple: "+ sentence);


        _collector.emit(new Values(sentences[_index]));
        _index ++;
        Utils.sleep(1000);
    }


    @Override
    public void ack(Object id) {
        logger.info("ack id:" + id);
    }

    @Override
    public void fail(Object id) {
        logger.info("fail id:" + id);
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));
    }


}
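
As written, the spout emits unanchored tuples (no message ID), so the ack and fail callbacks above are never invoked. To enable Storm's reliability tracking, each tuple would have to be emitted with a message ID, roughly like this (a sketch; the ID scheme here is an assumption, not part of the original):

// Hypothetical reliable emit: with a unique message ID, Storm will call
// ack(id) or fail(id) once downstream bolts anchor and ack the tuple.
_collector.emit(new Values(sentences[_index]), "msg-" + _index);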

Split sentences into words with SplitSentenceBolt

package wang.doug.river.storm.store;

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.IRichBolt;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Map;

public class SplitSentenceBolt implements IRichBolt {

    private static final Logger logger = LoggerFactory.getLogger(SplitSentenceBolt.class);

    private OutputCollector _collector;

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));
    }

    public Map<String, Object> getComponentConfiguration() {
        return null;
    }

    public void prepare(Map stormConf, TopologyContext context,
                        OutputCollector collector) {
        _collector = collector;
    }

    public void execute(Tuple tuple) {
        String sentence = tuple.getStringByField("word");
        logger.info("sentence:" + sentence);

        String[] words = sentence.split(" ");
        for (String word : words) {
            this._collector.emit(new Values(word));
        }
    }

    public void cleanup() {
        logger.info("cleanup");
    }
}
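
Note that SplitSentenceBolt implements IRichBolt directly, so acking is its own responsibility (BaseBasicBolt, used by the next two bolts, anchors and acks automatically). If the spout emitted anchored tuples, execute would also need to anchor its output and ack its input, roughly as follows (a sketch, not part of the original):

public void execute(Tuple tuple) {
    String sentence = tuple.getStringByField("word");
    for (String word : sentence.split(" ")) {
        _collector.emit(tuple, new Values(word)); // anchor output to the input tuple
    }
    _collector.ack(tuple); // mark the input tuple as fully processed
}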

Count the split words with WordCountBolt

package wang.doug.river.storm.store;

import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.HashMap;
import java.util.Map;

public class WordCountBolt extends BaseBasicBolt {


    private static final Logger logger = LoggerFactory.getLogger(WordCountBolt.class);

    Map<String, Integer> wordCountMap = new HashMap<String, Integer>();



    public void execute(Tuple tuple, BasicOutputCollector collector) {
        String word = tuple.getString(0);
        logger.info("....word:" + word);

        Integer count = wordCountMap.get(word);
        count = (count == null) ? 0 : count;
        count++;
        wordCountMap.put(word, count);

        // Example log output as words accumulate:
        //   {hello=1}
        //   {me=1, hello=2, you=1}
        logger.info("...wcmap:" + wordCountMap);

        collector.emit(new Values(word, count));
        Utils.sleep(1000);
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word", "count"));
    }
}

WordSumBolt aggregates the data sent by WordCountBolt and produces the final totals

package wang.doug.river.storm.store;

import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Tuple;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.*;

public class WordSumBolt extends BaseBasicBolt {

    private static final Logger logger = LoggerFactory.getLogger(WordSumBolt.class);

    Map<String, Integer> counts = new HashMap<String, Integer>();

    public void execute(Tuple tuple, BasicOutputCollector collector) {
        String word = tuple.getStringByField("word");
        Integer count = tuple.getIntegerByField("count");

        // Keep the latest count per word; WordCountBolt emits a running total.
        this.counts.put(word, count);
        logger.info("...wsmap:" + this.counts);
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Terminal bolt: no output fields to declare.
    }

    @Override
    public void cleanup() {
        logger.info("-----------------汇总结果-----------------------");
        List<String> keys = new ArrayList<String>();
        keys.addAll(this.counts.keySet());
        Collections.sort(keys);

        for (String key : keys) {
            logger.info(key + " : " + this.counts.get(key));
        }

    }

}
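
One caveat: the topology below wires this bolt with globalGrouping, which sends every tuple to the single lowest-id task, so a parallelism hint of 2 leaves one WordSumBolt executor idle. A hint of 1 would express the intent more directly (a suggested tweak, not in the original):

builder.setBolt("sum", new WordSumBolt(), 1).globalGrouping("count");

Also note that cleanup(), where the summary is printed, is only guaranteed to be called in local mode; on a real cluster, workers may be killed without running it.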

The test class

package wang.doug.river.storm.store;


import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;


public class WordCountTopology {

    public static void main(String[] args) throws Exception {

        TopologyBuilder builder = new TopologyBuilder();

        // 4 means four data sources, i.e. four spout instances.
        builder.setSpout("spout", new SentenceSpout(), 4);

        // shuffleGrouping: each tuple is sent to a randomly chosen task.
        // 2 means two SplitSentenceBolt instances process the spout's output.
        builder.setBolt("split", new SplitSentenceBolt(), 2).shuffleGrouping("spout");

        // fieldsGrouping: tuples are consistently hashed on the given fields,
        // so tuples with the same hash value go to the same task.
        // 2 means two WordCountBolt instances process the split bolt's output.
        builder.setBolt("count", new WordCountBolt(), 2).fieldsGrouping("split", new Fields("word"));

        // globalGrouping: every tuple is sent to the task with the lowest id in the bolt.
        // 2 means two WordSumBolt instances process WordCountBolt's output.
        builder.setBolt("sum", new WordSumBolt(), 2).globalGrouping("count");

        Config conf = new Config();
        conf.setDebug(true);

        if (args != null && args.length > 0) {
            // On a Storm cluster, workers are the unit of resource allocation.
            // If a topology does not specify a worker count, the default is used.
            conf.setNumWorkers(3);
            StormSubmitter.submitTopologyWithProgressBar(args[0], conf, builder.createTopology());
        } else {
            System.out.println("......... local mode");
            conf.setMaxTaskParallelism(4); // max threads a single component may use in local mode

            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("word-count", conf, builder.createTopology());

            Thread.sleep(10 * 60 * 1000); // keep the local cluster running for 10 minutes

            cluster.shutdown();
        }
    }
}

Run the test
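
Each of the four spout instances emits "hello you" and "hello me" exactly once, so when the local cluster shuts down, the summary printed by WordSumBolt.cleanup() should read:

hello : 8
me : 4
you : 4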
