【Storm笔记】05-单词计数案例

最新推荐文章于 2020-12-14 22:46:56 发布

一直不懂

最新推荐文章于 2020-12-14 22:46:56 发布

阅读量273

点赞数

分类专栏： Storm笔记

本文链接：https://blog.csdn.net/shenchaohao12321/article/details/85110221

版权

Storm笔记专栏收录该内容

10 篇文章 0 订阅

订阅专栏

1、功能说明

设计一个topology，统计文档中每个单词出现的频率。

整个topology分为三个部分：

RandomSentenceSpout：数据源，在已知的英文句子中，随机发送一条句子出去。
SplitSentenceBolt：负责将单行文本记录（句子）切分成单词。
WordCountBolt：负责累加每个单词出现的次数。

2、Storm程序驱动类

/**
 * Driver that assembles the word-count topology and submits it to Storm.
 *
 * <p>Data flow: {@code RandomSentenceSpout} -> {@code SplitSentenceBolt} -> {@code WordCountBolt}.
 */
public class WordCountTopology {

   /**
    * Builds the topology and submits it under the name {@code word-count}.
    *
    * @param args command-line arguments (unused)
    * @throws Exception propagated from {@code StormSubmitter.submitTopology}
    */
   public static void main(String[] args) throws Exception {

      System.out.println("开始构造topology");
      // Assemble the topology
      TopologyBuilder builder = new TopologyBuilder();

      String spoutId = "s_words";
      String splitBoltId = "b_split";
      String countBoltId = "b_count";
      // Spout: component id, spout instance, number of executors running it
      builder.setSpout(spoutId, new RandomSentenceSpout(), 2);
      // Split bolt: component id, bolt instance, number of executors,
      // upstream component id and grouping (shuffle)
      builder.setBolt(splitBoltId, new SplitSentenceBolt(), 4).shuffleGrouping(spoutId);
      // Count bolt: component id, bolt instance, number of executors,
      // upstream component id and grouping (fields grouping on "word").
      // The class defined for this role is WordCountBolt (not CountWordBolt).
      builder.setBolt(countBoltId, new WordCountBolt(), 4).fieldsGrouping(splitBoltId, new Fields("word"));

      // Runtime configuration
      Config conf = new Config();
      conf.setDebug(true);
      // Number of workers
      conf.setNumWorkers(3);

      // Submit the topology
      String topologyName = "word-count";
      StormSubmitter.submitTopology(topologyName, conf, builder.createTopology());
      System.out.println("提交topology");
   }

}

3、RandomSentenceSpout的实现及生命周期

/**
 * Data-source spout: on every {@link #nextTuple()} call it picks one sentence at random
 * from a fixed list of English sentences and emits it as a single-field tuple.
 */
public class RandomSentenceSpout extends BaseRichSpout {
   private static final long serialVersionUID = -305466827631750450L;

   /** Fixed pool of sentences; kept as a constant so it is not rebuilt for every emitted tuple. */
   private static final String[] SENTENCES = {
         "the cow jumped over the moon",
         "an apple a day keeps the doctor away",
         "four score and seven years ago",
         "snow white and the seven dwarfs",
         "i am at two with nature" };

   SpoutOutputCollector _collector;
   Random _rand;

   /**
    * Stores the collector and creates the random generator used to pick sentences.
    *
    * @param conf      topology configuration (unused)
    * @param context   topology context (unused)
    * @param collector collector used to emit tuples
    */
   @Override
   public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
      _collector = collector;
      _rand = new Random();
   }

   /**
    * Sleeps 100 ms, then emits one randomly chosen sentence.
    */
   @Override
   public void nextTuple() {
      Utils.sleep(100);
      // Only the chosen sentence goes through the sentence() hook.
      final String sentence = sentence(SENTENCES[_rand.nextInt(SENTENCES.length)]);

      System.out.println("发出 tuple: " + sentence);

      // Emitted without a message id, so the tuple is not tracked for ack/fail.
      _collector.emit(new Values(sentence));
   }

   /**
    * Hook applied to a sentence before it is emitted; this implementation returns it unchanged.
    *
    * @param input the raw sentence
    * @return the sentence to emit
    */
   protected String sentence(String input) {
      return input;
   }

   @Override
   public void ack(Object id) {
      // No ack guarantee: nothing to do
   }

   @Override
   public void fail(Object id) {
      // No ack guarantee: nothing to do
   }

   /**
    * Declares the single output field. It is named "word" even though the value is a whole
    * sentence; the name is kept unchanged so existing consumers are not affected.
    */
   @Override
   public void declareOutputFields(OutputFieldsDeclarer declarer) {
      declarer.declare(new Fields("word"));
   }

}

4、SplitSentenceBolt的实现及生命周期

/**
 * Bolt that splits each incoming sentence on single spaces and emits one tuple per word,
 * anchored to the input tuple.
 */
public class SplitSentenceBolt extends BaseRichBolt {

   private static final long serialVersionUID = -4052979219004386147L;

   private OutputCollector _collector;

   /**
    * Stores the collector used later to emit and ack tuples.
    *
    * @param stormConf topology configuration (unused)
    * @param context   topology context (unused)
    * @param collector collector used to emit and ack tuples
    */
   @Override
   public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
      _collector = collector;
   }

   /**
    * Splits the first value of the input tuple on " " and emits each piece as a
    * one-field tuple anchored to the input, then acks the input.
    *
    * @param input tuple whose first value is the sentence to split
    */
   @Override
   public void execute(Tuple input) {
      System.out.println("收到 tuple: " + input);
      String[] words = input.getValue(0).toString().split(" ");
      for (String word : words) {
         List<Object> outputTuple = new Values(word);
         System.out.println("发出 tuple: " + outputTuple);
         _collector.emit(input, outputTuple);
      }
      // BaseRichBolt does not ack automatically. The emits above are anchored to the
      // input, so without this ack a tracked tuple tree would never complete and would
      // eventually time out and be failed.
      _collector.ack(input);
   }

   /**
    * Declares the output fields of the emitted tuples; must match what is emitted.
    */
   @Override
   public void declareOutputFields(OutputFieldsDeclarer declarer) {
      declarer.declare(new Fields("word"));
   }

}

5、WordCountBolt的实现及生命周期

/**
 * Terminal bolt that keeps an in-memory running count for every word it receives
 * and prints the updated count after each tuple.
 */
public class WordCountBolt extends BaseBasicBolt {

   private static final long serialVersionUID = 5406465393295822560L;

   /** Running occurrence count per word, local to this bolt instance. */
   Map<String, Integer> counts = new HashMap<String, Integer>();

   /**
    * Increments the count of the word held in the tuple's first field and prints it.
    *
    * @param tuple     input tuple; its first field is read as the word
    * @param collector output collector (unused, this bolt emits nothing)
    */
   @Override
   public void execute(Tuple tuple, BasicOutputCollector collector) {
      final String word = tuple.getString(0);
      final Integer previous = counts.get(word);
      // First occurrence starts at 1, otherwise add one to the stored count.
      final int updated = (previous == null) ? 1 : previous + 1;
      counts.put(word, updated);
      System.out.println("word count: " + word + ":" + updated);
   }

   /**
    * Declares no output fields because this bolt does not emit tuples.
    */
   @Override
   public void declareOutputFields(OutputFieldsDeclarer declarer) {
      // Nothing to declare
   }

   /**
    * Prints the final counts, sorted by word, between a start and an end marker line.
    * Intended to run when the topology is killed.
    * NOTE(review): Storm's IBolt documentation says cleanup is not guaranteed to be
    * called on a cluster - confirm before relying on this output.
    */
   @Override
   public void cleanup() {
      System.out.println("-----------------最终结果开始-----------------------");
      final List<String> sortedWords = new ArrayList<String>(this.counts.keySet());
      Collections.sort(sortedWords);

      for (int i = 0; i < sortedWords.size(); i++) {
         final String word = sortedWords.get(i);
         System.out.println(word + " : " + this.counts.get(word));
      }

      System.out.println("-----------------最终结果结束-----------------------");
   }
}