spout 数据源:message queue(消息队列)。其他数据源(比如直接读文件)都不适合作为生产环境的流数据源,在企业中不会这样用
MQ,db,文件
直接流数据源只有消息队列一种才适合storm流计算特点
从db只能读配置文件之类,不能增量读
log文件增量数据:1读出内容写入消息队列,2,storm处理
storm作业是分布式应用,而文件只在一个节点上
直接说那个读文件的例子,如果是下面的情况:
TopologyBuilder builder = new TopologyBuilder();
builder.setSpout("spout", new MySpout(), 2); //线程数是2,读文件会出现线程不安全
builder.setBolt("bolt", new MyBolt(), 1).shuffleGrouping("spout");
如果用这种方式读文件,两个 spout 线程(executor)会各自把文件完整读一遍,同一份数据会被重复读出两份
但如果换成消息队列就不同了:消息队列的机制是,一条消息被某个消费者消费掉之后,其他消费者就再也取不到这条消息了。所以即使 spout 开多个并发,消息队列也不会出现重复消费的情况。
而bolt阶段可以开并发
代码:
spout端:
package wordcount; import backtype.storm.spout.SpoutOutputCollector; import backtype.storm.task.TopologyContext; import backtype.storm.topology.OutputFieldsDeclarer; import backtype.storm.topology.base.BaseRichSpout; import backtype.storm.tuple.Fields; import backtype.storm.tuple.Values; import backtype.storm.utils.Utils; import java.util.Map; import java.util.Random; public class RandomSentenceSpout extends BaseRichSpout { SpoutOutputCollector _collector; Random _rand; @Override public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) { _collector = collector; _rand = new Random(); } String[] sentences = new String[]{ "the cow jumped over the moon", "an apple a day keeps the doctor away", "four score and seven years ago", "snow white and the seven dwarfs", "i am at two with nature" }; @Override public void nextTuple() { Utils.sleep(100); for(String sentence : sentences){ _collector.emit(new Values(sentence)); } Utils.sleep(10*1000); } @Override public void ack(Object id) { } @Override public void fail(Object id) { } @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("word")); } }其中一个bolt,做格式化处理
package wordcount; import backtype.storm.task.TopologyContext; import backtype.storm.topology.BasicOutputCollector; import backtype.storm.topology.FailedException; import backtype.storm.topology.IBasicBolt; import backtype.storm.topology.OutputFieldsDeclarer; import backtype.storm.tuple.Fields; import backtype.storm.tuple.Tuple; import backtype.storm.tuple.Values; import java.util.Map; /** * Created by Administrator on 2016/10/6. */ public class Mysplit implements IBasicBolt { /* m每个bolt最好序列化一下,免得开高并发的时候出错! */ private static final long serialVersionUID = 1L; String patton; public Mysplit(String patton){ this.patton=patton; } @Override public void prepare(Map map, TopologyContext topologyContext) { } @Override public void execute(Tuple tuple, BasicOutputCollector basicOutputCollector) { try{ String sen = tuple.getString(0); if(sen != null){ for(String word : sen.split(patton)){ //把每个单词转化成list发送过去 basicOutputCollector.emit(new Values(word)); } } }catch (FailedException e){ e.printStackTrace(); } } @Override public void cleanup() { } @Override public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) { outputFieldsDeclarer.declare(new Fields("word")); } @Override public Map<String, Object> getComponentConfiguration() { return null; } }最后一个bolt,做单词统计
package wordcount; import backtype.storm.Config; import backtype.storm.LocalCluster; import backtype.storm.StormSubmitter; import backtype.storm.task.ShellBolt; import backtype.storm.topology.BasicOutputCollector; import backtype.storm.topology.IRichBolt; import backtype.storm.topology.OutputFieldsDeclarer; import backtype.storm.topology.TopologyBuilder; import backtype.storm.topology.base.BaseBasicBolt; import backtype.storm.tuple.Fields; import backtype.storm.tuple.Tuple; import backtype.storm.tuple.Values; import storm.starter.spout.RandomSentenceSpout; import java.util.HashMap; import java.util.Map; /** * This topology demonstrates Storm's stream groupings and multilang capabilities. */ public class WordCountTopology { public static class WordCount extends BaseBasicBolt { Map<String, Integer> counts = new HashMap<String, Integer>(); @Override public void execute(Tuple tuple, BasicOutputCollector collector) { String word = tuple.getString(0); Integer count = counts.get(word); if (count == null) count = 0; count++; counts.put(word, count); System.out.println(Thread.currentThread()+" word="+word+"; count="+count); collector.emit(new Values(word, count)); } @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("word", "count")); } } public static void main(String[] args) throws Exception { TopologyBuilder builder = new TopologyBuilder(); builder.setSpout("spout", new RandomSentenceSpout(), 1); builder.setBolt("split", new Mysplit(" "), 8).shuffleGrouping("spout"); builder.setBolt("count", new WordCount(), 12).fieldsGrouping("split", new Fields("word")); Config conf = new Config(); conf.setDebug(true); if (args != null && args.length > 0) { conf.setNumWorkers(3); StormSubmitter.submitTopology(args[0], conf, builder.createTopology()); } else { conf.setMaxTaskParallelism(3); LocalCluster cluster = new LocalCluster(); cluster.submitTopology("word-count", conf, builder.createTopology()); Thread.sleep(10000); 
cluster.shutdown(); } } }