Creating the data source: SentenceSpout
package wang.doug.river.storm.store;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Map;
import java.util.Random;
public class SentenceSpout extends BaseRichSpout {
    private static final Logger logger = LoggerFactory.getLogger(SentenceSpout.class);

    public SentenceSpout() {
        logger.info("constructor");
    }

    SpoutOutputCollector _collector;
    Random _rand;
    int _index = 0;
    String[] sentences = new String[]{
            "hello you",
            "hello me"
            // "1,2019092410010000,100.00",
            // "And nodding by the fire",
            // "take down this book And",
            // "And slowly read",
            // "and dream of the soft look",
            // "Your eyes had once",
            // "and of their shadows deep",
            // "How many loved your moments of glad grace",
            // "And loved your beauty with love false or true"
    };

    /**
     * Initialization; called exactly once when the spout starts.
     * @param conf
     * @param context
     * @param collector
     */
    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        logger.info("open...");
        _collector = collector;
        _rand = new Random();
    }

    /**
     * Called repeatedly by Storm to emit tuples; how often it runs across
     * the cluster depends on the configured parallelism.
     */
    @Override
    public void nextTuple() {
        if (_index > sentences.length - 1) {
            return;
        }
        logger.info("nextTuple...");
        // final String sentence = sentences[_rand.nextInt(sentences.length)];
        // logger.info("Emitting tuple: " + sentence);
        _collector.emit(new Values(sentences[_index]));
        _index++;
        Utils.sleep(1000);
    }

    @Override
    public void ack(Object id) {
        logger.info("ack id:" + id);
    }

    @Override
    public void fail(Object id) {
        logger.info("fail id:" + id);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));
    }
}
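Note that the ack and fail callbacks above never actually fire: the spout emits tuples without a message id, so Storm does not track them. A minimal sketch of the reliable variant of the emit call (the msgId variable is my own; any unique object works):

// In nextTuple(): attach a message id so Storm tracks the tuple through
// the topology and calls ack()/fail() on this spout when it completes or fails.
String msgId = String.valueOf(_index);
_collector.emit(new Values(sentences[_index]), msgId);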
Splitting sentences with SplitSentenceBolt
package wang.doug.river.storm.store;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.IRichBolt;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Map;
public class SplitSentenceBolt implements IRichBolt {
    private static final Logger logger = LoggerFactory.getLogger(SplitSentenceBolt.class);
    private OutputCollector _collector;

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));
    }

    public Map<String, Object> getComponentConfiguration() {
        return null;
    }

    public void prepare(Map stormConf, TopologyContext context,
                        OutputCollector collector) {
        _collector = collector;
    }

    public void execute(Tuple tuple) {
        // logger.info(tuple.toString());
        String sentence = tuple.getStringByField("word");
        logger.info("sentence:" + sentence);
        String[] words = sentence.split(" ");
        for (String word : words) {
            this._collector.emit(new Values(word));
        }
        // IRichBolt does not auto-ack; acknowledge explicitly (a no-op here
        // because the spout emits unanchored tuples, but good practice).
        this._collector.ack(tuple);
    }

    public void cleanup() {
        logger.info("cleanup");
    }
}
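If the spout emitted tuples with message ids, this bolt should also anchor its output to the input tuple, so that a downstream failure propagates back to the spout. A sketch of the anchored form of the emit loop above:

// Anchored emit: each output word tuple is tied to the input sentence tuple,
// so a failure anywhere downstream eventually triggers fail() on the spout.
for (String word : words) {
    this._collector.emit(tuple, new Values(word));
}
this._collector.ack(tuple);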
Counting the split words with WordCountBolt
package wang.doug.river.storm.store;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.HashMap;
import java.util.Map;
public class WordCountBolt extends BaseBasicBolt {
    private static final Logger logger = LoggerFactory.getLogger(WordCountBolt.class);
    Map<String, Integer> wordCountMap = new HashMap<String, Integer>();

    public void execute(Tuple tuple, BasicOutputCollector collector) {
        // logger.info("....WordCountBolt.tuple" + tuple.toString());
        String word = tuple.getString(0);
        logger.info("....word:" + word); // e.g. "you"
        Integer count = wordCountMap.get(word);
        count = (count == null) ? 0 : count;
        count++;
        wordCountMap.put(word, count);
        // logger.info("word:" + word + " count:" + count);
        logger.info("...wcmap:" + wordCountMap);
        /**
         * Sample log output as tuples arrive:
         * {hello=1}
         * {me=1, hello=2, you=1}
         */
        collector.emit(new Values(word, count));
        Utils.sleep(1000); // slow down so the log output stays readable
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word", "count"));
    }
}
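The null check plus increment can be collapsed with Map.merge (Java 8+); a minimal equivalent of the counting step above:

// One-line update: store 1 for a new word, otherwise add 1 to the old count;
// merge returns the new value, which is exactly the count to emit.
int count = wordCountMap.merge(word, 1, Integer::sum);
collector.emit(new Values(word, count));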
WordSumBolt aggregates the counts sent by WordCountBolt into the final totals
package wang.doug.river.storm.store;
import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Tuple;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
public class WordSumBolt extends BaseBasicBolt {
    private static final Logger logger = LoggerFactory.getLogger(WordSumBolt.class);
    Map<String, Integer> counts = new HashMap<String, Integer>();

    public void execute(Tuple tuple, BasicOutputCollector collector) {
        String word = tuple.getStringByField("word");
        Integer count = tuple.getIntegerByField("count");
        // logger.info("----word:" + word + " count:" + count);
        // The incoming count is already cumulative: fieldsGrouping routes every
        // occurrence of a word to the same WordCountBolt task, so overwriting
        // with put() keeps the latest running total per word.
        this.counts.put(word, count);
        logger.info("...wsmap:" + this.counts);
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // terminal bolt: declares no output stream
    }

    @Override
    public void cleanup() {
        logger.info("----------------- final totals -----------------");
        List<String> keys = new ArrayList<String>();
        keys.addAll(this.counts.keySet());
        Collections.sort(keys);
        for (String key : keys) {
            logger.info(key + " : " + this.counts.get(key));
        }
    }
}
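Two caveats. First, cleanup() is only reliably invoked in local mode (when the LocalCluster shuts down); on a real cluster the worker may be killed without it running, so don't rely on it for final results in production. Second, since cleanup() only needs the keys in sorted order, a TreeMap keeps them ordered as they arrive; a sketch of that variant:

// A java.util.TreeMap keeps keys in natural alphabetical order,
// so cleanup() can iterate directly without collecting and sorting the keys.
Map<String, Integer> counts = new TreeMap<String, Integer>();

@Override
public void cleanup() {
    for (Map.Entry<String, Integer> e : counts.entrySet()) {
        logger.info(e.getKey() + " : " + e.getValue());
    }
}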
The topology driver (test class)
package wang.doug.river.storm.store;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;
public class WordCountTopology {
    public static void main(String[] args) throws Exception {
        TopologyBuilder builder = new TopologyBuilder();
        // parallelism hint 4: four SentenceSpout instances, i.e. four data sources
        builder.setSpout("spout", new SentenceSpout(), 4);
        // shuffleGrouping: each tuple is sent to a randomly chosen task.
        // parallelism hint 2: two SplitSentenceBolt instances handle the spout's output
        builder.setBolt("split", new SplitSentenceBolt(), 2).shuffleGrouping("spout");
        // fieldsGrouping: consistent-hashes on the named tuple fields, so tuples
        // with the same hash (here, the same word) always go to the same task.
        // parallelism hint 2: two WordCountBolt instances handle the split output
        builder.setBolt("count", new WordCountBolt(), 2).fieldsGrouping("split", new Fields("word"));
        // globalGrouping: every tuple goes to the task with the lowest task id,
        // so even with a parallelism hint of 2 only one WordSumBolt task does work.
        builder.setBolt("sum", new WordSumBolt(), 2).globalGrouping("count");
        Config conf = new Config();
        conf.setDebug(true);
        if (args != null && args.length > 0) {
            // In a Storm cluster, workers are the unit of resource allocation;
            // if a topology does not set a worker count, the default is used.
            conf.setNumWorkers(3);
            StormSubmitter.submitTopologyWithProgressBar(args[0], conf, builder.createTopology());
        } else {
            System.out.println("......... local mode");
            conf.setMaxTaskParallelism(4); // max threads a single component may use in local mode
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("word-count", conf, builder.createTopology());
            Thread.sleep(10 * 60 * 1000);
            cluster.shutdown();
        }
    }
}
Running the test
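In local mode, run the main class with no program arguments (e.g. from the IDE); the topology runs for ten minutes, then shuts down and prints the final totals from cleanup(). For a cluster, package the project into a jar and submit it with the storm CLI, passing the topology name as the first argument. A sketch, assuming the packaged jar is named storm-wordcount.jar (the jar name is a placeholder):

# Cluster mode: the first program argument becomes the topology name.
storm jar storm-wordcount.jar wang.doug.river.storm.store.WordCountTopology word-count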