拓扑结构如下:
上面的示意图中有4个组件,分别为一个spout和3个bolt。数据源spout(WordSpout)取得数据(一个句子,里面包含多个以空格分隔的单词)以后,发送给WordSplitBolt进行切分,然后由WordCountBolt统计每个单词出现的次数,最终由WordReportBolt记录并输出结果。
WordSpout
package com.zyt.storm.spout;
import java.util.Map;
import com.zyt.storm.util.ThreadUtils;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichSpout;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
/**
 * Data-source spout of the word-count topology.
 *
 * <p>Emits a fixed list of sentences one at a time, in order, on the single
 * output field {@code "sentence"}, and starts over from the first sentence
 * after the last one has been emitted.
 */
public class WordSpout implements IRichSpout {
    private static final long serialVersionUID = -6242388305868128294L;

    /** Collector used to emit tuples; assigned in {@link #open}. */
    private SpoutOutputCollector collector;

    /** Index into {@link #sentences} of the next sentence to emit. */
    private int index = 0;

    /**
     * The data to emit: each element is one sentence whose words are
     * separated by single spaces.
     */
    private String[] sentences = {
        "my dog has fleas",
        "i like cold beverages",
        "the dog ate my homework",
        "don't have a cow man",
        "i don't think i like fleas"
    };

    /**
     * Initializes the spout by remembering the collector for later emits.
     *
     * @param conf      topology configuration (unused here)
     * @param context   topology context (unused here)
     * @param collector collector used by {@link #nextTuple()} to emit sentences
     */
    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
    }

    /**
     * Emits the next sentence as a one-field tuple, advances the index
     * (wrapping around to the first sentence at the end of the array), and
     * then waits one second before returning.
     */
    @Override
    public void nextTuple() {
        // Emitted without a message id, so ack/fail below have nothing to track.
        collector.emit(new Values(sentences[index]));
        // Wrap around so the sentences are replayed from the beginning.
        index = (index + 1) % sentences.length;
        ThreadUtils.waitForSeconds(1);
    }

    /**
     * Declares the single output field, {@code "sentence"}.
     *
     * @param declarer declarer that receives the output schema
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("sentence"));
    }

    /** No-op. */
    @Override
    public void ack(Object arg0) {
    }

    /** No-op. */
    @Override
    public void activate() {
    }

    /** No-op: the spout holds no resources to release. */
    @Override
    public void close() {
    }

    /** No-op. */
    @Override
    public void deactivate() {
    }

    /** No-op. */
    @Override
    public void fail(Object arg0) {
    }

    /**
     * @return {@code null}; this component supplies no component-specific configuration
     */
    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
WordSplitBolt
package com.zyt.storm.bolt;
import java.util.Map;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
/**
 * Bolt that splits each incoming sentence into words.
 *
 * <p>Reads the {@code "sentence"} field of every input tuple and emits one
 * tuple per word on the single output field {@code "word"}.
 */
public class WordSplitBolt implements IRichBolt {
    private static final long serialVersionUID = 1315884922334803899L;

    /** Collector used to emit tuples downstream; assigned in {@link #prepare}. */
    private OutputCollector collector;

    /**
     * Initializes the bolt by remembering the collector for later emits.
     *
     * @param stormconf topology configuration (unused here)
     * @param context   topology context (unused here)
     * @param collector collector used by {@link #execute(Tuple)} to emit words
     */
    @Override
    public void prepare(Map stormconf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    /**
     * Splits the sentence carried by {@code input} on runs of whitespace and
     * emits every non-empty word as its own tuple.
     *
     * <p>A {@code null} sentence and blank sentences produce no output, and
     * leading, trailing or repeated whitespace never yields an empty "word".
     *
     * @param input tuple carrying a {@code "sentence"} string field
     */
    @Override
    public void execute(Tuple input) {
        String sentence = input.getStringByField("sentence");
        if (sentence == null) {
            // Nothing to split; avoid a NullPointerException on a missing value.
            return;
        }
        // "\\s+" treats any run of whitespace as a single separator.
        String[] words = sentence.trim().split("\\s+");
        for (String word : words) {
            // Splitting an empty (or all-blank) sentence yields one empty token.
            if (!word.isEmpty()) {
                collector.emit(new Values(word));
            }
        }
    }

    /**
     * Declares the single output field, {@code "word"}.
     *
     * @param declarer declarer that receives the output schema
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));
    }

    /** No-op: the bolt holds no resources to release. */
    @Override
    public void cleanup() {
    }

    /**
     * @return {@code null}; this component supplies no component-specific configuration
     */
    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
WordCountBolt
package com.zyt.storm.bolt;
import java.util.HashMap;
import java.util.Map;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
/**
 * Bolt that keeps a running count for every word it receives.
 *
 * <p>Reads the {@code "word"} field of each input tuple, increments that
 * word's count in an in-memory map, and emits the pair
 * ({@code "word"}, {@code "count"}) downstream after every update.
 */
public class WordCountBolt implements IRichBolt {
    private static final long serialVersionUID = 458059962486086617L;

    /** Collector used to emit tuples downstream; assigned in {@link #prepare}. */
    private OutputCollector collector;

    /** Running count per word seen by this bolt instance; created in {@link #prepare}. */
    private Map<String, Long> counts;

    /**
     * Initializes the bolt: stores the collector and creates an empty count map.
     *
     * @param stormConf topology configuration (unused here)
     * @param context   topology context (unused here)
     * @param collector collector used by {@link #execute(Tuple)} to emit counts
     */
    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        this.counts = new HashMap<String, Long>();
    }

    /**
     * Increments the count of the word carried by {@code input} and emits the
     * word together with its updated count.
     *
     * @param input tuple carrying a {@code "word"} string field
     */
    @Override
    public void execute(Tuple input) {
        final String word = input.getStringByField("word");
        final Long seenSoFar = counts.get(word);
        // A word that is not in the map yet starts at 1; otherwise add 1 to its count.
        final Long updated = (seenSoFar == null) ? 1L : seenSoFar + 1L;
        counts.put(word, updated);
        // Every update is sent on, so downstream always receives the latest count.
        collector.emit(new Values(word, updated));
    }

    /**
     * Declares the two output fields, {@code "word"} and {@code "count"}.
     *
     * @param declarer declarer that receives the output schema
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word", "count"));
    }

    /** No-op: the bolt holds no resources to release. */
    @Override
    public void cleanup() {
    }

    /**
     * @return {@code null}; this component supplies no component-specific configuration
     */
    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
WordReportBolt
package com.zyt.storm.bolt;
import java.util.HashMap;
import java.util.Map;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Tuple;
/**
 * Terminal bolt that records the latest count of every word and prints a
 * report when it is shut down.
 *
 * <p>Reads the {@code "word"} and {@code "count"} fields of each input tuple
 * and keeps only the most recent count per word. It emits nothing.
 */
public class WordReportBolt implements IRichBolt {
    private static final long serialVersionUID = 9148265419528996239L;

    /** Collector handed over in {@link #prepare}; stored but not used for emitting. */
    private OutputCollector collector;

    /** Latest count received for each word; created in {@link #prepare}. */
    private Map<String, Long> counts;

    /**
     * Initializes the bolt: stores the collector and creates an empty result map.
     *
     * @param stormconf topology configuration (unused here)
     * @param context   topology context (unused here)
     * @param collector collector provided by the framework
     */
    @Override
    public void prepare(Map stormconf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        this.counts = new HashMap<String, Long>();
    }

    /**
     * Stores the count carried by {@code input}, replacing any earlier count
     * recorded for the same word.
     *
     * @param input tuple carrying a {@code "word"} string field and a
     *              {@code "count"} long field
     */
    @Override
    public void execute(Tuple input) {
        String word = input.getStringByField("word");
        Long count = input.getLongByField("count");
        counts.put(word, count);
    }

    /**
     * Declares no output fields: this bolt does not emit tuples.
     *
     * @param declarer declarer that receives the output schema (left untouched)
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
    }

    /**
     * Prints every recorded word with its count to standard output, one
     * {@code word:count} pair per line, sorted by word.
     *
     * <p>NOTE(review): intended to run when the bolt is stopped; whether the
     * framework always invokes it outside local mode should be confirmed.
     */
    @Override
    public void cleanup() {
        System.out.println("-------------------FINAL COUNTS----------------");
        // HashMap iteration order is unspecified; a sorted copy makes the report deterministic.
        Map<String, Long> sorted = new java.util.TreeMap<String, Long>(counts);
        // Iterate entries directly instead of looking each key up again.
        for (Map.Entry<String, Long> entry : sorted.entrySet()) {
            System.out.println(entry.getKey() + ":" + entry.getValue());
        }
        System.out.println("-----------------------------------------");
    }

    /**
     * @return {@code null}; this component supplies no component-specific configuration
     */
    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
WordTopology
package com.zyt.storm.topology;
import com.zyt.storm.bolt.WordCountBolt;
import com.zyt.storm.bolt.WordReportBolt;
import com.zyt.storm.bolt.WordSplitBolt;
import com.zyt.storm.spout.WordSpout;
import com.zyt.storm.util.ThreadUtils;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;
/**
 * Entry point that assembles the word-count topology and runs it on an
 * in-process {@link LocalCluster} for ten seconds.
 */
public class WordTopology {
    /** Component id of the {@link WordSpout}. */
    private static final String WORD_SPOUT_ID = "word-spout";
    /** Component id of the {@link WordSplitBolt}. */
    private static final String SPLIT_BOLT_ID = "split-bolt";
    /** Component id of the {@link WordCountBolt}. */
    private static final String COUNT_BOLT_ID = "count-bolt";
    /** Component id of the {@link WordReportBolt}. */
    private static final String REPORT_BOLT_ID = "report-bolt";
    /** Name under which the topology is submitted and later killed. */
    private static final String WORD_TOPOLOGY_ID = "wordcount-topology";

    /**
     * Builds the topology, submits it to a local cluster with debug output
     * enabled, lets it run for ten seconds, then kills it and shuts the
     * cluster down.
     *
     * @param args command-line arguments (ignored)
     */
    public static void main(String[] args) {
        TopologyBuilder wiring = wireComponents();

        Config settings = new Config();
        settings.setDebug(true);

        // Local mode: the whole topology runs inside this JVM.
        LocalCluster localCluster = new LocalCluster();
        localCluster.submitTopology(WORD_TOPOLOGY_ID, settings, wiring.createTopology());

        // Give the topology time to process some sentences before tearing it down.
        ThreadUtils.waitForSeconds(10);
        localCluster.killTopology(WORD_TOPOLOGY_ID);
        localCluster.shutdown();
    }

    /**
     * Registers the spout and the three bolts and connects them:
     * spout -> split -> count -> report.
     *
     * @return a builder holding the fully wired topology
     */
    private static TopologyBuilder wireComponents() {
        TopologyBuilder wiring = new TopologyBuilder();

        wiring.setSpout(WORD_SPOUT_ID, new WordSpout());

        // Sentences are spread across the split bolt with a shuffle grouping.
        wiring.setBolt(SPLIT_BOLT_ID, new WordSplitBolt(), 5)
                .shuffleGrouping(WORD_SPOUT_ID);

        // Grouping on "word" sends equal words to the same counter, keeping counts correct.
        wiring.setBolt(COUNT_BOLT_ID, new WordCountBolt(), 5)
                .fieldsGrouping(SPLIT_BOLT_ID, new Fields("word"));

        // Global grouping funnels every count into a single reporter.
        wiring.setBolt(REPORT_BOLT_ID, new WordReportBolt())
                .globalGrouping(COUNT_BOLT_ID);

        return wiring;
    }
}
工具类ThreadUtils
package com.zyt.storm.util;
/**
 * Thread utility class offering simple blocking waits.
 *
 * <p>Both methods return early if the calling thread is interrupted, and in
 * that case leave the thread's interrupt flag set so callers can still
 * observe the interruption.
 *
 * @author Administrator
 */
public class ThreadUtils {
    /** Not instantiable: this class only exposes static helpers. */
    private ThreadUtils() {
    }

    /**
     * Makes the current thread sleep for the given number of seconds.
     *
     * @param seconds how long to sleep, in seconds
     * @throws IllegalArgumentException if {@code seconds} is negative
     */
    public static void waitForSeconds(int seconds) {
        // Multiply as long: int arithmetic would overflow for large values of seconds.
        waitForMillis(seconds * 1000L);
    }

    /**
     * Makes the current thread sleep for the given number of milliseconds.
     *
     * @param milliseconds how long to sleep, in milliseconds
     * @throws IllegalArgumentException if {@code milliseconds} is negative
     */
    public static void waitForMillis(long milliseconds) {
        try {
            Thread.sleep(milliseconds);
        } catch (InterruptedException e) {
            // Thread.sleep clears the flag when it throws; restore it instead of
            // swallowing the interruption so the caller can react to it.
            Thread.currentThread().interrupt();
        }
    }
}