Storm可同时处理窗口内的所有tuple。窗口可以从时间或数量上来划分,由如下两个因素决定:
窗口的长度(windowLength),可以是时间间隔或Tuple数量;
滑动间隔(sliding Interval),可以是时间间隔或Tuple数量;
※要确保topology的消息超时时间(topology.message.timeout.secs)大于窗口长度加上滑动间隔,否则窗口内的tuple可能在处理完成之前就因超时而被重发
Sliding Window:滑动窗口
按照固定的时间间隔或者Tuple数量滑动窗口。
如果滑动间隔和窗口大小一样则等同于滚窗,
如果滑动间隔大于窗口大小则会丢失数据,
如果滑动间隔小于窗口大小则会窗口重叠。
Tumbling Window:滚动窗口
元组被单个窗口处理,一个元组只属于一个窗口,不会有窗口重叠。根据经验其实一般用滚动就可以了
构造builder的时候支持以下的配置
(时间和数量的排列组合):
withWindow(Count windowLength, Count slidingInterval)
滑窗 窗口长度:tuple数, 滑动间隔: tuple数
withWindow(Count windowLength)
滑窗 窗口长度:tuple数, 滑动间隔: 每个tuple进来都滑
withWindow(Count windowLength, Duration slidingInterval)
滑窗 窗口长度:tuple数, 滑动间隔: 时间间隔
withWindow(Duration windowLength, Duration slidingInterval)
滑窗 窗口长度:时间间隔, 滑动间隔: 时间间隔
withWindow(Duration windowLength)
滑窗 窗口长度:时间间隔, 滑动间隔: 每个tuple进来都滑
withWindow(Duration windowLength, Count slidingInterval)
滑窗 窗口长度:时间间隔, 滑动间隔: tuple数
withTumblingWindow(BaseWindowedBolt.Count count)
滚窗 窗口长度:Tuple数
withTumblingWindow(BaseWindowedBolt.Duration duration)
滚窗 窗口长度:时间间隔
以下代码通过窗口操作,每隔10s统计一次最近50s内的Top-N(代码中N=3),可结合代码理解开窗操作。
github传送门:https://github.com/ZzzzZzreal/StormGoGo/blob/master/src/main/java/window/WindowTopN.java
public class WindowTopN {
/**
* 产生数据的Spout,随机生成指定word并发出
*/
static class MySpout extends BaseRichSpout {
String[] words = {"aa","bb","cc","dd","ee","ff","gg"};
Random random = new Random();
SpoutOutputCollector collector;
public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
this.collector=collector;
}
public void nextTuple() {
Utils.sleep(500);
collector.emit(new Values(words[random.nextInt(words.length)]));
}
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("word"));
}
}
/**
* windowBolt,实现窗口操作,并统计指定时间内单位时间间隔内的Top3
*/
static class MyWindowBolt extends BaseWindowedBolt {
//定义一个HashMap作wordcount用
HashMap<String, Integer> hashMap = new HashMap<String, Integer>();
public void execute(TupleWindow inputWindow) {
//获取窗口内的内容
List<Tuple> words = inputWindow.get();
//wordcount
for (int i = 0; i < words.size(); i++) {
String word = words.get(i).getString(0);
Integer count = hashMap.get(word);
if (count == null)
count = 0;
count++;
hashMap.put(word, count);
}
//这里将map.entrySet()转换成list
List<Map.Entry<String, Integer>> list = new ArrayList<Map.Entry<String, Integer>>(hashMap.entrySet());
//然后通过比较器来实现排序
Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
//升序排序
public int compare(Map.Entry<String, Integer> o1,
Map.Entry<String, Integer> o2) {
return o2.getValue().compareTo(o1.getValue());
}
});
//输出top3
System.out.println("Top3:");
for (int i = 0; i < 3; i++) {
System.out.println("\t" + list.get(i).getKey() + ":" + list.get(i).getValue());
}
System.out.println("--------->");
/*
*以下代码用于理解window的某些基础操作
*/
// List<Tuple> tuples = inputWindow.get();
// List<Tuple> expired = inputWindow.getExpired();//获取到过期的tuple
// List<Tuple> tuples = inputWindow.getNew();//获取到和上个窗口相比新加进去的tuple
// System.out.println("滑动了一下!");
// System.out.println(tuples.size());
// System.out.println(expired.size());
// for (Tuple tuple:tuples){
//
// System.out.println(tuple.getValue(0));
// }
}
public static void main(String[] args) {
//构建拓扑
TopologyBuilder topologyBuilder = new TopologyBuilder();
topologyBuilder.setSpout("spout", new MySpout());
//指定窗口长度以及滑动间隔
topologyBuilder.setBolt("bolt", new MyWindowBolt().withWindow(BaseWindowedBolt.Duration.seconds(50), BaseWindowedBolt.Duration.seconds(10))).shuffleGrouping("spout");
/*
*以下代码简单理解定义窗口时时间和数量的排列组合
*/
// topologyBuilder.setBolt("bolt", new MyWindowBolt().withTumblingWindow(BaseWindowedBolt.Count.of(10)))
// .shuffleGrouping("spout");//这里要注意withTumblingWindow(滑动间隔和窗口长度是一样的)和withWindow的区别,如果忘了点进源码看一下(withWindow是一个tuple滑动一次)
// topologyBuilder.setBolt("bolt",
new new MyWindowBolt().withWindow(new BaseWindowedBolt.Duration(10, TimeUnit.SECONDS)))
// new MyWindowBolt().withWindow(BaseWindowedBolt.Duration.seconds(50), BaseWindowedBolt.Duration.seconds(10)))//时间的两种定义方式
// .shuffleGrouping("spout");
LocalCluster localCluster = new LocalCluster();
Config config = new Config();
config.put(Config.TOPOLOGY_MESSAGE_TIMEOUT_SECS, 100);//要保证超时时间大于等于窗口长度+滑动间隔长度
localCluster.submitTopology("a", config, topologyBuilder.createTopology());
}
}
}