一.构建maven开发环境
为了开发storm topology, 你需要把storm相关的jar包添加到classpath里面去:
要么手动添加所有相关的jar包, 要么使用maven来管理所有的依赖。storm的jar包发布在Clojars(一个maven库),
如果你使用maven的话,把下面的配置添加在你项目的pom.xml里面。
<repository>
  <id>clojars.org</id>
  <url>http://clojars.org/repo</url>
</repository>

<dependency>
  <groupId>storm</groupId>
  <artifactId>storm</artifactId>
  <version>0.5.3</version>
  <scope>test</scope>
</dependency>
二.代码范例
1.Topology 入口点 RollingTopWords
—— 类似于 Hadoop 中的 Job 定义
本地模式(嵌入Local):
package storm.starter;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.testing.TestWordSpout;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;
import storm.starter.bolt.MergeObjects;
import storm.starter.bolt.RankObjects;
import storm.starter.bolt.RollingCountObjects;
public class RollingTopWords {
public static void
main(String[] args) throws Exception {
final int TOP_N = 3;
TopologyBuilder builder = new
TopologyBuilder();
builder.setSpout(1, new TestWordSpout(),
5);
builder.setBolt(2, new RollingCountObjects(60,
10), 4)
.fieldsGrouping(1, new Fields("word"));
builder.setBolt(3, new RankObjects(TOP_N),
4)
.fieldsGrouping(2, new Fields("obj"));
builder.setBolt(4, new
MergeObjects(TOP_N))
.globalGrouping(3);
Config conf = new Config();
conf.setDebug(true);
LocalCluster cluster = new LocalCluster(); //
本地模式启动集群
cluster.submitTopology("rolling-demo", conf,
builder.createTopology());
Thread.sleep(10000);
cluster.shutdown();
}
}
部署模式:
package storm.starter;
import storm.starter.bolt.MergeObjects;
import storm.starter.bolt.RankObjects;
import storm.starter.bolt.RollingCountObjects;
import backtype.storm.Config;
import backtype.storm.StormSubmitter;
import backtype.storm.testing.TestWordSpout;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;
/**
 * Topology entry point (cluster deployment mode): identical wiring to the
 * local-mode variant, but submitted to a real cluster via
 * {@link StormSubmitter} with production-oriented settings.
 */
public class RollingTopWords {

    public static void main(String[] args) throws Exception {
        final int TOP_N = 3;

        // Same pipeline as local mode: spout -> rolling count -> rank -> merge.
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout(1, new TestWordSpout(), 5);
        builder.setBolt(2, new RollingCountObjects(60, 10), 4)
                .fieldsGrouping(1, new Fields("word"));
        builder.setBolt(3, new RankObjects(TOP_N), 4)
                .fieldsGrouping(2, new Fields("obj"));
        builder.setBolt(4, new MergeObjects(TOP_N))
                .globalGrouping(3);

        Config conf = new Config();
        conf.setDebug(true);
        conf.setNumWorkers(20);          // worker process count on the cluster
        conf.setMaxSpoutPending(5000);   // cap on in-flight, un-acked tuples

        StormSubmitter.submitTopology("demo", conf, builder.createTopology());
        Thread.sleep(10000);
    }
}
2. 直接使用内置的TestWordSpout(随机产生一个word)
TestWordSpout
package backtype.storm.testing;
import backtype.storm.topology.OutputFieldsDeclarer;
import java.util.Map;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;
import java.util.Random;
import org.apache.log4j.Logger;
/**
 * Demo spout that emits a random word (field "word") roughly ten times
 * per second, drawn from a small fixed pool.
 */
public class TestWordSpout implements IRichSpout {
    public static Logger LOG = Logger.getLogger(TestWordSpout.class);

    /** Fixed word pool; one entry is chosen uniformly at random per tuple. */
    private static final String[] WORDS =
            new String[] {"nathan", "mike", "jackson", "golda", "bertels"};

    boolean _isDistributed;
    SpoutOutputCollector _collector;
    // Created once in open() instead of on every nextTuple() call (the
    // original allocated a new Random per tuple). Transient because spouts
    // are serialized when the topology is submitted.
    private transient Random _rand;

    public TestWordSpout() {
        this(true);
    }

    public TestWordSpout(boolean isDistributed) {
        _isDistributed = isDistributed;
    }

    public boolean isDistributed() {
        return _isDistributed;
    }

    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        _collector = collector;
        _rand = new Random();
    }

    public void close() {
    }

    public void nextTuple() {
        Utils.sleep(100); // throttle to ~10 tuples/sec
        final String word = WORDS[_rand.nextInt(WORDS.length)];
        _collector.emit(new Values(word));
    }

    public void ack(Object msgId) {
        // No-op: this demo spout does not track emitted message ids.
    }

    public void fail(Object msgId) {
        // No-op: nothing to replay.
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));
    }
}
3. 各环节处理 Bolt
RollingCountObjects 对 word 进行滚动（滑动窗口）计数，并由后台清理线程在每个时间桶到期时清空最旧的计数
package storm.starter.bolt;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
@SuppressWarnings("serial")
public class RollingCountObjects implements IRichBolt {
private
HashMap _objectCounts
= new HashMap
long[]>();
private int
_numBuckets;
private transient Thread
cleaner;
private OutputCollector
_collector;
private int
_trackMinutes;
public
RollingCountObjects(int numBuckets, int trackMinutes) {
_numBuckets = numBuckets;
_trackMinutes = trackMinutes;
}
public long totalObjects
(Object obj) {
long[] curr = _objectCounts.get(obj);
long total = 0;
for (long l: curr) {
total+=l;
}
return total;
}
public int currentBucket
(int buckets) {
return (currentSecond() /
secondsPerBucket(buckets)) % buckets;
}
public int
currentSecond() {
return (int) (System.currentTimeMillis() /
1000);
}
public int
secondsPerBucket(int buckets) {
return (_trackMinutes * 60 / buckets);
}
public long
millisPerBucket(int buckets) {
return (long) secondsPerBucket(buckets) *
1000;
}
@SuppressWarnings("rawtypes")
public void prepare(Map
stormConf, TopologyContext context, OutputCollector collector)
{
_collector = collector;
cleaner = new Thread(new Runnable() {
@SuppressWarnings("unchecked")
public
void run() {
Integer lastBucket =
currentBucket(_numBuckets);
while(true) {
int
currBucket = currentBucket(_numBuckets);
if(currBucket!=lastBucket) {
int bucketToWipe =
(currBucket + 1) % _numBuckets;
synchronized(_objectCounts)
{
Set objs = new
HashSet(_objectCounts.keySet());
for (Object obj: objs) {
long[] counts =
_objectCounts.get(obj);
long currBucketVal =
counts[bucketToWipe];
counts[bucketToWipe] = 0; //
*这行代码很关键*
long total =
totalObjects(obj);
if(currBucketVal!=0) {
_collector.emit(new Values(obj, total));
}
if(total==0) {
_objectCounts.remove(obj);
}
}
}
lastBucket =
currBucket;
}
long delta
= millisPerBucket(_numBuckets) - (System.currentTimeMillis() %
millisPerBucket(_numBuckets));
Utils.sleep(delta);
}
}
});
cleaner.start();
}
public void
execute(Tuple tuple) {
Object obj = tuple.getValue(0);
int bucket = currentBucket(_numBuckets);
synchronized(_objectCounts) {
long[]
curr = _objectCounts.get(obj);
if(curr==null) {
curr = new
long[_numBuckets];
_objectCounts.put(obj,
curr);
}
curr[bucket]++;
_collector.emit(new Values(obj, totalObjects(obj)));
_collector.ack(tuple);
}
}
public void cleanup()
{
}
public void
declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("obj",
"count"));
}
}
RankObjects
package storm.starter.bolt;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.IBasicBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import org.json.simple.JSONValue;
@SuppressWarnings("serial")
public class RankObjects implements IBasicBolt {
@SuppressWarnings("rawtypes")
List _rankings = new
ArrayList();
int _count;
Long _lastTime =
null;
public RankObjects(int
n) {
_count = n;
}
@SuppressWarnings("rawtypes")
private int
_compare(List one, List two) {
long valueOne = (Long) one.get(1);
long valueTwo = (Long) two.get(1);
long delta = valueTwo - valueOne;
if(delta > 0) {
return
1;
} else if (delta < 0) {
return
-1;
} else {
return
0;
}
} //end compare
private Integer
_find(Object tag) {
for(int i = 0; i <
_rankings.size(); ++i) {
Object cur
= _rankings.get(i).get(0);
if
(cur.equals(tag)) {
return i;
}
}
return null;
}
@SuppressWarnings("rawtypes")
public void prepare(Map
stormConf, TopologyContext context) {
}
@SuppressWarnings("rawtypes")
public void
execute(Tuple tuple, BasicOutputCollector collector) {
Object tag = tuple.getValue(0);
Integer existingIndex = _find(tag);
if (null != existingIndex) {
_rankings.set(existingIndex, tuple.getValues());
} else {
_rankings.add(tuple.getValues());
}
Collections.sort(_rankings, new
Comparator() {
public int
compare(List o1, List o2) {
return _compare(o1,
o2);
}
});
if (_rankings.size() > _count)
{
_rankings.remove(_count);
}
long currentTime =
System.currentTimeMillis();
if(_lastTime==null || currentTime
>= _lastTime + 2000) {
collector.emit(new
Values(JSONValue.toJSONString(_rankings)));
_lastTime
= currentTime;
}
}
public void cleanup()
{
}
public void
declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("list"));
}
}
MergeObjects 对各 RankObjects 任务产出的局部排序结果进行归并，得到全局 Top-N
package storm.starter.bolt;
import org.apache.log4j.Logger;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.IBasicBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import org.json.simple.JSONValue;
@SuppressWarnings("serial")
public class MergeObjects implements IBasicBolt {
public static Logger LOG
= Logger.getLogger(MergeObjects.class);
@SuppressWarnings({
"rawtypes", "unchecked" })
private
List _rankings = new
ArrayList();
int _count = 10;
Long _lastTime;
public MergeObjects(int
n) {
_count = n;
}
@SuppressWarnings("rawtypes")
private int
_compare(List one, List two) {
long valueOne = (Long) one.get(1);
long valueTwo = (Long) two.get(1);
long delta = valueTwo - valueOne;
if(delta > 0) {
return
1;
} else if (delta < 0) {
return
-1;
} else {
return
0;
}
} //end compare
private Integer
_find(Object tag) {
for(int i = 0; i <
_rankings.size(); ++i) {
Object cur
= _rankings.get(i).get(0);
if
(cur.equals(tag)) {
return i;
}
}
return null;
}
@SuppressWarnings("rawtypes")
public void prepare(Map
stormConf, TopologyContext context) {
}
@SuppressWarnings({
"unchecked", "rawtypes" })
public void
execute(Tuple tuple, BasicOutputCollector collector) {
List merging
= (List) JSONValue.parse(tuple.getString(0));
for(List pair : merging) {
Integer
existingIndex = _find(pair.get(0));
if (null
!= existingIndex) {
_rankings.set(existingIndex,
pair);
} else
{
_rankings.add(pair);
}
Collections.sort(_rankings, new
Comparator() {
public int compare(List o1,
List o2) {
return _compare(o1, o2);
}
});
if
(_rankings.size() > _count) {
_rankings.subList(_count,
_rankings.size()).clear();
}
}
long currentTime =
System.currentTimeMillis();
if(_lastTime==null || currentTime
>= _lastTime + 2000) {
String
fullRankings = JSONValue.toJSONString(_rankings);
collector.emit(new Values(fullRankings));
LOG.info("Rankings: " + fullRankings);
_lastTime
= currentTime;
}
}
public void cleanup()
{
}
public void
declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("list"));
}
}