参考了storm-starter和很多网上的例子。
总共需要三个bolt:
//bolt1:负责实时地计算每个对象在某个时间窗口内的统计量,并负责清空最旧的数据。
package com.cucc.roam.storm.bolt;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.log4j.Logger;
public class RollingCountObjects extends BaseRichBolt {
public static Logger LOG = Logger.getLogger(RollingCountObjects.class);
private HashMap<Object, long[]> _objectCounts = new HashMap<Object, long[]>();
private int _numBuckets;//60
private transient Thread cleaner;
private OutputCollector _collector;
private int _trackMinutes;//10
public RollingCountObjects(int numBuckets, int trackMinutes) {
_numBuckets = numBuckets;
_trackMinutes = trackMinutes;
}
public long totalObjects (Object obj) {
long[] curr = _objectCounts.get(obj);
long total = 0;
for (long l: curr) {
total+=l;
}
return total;
}
public int currentBucket (int buckets) {
return (currentSecond() / secondsPerBucket(buckets)) % buckets;
}
public int currentSecond() {//系统时间,秒
return (int) (System.currentTimeMillis() / 1000);
}
public int secondsPerBucket(int buckets) {
return (_trackMinutes * 60 / buckets);//每个桶多少秒,
}
public long millisPerBucket(int buckets) {
return (long) secondsPerBucket(buckets) * 1000;
}
public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
_collector = collector;
cleaner = new Thread(new Runnable() {
public void run() {
Integer lastBucket = currentBucket(_numBuckets);
StringBuffer sb = new StringBuffer();
while(true) {
int currBucket = currentBucket(_numBuckets);
sb.setLength(0);
sb.append("\n##########\nbegin,线程while循环: 当前的桶为:" + currBucket);
if(currBucket!=lastBucket) {
sb.append("\n线程while循环:之前的桶数为:" + lastBucket);
int bucketToWipe = (currBucket + 1) % _numBuckets;
sb.append("\n线程while循环:要擦除掉的桶为:" + bucketToWipe);
synchronized(_objectCounts) {
Set objs = new HashSet(_objectCounts.keySet());
for (Object obj: objs) {
long[] counts = _objectCounts.get(obj);
long currBucketVal = counts[bucketToWipe];
sb.append("\n线程while循环:擦除掉的值为:" + currBucketVal+",擦除的对象为"+obj);
sb.append("\n擦出前数组:");//LOG.info
for (long number : counts) {
sb.append(number + ":");
}
counts[bucketToWipe] = 0;
sb.append("\n擦出后数组:");
for (long number : counts) {
sb.append(number + ":");
}
long total = totalObjects(obj);
if(currBucketVal!=0) {
sb.append("\n线程while循环:擦除掉的值为不为0:那就发射数据:obj total"
+ obj + ":" + total);
_collector.emit(new Values(obj, total));
}
if(total==0) {
sb.append("\n线程while循环: 总数为0以后,将obj对象删除,obj="+obj);
_objectCounts.remove(obj);
}
}
}
lastBucket = currBucket;
}
long delta = millisPerBucket(_numBuckets) - (System.currentTimeMillis() % millisPerBucket(_numBuckets));
Utils.sleep(delta);
sb.append("\nsleep="+delta+"毫秒.end#########\n");
LOG.info(sb.toString());
}
}
});
cleaner.start();
}
public void execute(Tuple tuple) {
StringBuffer sb = new StringBuffer();
sb.setLength(0);
Object obj = tuple.getValue(0);
int bucket = currentBucket(_numBuckets);
sb.append("\n=======\nexecute方法:当前值:"+obj+"当前桶:bucket: " + bucket);
synchronized(_objectCounts) {
long[] curr = _objectCounts.get(obj);
if(curr==null) {
curr = new long[_numBuckets];
_objectCounts.put(obj, curr);
sb.append("\n新建,_objectCounts["+_objectCounts.toString()+"]");
}
curr[bucket]++;
sb.append("\nexecute方法:接受到的merchandiseIDS:" + obj.toString() + ",long数组:\n");
for (long number : curr) {
sb.append(number + ":");
}
sb.append("\nexecute方法:发射的数据: " + obj + ":" + totalObjects(obj)+"\n========");
_collector.emit(new Values(obj, totalObjects(obj)));
_collector.ack(tuple);
LOG.info(sb.toString());
}
}
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("obj", "count"));
}
}
//bolt2:维护一份 topN 数据;假如 bolt2 的并发度为 3,那么总共会有三组 topN 数据。
package com.cucc.roam.storm.bolt;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import org.apache.log4j.Logger;
public class RankObjects extends BaseBasicBolt {
public static Logger LOG = Logger.getLogger(RankObjects.class);
List<List> _rankings = new ArrayList<List>();
int _count;
Long _lastTime = null;
public RankObjects(int n) {
_count = n;
}
private int _compare(List one, List two) {
long valueOne = (Long) one.get(1);
long valueTwo = (Long) two.get(1);
long delta = valueTwo - valueOne;
if(delta > 0) {
return 1;
} else if (delta < 0) {
return -1;
} else {
return 0;
}
}
private Integer _find(Object tag) {
for(int i = 0; i < _rankings.size(); ++i) {
Object cur = _rankings.get(i).get(0);
if (cur.equals(tag)) {
return i;
}
}
return null;
}
public void execute(Tuple tuple, BasicOutputCollector collector) {
StringBuffer sb = new StringBuffer();
Object tag = tuple.getValue(0);
sb.append("\n~~~~~~~~~\nrank,tag="+tag+",_rankings["+_rankings.toString()+"].");
Integer existingIndex = _find(tag);
if (null != existingIndex) {
sb.append("\nrank,set["+tuple.getValues()+"]..");
_rankings.set(existingIndex, tuple.getValues());
} else {
sb.append("\nrank,add["+tuple.getValues()+"]..");
_rankings.add(tuple.getValues());
}
Collections.sort(_rankings, new Comparator<List>() {
public int compare(List o1, List o2) {
return _compare(o1, o2);
}
});
if (_rankings.size() > _count) {
_rankings.remove(_count);
sb.append("\nremove后,_rankings["+_rankings.toString()+"].");
}
long currentTime = System.currentTimeMillis();
if(_lastTime==null || currentTime >= _lastTime + 2000) {
sb.append("\nrank,emit["+_rankings+"]...");
collector.emit(new Values(new ArrayList(_rankings)));
_lastTime = currentTime;
}
sb.append("\n~~~~~~~~~~~\n");
LOG.info(sb.toString());
}
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("list"));
}
}
//bolt3:负责把各个 bolt2 实例发来的 topN 列表合并成一份全局的 topN。
package com.cucc.roam.storm.bolt;
import org.apache.log4j.Logger;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
public class MergeObjects extends BaseBasicBolt {
public static Logger LOG = Logger.getLogger(MergeObjects.class);
private List<List> _rankings = new ArrayList();
int _count = 10;
Long _lastTime;
public MergeObjects(int n) {
_count = n;
}
private int _compare(List one, List two) {
long valueOne = (Long) one.get(1);
long valueTwo = (Long) two.get(1);
long delta = valueTwo - valueOne;
if(delta > 0) {
return 1;
} else if (delta < 0) {
return -1;
} else {
return 0;
}
}
private Integer _find(Object tag) {
for(int i = 0; i < _rankings.size(); ++i) {
Object cur = _rankings.get(i).get(0);
if (cur.equals(tag)) {
return i;
}
}
return null;
}
public void execute(Tuple tuple, BasicOutputCollector collector) {
StringBuffer sb = new StringBuffer();
List<List> merging = (List) tuple.getValue(0);
sb.append("\n$$$$$$$$$$$$$\nmerge,get,merging["+merging.toString()+"].");
sb.append("\n 开始_rankings["+_rankings.toString()+"]");
for(List pair : merging) {
Integer existingIndex = _find(pair.get(0));
if (null != existingIndex) {
_rankings.set(existingIndex, pair);
sb.append("");
} else {
_rankings.add(pair);
sb.append("");
}
Collections.sort(_rankings, new Comparator<List>() {
public int compare(List o1, List o2) {
return _compare(o1, o2);
}
});
if (_rankings.size() > _count) {
_rankings.subList(_count, _rankings.size()).clear();
}
}
sb.append("\n结束_rankings["+_rankings.toString()+"]");
long currentTime = System.currentTimeMillis();
if(_lastTime==null || currentTime >= _lastTime + 2000) {
collector.emit(new Values(new ArrayList(_rankings)));
LOG.info("\n最后的实时结果Rankings: " + _rankings);
_lastTime = currentTime;
}
sb.append("\n$$$$$$$$$$$$$\n");
LOG.info(sb.toString());
}
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("list"));
}
}
=============
转载:http://my.oschina.net/infiniteSpace/blog/309784