import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.newegg.storm.util.TupleHelpers;
import backtype.storm.Config;
import backtype.storm.generated.GlobalStreamId;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.utils.RotatingMap;
/**
 * Many-to-many join bolt: joins the tuples of several input streams on the
 * fields they all have in common, within a time window.
 *
 * <p>For example, given the tuples
 * <pre>
 * stream1: (A,B1,D1),(A,B2,D2)
 * stream2: (A,C1),(A,C2)
 * </pre>
 * the join emits the cartesian product of both sides for key {@code A}:
 * <pre>
 * (A,B1,D1,C1)
 * (A,B1,D1,C2)
 * (A,B2,D2,C1)
 * (A,B2,D2,C2)
 * </pre>
 *
 * <p>Tuples are buffered per join key in a two-bucket {@link RotatingMap} that
 * is rotated on every tick tuple. When a key expires, it is joined and emitted
 * if every input stream contributed at least one tuple; otherwise its buffered
 * tuples are failed.
 *
 * @author bw67
 */
public class MultipleJoinBolt extends BaseRichBolt {
    private OutputCollector collector;
    private TopologyContext context;
    /** Fields present in every declaration (the join key), in the order of the first declaration. */
    private Fields commonFields;
    /** Per declaration: its fields minus the common ones, e.g. [[B,D],[C]]. */
    private Fields[] otherFields;
    /** Common fields followed by every entry of {@link #otherFields}, e.g. [A,B,D,C]. */
    private Fields outputFields;
    /** Number of input streams subscribed by this bolt. */
    private int numSources;
    /** Tick frequency in seconds; drives the rotation of {@link #pending}. */
    private int expireInSeconds = 10;
    /** join key values -> (source stream -> tuples received from that stream). */
    private RotatingMap<List<Object>, Map<GlobalStreamId, List<Tuple>>> pending;
    /** Parallel to {@link #otherFields}: the input stream that provides each entry (null if none matched). */
    private GlobalStreamId[] fieldSources;

    /**
     * Creates a join over the given field declarations.
     *
     * @param expireInSeconds tick frequency in seconds used to rotate the pending buffer
     * @param fields          one declaration per joined stream; the join key is the set of
     *                        fields present in all of them
     * @throws IllegalArgumentException if no declaration is given
     */
    public MultipleJoinBolt(int expireInSeconds, Fields... fields) {
        if (fields == null || fields.length == 0) {
            throw new IllegalArgumentException("At least one Fields declaration is required");
        }
        this.expireInSeconds = expireInSeconds;
        findCommonFields(fields);
        findOtherFields(fields);
        makeOutputFields();
    }

    /**
     * Computes the fields shared by all declarations: [[A,B,D], [A,C]] => [A].
     * The order of the first declaration is preserved so that the key (and the
     * head of the output tuple) has a predictable layout.
     *
     * @param fields the declarations passed to the constructor (non-empty)
     */
    private void findCommonFields(Fields... fields) {
        List<String> common = new ArrayList<String>(fields[0].toList());
        for (int i = 1; i < fields.length; i++) {
            common.retainAll(fields[i].toList());
        }
        commonFields = new Fields(common);
    }

    /**
     * Computes the non-key fields of each declaration: [[A,B,D], [A,C]] => [[B,D], [C]].
     *
     * @param fields the declarations passed to the constructor
     */
    private void findOtherFields(Fields... fields) {
        List<String> common = commonFields.toList();
        otherFields = new Fields[fields.length];
        for (int i = 0; i < fields.length; i++) {
            // Work on a private copy so the caller's declaration is never mutated.
            List<String> remaining = new ArrayList<String>(fields[i].toList());
            remaining.removeAll(common);
            otherFields[i] = new Fields(remaining);
        }
    }

    /**
     * Builds the output declaration: common fields first, then the non-key
     * fields of each declaration in constructor order, e.g. [A,B,D,C].
     */
    private void makeOutputFields() {
        List<String> all = new ArrayList<String>(commonFields.toList());
        for (Fields others : otherFields) {
            all.addAll(others.toList());
        }
        outputFields = new Fields(all);
    }

    /**
     * Stores the collector and context, creates the two-bucket pending buffer
     * (expired keys are handled by {@link ExpireCallback}), records the number
     * of input streams and resolves which stream provides which non-key fields.
     */
    @Override
    public void prepare(Map stormConf, TopologyContext context,
            OutputCollector collector) {
        this.collector = collector;
        this.context = context;
        // Two buckets of <key values, <stream, tuples>>, e.g.
        // [A] -> {S1 -> [(A,B,D)], S2 -> [(A,C)]}
        this.pending = new RotatingMap<List<Object>, Map<GlobalStreamId, List<Tuple>>>(2, new ExpireCallback());
        this.numSources = context.getThisSources().size();
        mapFieldsToSource();
    }

    /**
     * Resolves, for every entry of {@link #otherFields}, the input stream that
     * provides it:
     * <pre>
     * [B,D] -> S1
     * [C]   -> S2
     * </pre>
     * A stream whose declared fields, minus the key, are exactly the wanted
     * fields is preferred; this keeps a key-only declaration (empty non-key
     * set, which every stream trivially "contains") from being bound to an
     * unrelated stream. If no exact match exists, the last stream containing
     * all wanted fields is used.
     */
    private void mapFieldsToSource() {
        fieldSources = new GlobalStreamId[otherFields.length];
        boolean[] exactlyMatched = new boolean[otherFields.length];
        List<String> common = commonFields.toList();
        for (GlobalStreamId source : context.getThisSources().keySet()) {
            List<String> declared =
                    context.getComponentOutputFields(source.get_componentId(), source.get_streamId()).toList();
            List<String> declaredOthers = new ArrayList<String>(declared);
            declaredOthers.removeAll(common);
            // A stream may claim at most one exact slot, so identical declarations
            // are spread over distinct streams instead of all landing on one.
            boolean claimedExactSlot = false;
            for (int i = 0; i < otherFields.length; i++) {
                List<String> wanted = otherFields[i].toList();
                if (!declared.containsAll(wanted)) {
                    continue;
                }
                // wanted holds no key fields, so "contains all" plus equal size means equal sets.
                boolean exact = declaredOthers.size() == wanted.size();
                if (exact) {
                    if (!exactlyMatched[i] && !claimedExactSlot) {
                        fieldSources[i] = source;
                        exactlyMatched[i] = true;
                        claimedExactSlot = true;
                    }
                } else if (!exactlyMatched[i]) {
                    fieldSources[i] = source;
                }
            }
        }
    }

    /**
     * Rotates the pending buffer on tick tuples; otherwise buffers the tuple
     * under its join key and source stream. Buffered tuples are acked or
     * failed later, when their key expires.
     */
    @Override
    public void execute(Tuple input) {
        // Time is up: rotate, which hands the oldest bucket to ExpireCallback.
        if (TupleHelpers.isTickTuple(input)) {
            pending.rotate();
            return;
        }
        List<Object> id = input.select(commonFields);
        GlobalStreamId streamId = new GlobalStreamId(input.getSourceComponent(), input.getSourceStreamId());

        Map<GlobalStreamId, List<Tuple>> parts = pending.get(id);
        if (parts == null) {
            parts = new HashMap<GlobalStreamId, List<Tuple>>();
        }
        List<Tuple> received = parts.get(streamId);
        if (received == null) {
            received = new ArrayList<Tuple>();
            parts.put(streamId, received);
        }
        received.add(input);
        // Re-put on every tuple so the active key sits in the first bucket of the rotating map.
        pending.put(id, parts);
    }

    /**
     * Emits the cartesian product of the buffered tuples of all streams for
     * one join key, anchoring each output tuple to the input tuples it was
     * built from, then acks every buffered tuple.
     *
     * @param ids   values of the common fields (the join key)
     * @param parts buffered tuples per source stream for this key
     * @throws IllegalStateException if a field declaration has no buffered tuples
     *                               because no input stream was mapped to it
     */
    private void emit(List<Object> ids, Map<GlobalStreamId, List<Tuple>> parts) {
        // Start from the product identity (one empty row) and multiply in each stream:
        // rows:    [[B1,D1,C1],[B1,D1,C2],[B2,D2,C1],[B2,D2,C2]]
        // anchors: [[(A,B1,D1),(A,C1)],[(A,B1,D1),(A,C2)],[(A,B2,D2),(A,C1)],[(A,B2,D2),(A,C2)]]
        List<List<Object>> rows = new ArrayList<List<Object>>();
        rows.add(new ArrayList<Object>());
        List<List<Tuple>> anchors = new ArrayList<List<Tuple>>();
        anchors.add(new ArrayList<Tuple>());

        for (int i = 0; i < otherFields.length; i++) {
            GlobalStreamId source = fieldSources[i];
            List<Tuple> tuples = (source == null) ? null : parts.get(source);
            if (tuples == null) {
                throw new IllegalStateException(
                        "No input stream provides fields " + otherFields[i] + " for join key " + ids);
            }
            List<List<Object>> streamValues = new ArrayList<List<Object>>(tuples.size());
            List<List<Tuple>> streamAnchors = new ArrayList<List<Tuple>>(tuples.size());
            for (Tuple tuple : tuples) {
                streamValues.add(tuple.select(otherFields[i]));
                List<Tuple> anchor = new ArrayList<Tuple>(1);
                anchor.add(tuple);
                streamAnchors.add(anchor);
            }
            rows = joinParts(rows, streamValues);
            anchors = joinParts(anchors, streamAnchors);
        }

        for (int r = 0; r < rows.size(); r++) {
            // Common fields go first, matching the declared output fields.
            List<Object> values = new ArrayList<Object>(ids);
            values.addAll(rows.get(r));
            collector.emit(anchors.get(r), values);
            System.out.println("["+Thread.currentThread().getName()+"] => " + values);
        }

        for (List<Tuple> part : parts.values()) {
            for (Tuple tuple : part) {
                collector.ack(tuple);
            }
        }
    }

    /**
     * Cartesian product of two lists of rows: every row of {@code left} is
     * concatenated with every row of {@code right}, left-major, so the result
     * has {@code left.size() * right.size()} rows.
     */
    private static <T> List<List<T>> joinParts(List<List<T>> left, List<List<T>> right) {
        List<List<T>> product = new ArrayList<List<T>>(left.size() * right.size());
        for (List<T> head : left) {
            for (List<T> tail : right) {
                List<T> combined = new ArrayList<T>(head.size() + tail.size());
                combined.addAll(head);
                combined.addAll(tail);
                product.add(combined);
            }
        }
        return product;
    }

    /** Declares the output fields: common fields followed by each stream's non-key fields. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(outputFields);
    }

    /** Requests tick tuples every {@code expireInSeconds} seconds for this component. */
    @Override
    public Map<String, Object> getComponentConfiguration() {
        Map<String, Object> conf = new HashMap<String, Object>();
        conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, expireInSeconds);
        return conf;
    }

    /** @return the tick frequency in seconds */
    public int getExpireInSeconds() {
        return expireInSeconds;
    }

    /** @param expireInSeconds the tick frequency in seconds reported by {@link #getComponentConfiguration()} */
    public void setExpireInSeconds(int expireInSeconds) {
        this.expireInSeconds = expireInSeconds;
    }

    /**
     * Handles join keys that drop out of the pending buffer: a key for which
     * every input stream delivered tuples is joined and emitted, any other
     * key has its buffered tuples failed.
     */
    private class ExpireCallback implements RotatingMap.ExpiredCallback<List<Object>, Map<GlobalStreamId, List<Tuple>>> {
        @Override
        public void expire(List<Object> id, Map<GlobalStreamId, List<Tuple>> tuplelists) {
            if (tuplelists.size() == numSources) {
                emit(id, tuplelists);
                return;
            }
            for (List<Tuple> tuples : tuplelists.values()) {
                for (Tuple tuple : tuples) {
                    System.err.println("Delete expired tuple: {" + tuple.toString() + "}");
                    collector.fail(tuple);
                }
            }
        }
    }
}
// storm bolt multi-way join
// (scraped page footer: latest recommended article published on 2021-06-26 13:41:41)