storm bolt多重聚合


import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import com.newegg.storm.util.TupleHelpers;


import backtype.storm.Config;
import backtype.storm.generated.GlobalStreamId;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.utils.RotatingMap;

/**
 * Multiple to multiple join
 * join tuples during a specified period
 * for example we receive tuples as follow:
 * stream1: (A,B1,D1),(A,B2,D2)
 * stream2: (A,C1),(A,C2)
 * then join the two sources, we emit four output tuples:
 * (A,B1,D1,C1)
 * (A,B1,D1,C2)
 * (A,B2,D2,C1)
 * (A,B2,D2,C2)
 * @author bw67
 *
 */
public class MultipleJoinBolt extends BaseRichBolt {
	
	private OutputCollector collector;
	private TopologyContext context;
	private Fields commonFields;
	private Fields[] otherFields;
	private Fields outputFields;
	private int numSources;
	private int expireInSeconds = 10;

	private RotatingMap<List<Object>, Map<GlobalStreamId, List<Tuple>>> pending; //<idFields, <sourceComponent,tuples>>
	private Map<Fields, GlobalStreamId> fieldLocations; //<fields, sourceComponent>
	
	
	/**
	 * multiple join
	 * @param expireInSeconds
	 * @param fields
	 */
	public MultipleJoinBolt(int expireInSeconds, Fields...fields){
		this.expireInSeconds = expireInSeconds;
		findCommonFields(fields);
		findOtherFields(fields);
		makeOutputFields();
	}
	
	/**
	 * [[A,B,D], [A,C]] => [A]
	 * @param fields
	 */
	private void findCommonFields(Fields...fields){
		Set c = new HashSet(fields[0].toList());		
		for(int i=1;i<fields.length;i++){
			c.retainAll(fields[i].toList());
		}
		commonFields = new Fields(new ArrayList(c));
	}
	
	/**
	 * [[A,B,D], [A,C]] => [[B,D], [C]]
	 * @param fields
	 */
	private void findOtherFields(Fields...fields){
		otherFields = new Fields[fields.length];
		for(int i=0;i<fields.length;i++){
			List l = fields[i].toList();
			l.removeAll(commonFields.toList());
			otherFields[i] = new Fields(l);
		}
	}
	
	/**
	 * outputFields: [A,B,D,C]
	 */
	private void makeOutputFields(){
		List fields = commonFields.toList();
		for(Fields f : otherFields){
			fields.addAll(f.toList());
		}
		outputFields = new Fields(fields);
	}
	
	@Override
	public void prepare(Map stormConf, TopologyContext context,
			OutputCollector collector) {
		this.fieldLocations = new HashMap<Fields, GlobalStreamId>();
		this.collector = collector;
		this.context = context;
		//give two buckets <commonFields, <streamId, tuples>
		//[A]->{S1->[A,B,D]}
		//[A]->{S2->[A,C]}
		this.pending = new RotatingMap<List<Object>, Map<GlobalStreamId, List<Tuple>>>(2, new ExpireCallback());
		//number of tuple sources
		this.numSources = context.getThisSources().size();
		
	    mapFieldsToSource();
	}
	
	/**
	 * fieldLocations: Fields -> Source
	 * [B,D] -> S1
	 * [C] -> S2
	 */
	private void mapFieldsToSource(){
		for (GlobalStreamId source : context.getThisSources().keySet()) {
			Fields fields = context.getComponentOutputFields(source.get_componentId(), source.get_streamId());
			for (Fields of : otherFields) {
				if(fields.toList().containsAll(of.toList())){
					fieldLocations.put(of, source);
				}
			}
	    }
	}
	

	@Override
	public void execute(Tuple input) {
		
		//if time is up, rotate the pending map
	    if (TupleHelpers.isTickTuple(input)){
	    	pending.rotate();
	    	return;
	    }
		
		//get the values of id fields
	    List<Object> id = input.select(commonFields);
	    //get this source component
	    GlobalStreamId streamId = new GlobalStreamId(input.getSourceComponent(), input.getSourceStreamId());
	    //if pending map doesn't contain this id, put it in
	    if (!pending.containsKey(id)) {
	    	pending.put(id, new HashMap<GlobalStreamId, List<Tuple>>());
	    }
	    //get components joined map by id
	    Map<GlobalStreamId, List<Tuple>> parts = pending.get(id);

	    //put this tuple to the map 
	    if(!parts.containsKey(streamId)){
	    	parts.put(streamId, new LinkedList<Tuple>());
	    }
	    parts.get(streamId).add(input);
	    
	    //update this active parts into the first bucket of the rotatingmap
	    pending.put(id, parts);
	    	
	}
	
	private void emit(List<Object> ids, Map<GlobalStreamId, List<Tuple>> parts){
		//partin: [[(A,B1,D1),(A,B2,D2)],[(A,C1),(A,C2)]]
        //partout: [[[B1,D1],[B2,D2]],[[C1],[C2]]]
    	List[][] partin = new List[otherFields.length][];
	    List[][] partout = new List[otherFields.length][];
    	//add other fields
    	for (int i=0;i<otherFields.length;i++){
    		GlobalStreamId loc = fieldLocations.get(otherFields[i]);
    		List<Tuple> tuples = parts.get(loc);
    		List[] values = new List[tuples.size()];  		
    		List[] targets = new List[tuples.size()];
    		for(int j=0;j<tuples.size();j++){
    			values[j] = tuples.get(j).select(otherFields[i]);
    			targets[j] = new ArrayList(1);
    			targets[j].add(tuples.get(j));
    		}
    		partout[i] = values;
    		partin[i] = targets;
    	}
    	
    	//targets: [[(A,B1,D1),(A,C1)],[(A,B1,D1),(A,C2)],[(A,B2,D2),(A,C1)],[(A,B2,D2),(A,C2)]]
    	//results: [[[B1,D1],[C1]],[[B1,D1],[C2]],[[B2,D2],[C1]],[[B2,D2],[C2]]]
    	List[] targets = partin[0]; 
    	List[] results = partout[0]; 	
    	for(int i=1;i<partout.length;i++){
    		results = joinParts(results,partout[i]);
    		targets = joinParts(targets,partin[i]);
    	}
    	
    	//emit
    	for(int i=0;i<results.length;i++){
    		results[i].addAll(0, ids); //add commonFields at first
    		collector.emit(targets[i], results[i]);
    		System.out.println("["+Thread.currentThread().getName()+"] => " + results[i]);
    	}

    	//ack
    	for (List<Tuple> part : parts.values()) {
    		for(Tuple tuple : part){
    			collector.ack(tuple);
    		}	    		
    	}
	}
	
	//cartesian product: size = size1 * size2
	private List[] joinParts (List[] part1, List[] part2){
		List[] result = new List[part1.length * part2.length];
		int i = 0;
		for(List l1 : part1){
			for(List l2 : part2){
				result[i] = new LinkedList();
				result[i].addAll(l1);
				result[i].addAll(l2);
				i++;
			}
		}
		return result;
	}
	

	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		declarer.declare(outputFields);
		
	}
	
	@Override
	public Map<String, Object> getComponentConfiguration() {
		Map<String, Object> conf = new HashMap<String, Object>();
	    conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, expireInSeconds);
	    return conf;
	}


	public int getExpireInSeconds() {
		return expireInSeconds;
	}

	public void setExpireInSeconds(int expireInSeconds) {
		this.expireInSeconds = expireInSeconds;
	}
	

	private class ExpireCallback implements RotatingMap.ExpiredCallback<List<Object>, Map<GlobalStreamId, List<Tuple>>> {
	    @Override
	    public void expire(List<Object> id, Map<GlobalStreamId, List<Tuple>> tuplelists) {
	    	//if this joinMap is full
		    if (tuplelists.size() == numSources) {
		    	emit(id, tuplelists);	
		    }
		    else{
		    	for(List<Tuple> tuples : tuplelists.values()){
		    		for (Tuple tuple : tuples) {
			    		System.err.println("Delete expired tuple: {" + tuple.toString() + "}");
			    		collector.fail(tuple);
			    	}
		    	} 
		    }	    	  	
	    }
	}

}


                
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值