2021SC@SDUSC
使用方法(步骤):
1.上游在prepare方法保存下游bolt的taskId列表
/**
 * Step 1 excerpt of the upstream bolt: during {@code prepare} it records the
 * task ids of the downstream "count-bolt" component so they can later be
 * used as explicit targets for direct emits.
 */
public class SentenceDirectBolt extends BaseRichBolt {

    private static final Logger LOGGER = LoggerFactory.getLogger(SentenceDirectBolt.class);

    private OutputCollector collector;
    private List<Integer> taskIds;
    private int numCounterTasks;

    /**
     * Keeps the collector and caches the downstream task ids and their count.
     *
     * @param config    topology configuration (not read here)
     * @param context   used to look up the tasks of the "count-bolt" component
     * @param collector collector stored for later emits and acks
     */
    public void prepare(Map config, TopologyContext context, OutputCollector collector) {
        this.collector = collector;

        // NOTE 1: fetch the task ids of the downstream bolt; emitDirect needs
        // one of them as its explicit target.
        final List<Integer> counterTaskIds = context.getComponentTasks("count-bolt");
        this.taskIds = counterTaskIds;
        this.numCounterTasks = counterTaskIds.size();
    }

    // ...... (remaining members omitted in this excerpt)
}
2.上游在declareOutputFields使用declareStream声明streamId
/**
 * Step 2 excerpt of the upstream bolt: declares the output streams, including
 * the two named direct streams.
 */
public class SentenceDirectBolt extends BaseRichBolt {

    // ...... (other members omitted in this excerpt)

    /**
     * Declares the default stream plus two direct streams, each carrying a
     * single "word" field.
     *
     * @param declarer declarer on which the streams are registered
     */
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));

        // NOTE 2: direct streams must be declared through declareStream with
        // an explicit stream id; passing true marks the stream as direct.
        final String[] directStreamIds = {"directStreamDemo1", "directStreamDemo2"};
        for (String streamId : directStreamIds) {
            declarer.declareStream(streamId, true, new Fields("word"));
        }
    }
}
3.上游采用emitDirect指定下游taskId及streamId
/**
 * Step 3 excerpt of the upstream bolt: splits each sentence into words and
 * sends every word to an explicitly chosen downstream task.
 */
public class SentenceDirectBolt extends BaseRichBolt {

    // ...... (other members omitted in this excerpt)

    /**
     * Splits the "sentence" field on single spaces, emits each word directly
     * to the task picked by {@code getWordCountTaskId}, then acks the input.
     *
     * @param tuple input tuple carrying a "sentence" string field
     */
    public void execute(Tuple tuple) {
        for (String word : tuple.getStringByField("sentence").split(" ")) {
            final int targetTaskId = getWordCountTaskId(word);
            LOGGER.info("word:{} choose taskId:{}", word, targetTaskId);

            // NOTE 3: choose both the downstream task id and the stream id;
            // even task ids use the first direct stream, all others the second.
            final String streamId =
                    (targetTaskId % 2 == 0) ? "directStreamDemo1" : "directStreamDemo2";
            this.collector.emitDirect(targetTaskId, streamId, new Values(word));
        }
        this.collector.ack(tuple);
    }
}
4.下游使用directGrouping连接上游bolt及streamId
/**
 * Wires the demo topology: sentence-spout -> split-bolt -> count-bolt ->
 * report-bolt, then hands the builder to {@code submitRemote}.
 *
 * <p>The count bolt subscribes to BOTH direct streams. The upstream bolt
 * emits on "directStreamDemo1" for even task ids and on "directStreamDemo2"
 * for odd ones; subscribing to only the first stream would leave the second
 * without a consumer, so words routed to odd task ids would never be counted.
 */
@Test
public void testDirectGrouping() throws InvalidTopologyException, AuthorizationException, AlreadyAliveException {
    TopologyBuilder builder = new TopologyBuilder();
    builder.setSpout("sentence-spout", new SentenceSpout());

    // SentenceSpout --> SentenceDirectBolt
    builder.setBolt("split-bolt", new SentenceDirectBolt()).shuffleGrouping("sentence-spout");

    // SentenceDirectBolt --> WordCountBolt
    // NOTE 4: name the upstream bolt and every direct stream id to consume.
    builder.setBolt("count-bolt", new WordCountBolt(), 5)
            .directGrouping("split-bolt", "directStreamDemo1")
            .directGrouping("split-bolt", "directStreamDemo2");

    // WordCountBolt --> ReportBolt
    builder.setBolt("report-bolt", new ReportBolt()).globalGrouping("count-bolt");

    submitRemote(builder);
}
总结:
direct grouping由上游的producer直接指定下游的哪个task来接收它发射出来的tuple。具体做法:下游使用directGrouping连接上游,并指定要消费的streamId;上游在prepare方法中保存下游的taskId列表,在declareOutputFields中使用declareStream(direct参数为true)声明streamId,最后在execute方法中使用emitDirect(int taskId, String streamId, List<Object> tuple)方法指定下游的taskId以及要发送到的streamId。