看了一下trident的部分源码,然后分享一下
我将部分源码用流程图的形式整理出来了,下面会慢慢地讲解这些流程
这部分介绍的是ITridentSpout的部分
trident中的spout由两部分组成:一部分是协调spout的接口(BatchCoordinator),另一部分是消息发送Bolt的接口(Emitter)。协调spout的逻辑会被部署到多个节点中运行,消息发送的逻辑会被部署到Bolt中执行
spout的创建涉及三个接口:ITridentSpout、BatchCoordinator、Emitter,其中后面2个是第一个的内部接口
TridentSpoutCoordinator(下文简称TSC)接收到MasterBatchCoordinator(下文简称MBC)的batch流后,会在execute()方法中调用BatchCoordinator的initializeTransaction()方法初始化事务的元数据,并继续向外发送一个batch流
下面通过源码来看整体的流程
1.MBC(MasterBatchCoordinator)在sync()方法中为就绪的事务创建尝试(attempt),并向外发送batch流
// While the coordinator is active and fewer than _maxTransactionActive transactions are in flight,
// start a new attempt for every ready transaction id that is not already active.
if(_active) {
if(_activeTx.size() < _maxTransactionActive) {
Long curr = _currTransaction; // _currTransaction is the next transaction that needs to be committed
for(int i=0; i<_maxTransactionActive; i++) {
if(!_activeTx.containsKey(curr) && isReady(curr)) {
// by using a monotonically increasing attempt id, downstream tasks
// can be memory efficient by clearing out state for old attempts
// as soon as they see a higher attempt id for a transaction
Integer attemptId = _attemptIds.get(curr); // attempt number previously recorded for this transaction, if any
if(attemptId==null) { // null means a brand-new transaction; otherwise this is a replay
attemptId = 0;
} else {
attemptId++;
}
_attemptIds.put(curr, attemptId);
for(TransactionalState state: _states) { // iterate over the stored transactional metadata
state.setData(CURRENT_ATTEMPTS, _attemptIds); // persist the current attempt ids for each spout (to ZooKeeper, per the author's note)
}
TransactionAttempt attempt = new TransactionAttempt(curr, attemptId);
final TransactionStatus newTransactionStatus = new TransactionStatus(attempt);
_activeTx.put(curr, newTransactionStatus); // remember the status of this attempt
_collector.emit(BATCH_STREAM_ID, new Values(attempt), attempt); // emit on the batch stream
LOG.debug("Emitted on [stream = {}], [tx_attempt = {}], [tx_status = {}], [{}]", BATCH_STREAM_ID, attempt, newTransactionStatus, this);
_throttler.markEvent();
}
curr = nextTransactionId(curr); // move on to the next transaction id; the next iteration checks isReady(curr) for it
}
}
}
2.TSC收到batch流后,在execute()方法中,继续向外发送batch流
public void execute(Tuple tuple, BasicOutputCollector collector) {
TransactionAttempt attempt = (TransactionAttempt) tuple.getValue(0);//拿到当前事务id 和尝试次数
if(tuple.getSourceStreamId().equals(MasterBatchCoordinator.SUCCESS_STREAM_ID)) {
_state.cleanupBefore(attempt.getTransactionId());//如果是从success流中获取数据 则清理在zookeeper上面的数据
_coord.success(attempt.getTransactionId());//删除此id的所有状态
} else {
long txid = attempt.getTransactionId();
Object prevMeta = _state.getPreviousState(txid);//如果是第一次prevMeta为
Object meta = _coord.initializeTransaction(txid, prevMeta, _state.getState(txid));//调用BatchCoordinator的initializeTransaction()方法完成初始化
_state.overrideState(txid, meta);//保存在zookeeper上
collector.emit(MasterBatchCoordinator.BATCH_STREAM_ID, new Values(attempt, meta));//发送batch流 }
}
3.TSE(TridentSpoutExecutor)收到batch流后,调用用户的Emitter类中的emitBatch()方法,开始向外发送数据。
public void execute(BatchInfo info, Tuple input) {
// there won't be a BatchInfo for the success stream
TransactionAttempt attempt = (TransactionAttempt) input.getValue(0);//拿到事务id和当前尝试次数
if(input.getSourceStreamId().equals(MasterBatchCoordinator.COMMIT_STREAM_ID)) {
if(attempt.equals(_activeBatches.get(attempt.getTransactionId()))) {
((ICommitterTridentSpout.Emitter) _emitter).commit(attempt);/调用commit 收到$commit流的节点会开始提交操作,但trident会按事务号顺序提交事务的,所以由提交bolt来决定是否现在提交,还是先缓存下来之后再提交
_activeBatches.remove(attempt.getTransactionId());//清除掉了当前的事务
} else {
throw new FailedException("Received commit for different transaction attempt");
}
} else if(input.getSourceStreamId().equals(MasterBatchCoordinator.SUCCESS_STREAM_ID)) {
// valid to delete before what's been committed since
// those batches will never be accessed again
_activeBatches.headMap(attempt.getTransactionId()).clear();//清除掉当前事务编号之前的事务数据
_emitter.success(attempt);//清除此id的所有状态
} else {
_collector.setBatch(info.batchId);//collector为AdldCollector类型,AdldCollector在发送消息时将事务序号添加到第1列
_emitter.emitBatch(attempt, input.getValue(1), _collector);//发送batch流
_activeBatches.put(attempt.getTransactionId(), attempt);//存储当前节点上运行的事务以及尝试编号
}
}
4.当整个消息被成功处理后会调用MBC的ack方法,ack方法会将事务的状态从PROCESSING改为PROCESSED
// Called with the TransactionAttempt used as message id once its tuple tree is fully processed.
// PROCESSING -> PROCESSED on the first ack; an ack while COMMITTING finishes the transaction,
// emits on the success stream and advances _currTransaction. sync() runs afterwards in both cases.
public void ack(Object msgId) {
TransactionAttempt tx = (TransactionAttempt) msgId;
TransactionStatus status = _activeTx.get(tx.getTransactionId());
LOG.debug("Ack. [tx_attempt = {}], [tx_status = {}], [{}]", tx, status, this);
// ignore acks for attempts that are no longer the active attempt of their transaction
if(status!=null && tx.equals(status.attempt)) {
if(status.status==AttemptStatus.PROCESSING) { // PROCESSING is the initial status
status.status = AttemptStatus.PROCESSED; // the ack moves it to PROCESSED
LOG.debug("Changed status. [tx_attempt = {}] [tx_status = {}]", tx, status);
} else if(status.status==AttemptStatus.COMMITTING) {
_activeTx.remove(tx.getTransactionId());
_attemptIds.remove(tx.getTransactionId());
_collector.emit(SUCCESS_STREAM_ID, new Values(tx));
_currTransaction = nextTransactionId(tx.getTransactionId());
for(TransactionalState state: _states) {
state.setData(CURRENT_TX, _currTransaction);
}
LOG.debug("Emitted on [stream = {}], [tx_attempt = {}], [tx_status = {}], [{}]", SUCCESS_STREAM_ID, tx, status, this);
}
sync(); // re-evaluate what can be committed or emitted next
}
}
进入sync()方法
TransactionStatus maybeCommit = _activeTx.get(_currTransaction);//拿到下一个需要提交事务的状态 if(maybeCommit!=null && maybeCommit.status == AttemptStatus.PROCESSED) {//上一步将事务的状态修改为了PROCESSED maybeCommit.status = AttemptStatus.COMMITTING;//修改事务状态为commit _collector.emit(COMMIT_STREAM_ID, new Values(maybeCommit.attempt), maybeCommit.attempt);//往commit流中发送消息 LOG.debug("Emitted on [stream = {}], [tx_status = {}], [{}]", COMMIT_STREAM_ID, maybeCommit, this); }
4.1 当消息处理失败时
public void fail(Object msgId) {//在收到失败消息时,失败的事务及其后续事务都需要重传,但事务的元数据并不会重新产生,而是利用之前初始化的内容获得
TransactionAttempt tx = (TransactionAttempt) msgId;//拿到事务信息
TransactionStatus stored = _activeTx.remove(tx.getTransactionId());//删除在_activeTx中的事务尝试状态,并将事务的信息返回
LOG.debug("Fail. [tx_attempt = {}], [tx_status = {}], [{}]", tx, stored, this);
if(stored!=null && tx.equals(stored.attempt)) {
_activeTx.tailMap(tx.getTransactionId()).clear();//将该事务之后的事务状态删除,
sync();
}
5.TSE处理commit流
// Excerpt from execute(): the commit-stream branch. The commit is only performed when the
// attempt matches the one recorded as active for its transaction id.
if(input.getSourceStreamId().equals(MasterBatchCoordinator.COMMIT_STREAM_ID)) {
    if(attempt.equals(_activeBatches.get(attempt.getTransactionId()))) {
        // Author's note: a node that receives the $commit stream starts committing, but Trident
        // commits in transaction-id order, so the committing bolt decides whether to commit now
        // or to buffer the batch and commit it later.
        ((ICommitterTridentSpout.Emitter) _emitter).commit(attempt);
        _activeBatches.remove(attempt.getTransactionId()); // this transaction is no longer active
    } else {
        throw new FailedException("Received commit for different transaction attempt");
    }
}
6.当$commit流处理完后,MBC的ack()方法会被再次调用,同时向外发送$success流
// Excerpt from ack(): an ack that arrives while the attempt is COMMITTING completes the transaction.
if(status.status==AttemptStatus.COMMITTING) {
    _activeTx.remove(tx.getTransactionId()); // drop the attempt status of this transaction
    _attemptIds.remove(tx.getTransactionId()); // drop the attempt number of this transaction
    _collector.emit(SUCCESS_STREAM_ID, new Values(tx)); // emit on the success stream
    _currTransaction = nextTransactionId(tx.getTransactionId()); // the next transaction id becomes the current one
    for(TransactionalState state: _states) {
        // Author's note: persists each spout's current transaction to ZooKeeper; CURRENT_TX is the
        // metadata path under which the current transaction is stored.
        state.setData(CURRENT_TX, _currTransaction);
    }
}
7.TSC处理$success流
// Excerpt from execute(): the success-stream branch.
if(tuple.getSourceStreamId().equals(MasterBatchCoordinator.SUCCESS_STREAM_ID)) {
_state.cleanupBefore(attempt.getTransactionId()); // success stream: clean up stored data (in ZooKeeper, per the author's note)
_coord.success(attempt.getTransactionId()); // let the coordinator drop all state for this id
}
8.TSE处理$success流
else if(input.getSourceStreamId().equals(MasterBatchCoordinator.SUCCESS_STREAM_ID)) {
// valid to delete before what's been committed since
// those batches will never be accessed again
_activeBatches.headMap(attempt.getTransactionId()).clear();//清除掉当前事务编号之前的事务数据
_emitter.success(attempt);//清除此id的所有状态
}
9.isReady()方法:在我们实现BatchCoordinator接口的时候,对应的isReady()方法都是默认返回true的
private boolean isReady(long txid) { if(_throttler.isThrottled()) return false; //TODO: make this strategy configurable?... right now it goes if anyone is ready for(ITridentSpout.BatchCoordinator coord: _coordinators) { if(coord.isReady(txid)) return true; } return false; }
10.整个流程到此结束
总结说就是消息是从MasterBatchCoordinator开始的,它是一个真正的spout,而TridentSpoutCoordinator与TridentSpoutExecutor都是bolt,MasterBatchCoordinator发起协调消息,最后的结果是TridentSpoutExecutor发送业务消息。而发送协调消息与业务消息的都是调用用户Spout中BatchCoordinator与Emitter中定义的代码。
11.将spout加载到topology中
1.在TridentTopologyBuilder的buildTopology方法中设置了topo的相关信息
2.在TridentTopology中调用newStream方法,将spout节点加入拓扑。
在这两个类中就会调用MBC,TSC,TSE这些类
这篇博客很大程度上参考了http://blog.csdn.net/jediael_lu/