2021SC@SDUSC
spout源码分析(三)
2021SC@SDUSC
TopologyBuilder.java
/**
 * Builds a {@code StormTopology} from the bolts and spouts registered on this builder.
 *
 * <p>Each component is Java-serialized into its spec. If serialization fails because of a
 * {@link NotSerializableException}, the failure is rethrown as an {@link IllegalStateException}
 * whose message names the offending component and the lifecycle method ({@code prepare} for
 * bolts, {@code open} for spouts) in which the field should be instantiated instead. Any other
 * {@link RuntimeException} is rethrown unchanged.
 *
 * @return the assembled topology, passed through {@code Utils.addVersions}
 * @throws IllegalStateException if a bolt or spout holds a non-serializable field
 */
public StormTopology createTopology() {
    Map<String, Bolt> boltSpecs = new HashMap<>();
    Map<String, SpoutSpec> spoutSpecs = new HashMap<>();
    // Runs before either loop; presumably registers the checkpoint spout in `spouts`
    // so that it is picked up by the spout loop below (verify against setSpout).
    maybeAddCheckpointSpout();
    for (String boltId : bolts.keySet()) {
        IRichBolt bolt = bolts.get(boltId);
        bolt = maybeAddCheckpointTupleForwarder(bolt);
        ComponentCommon common = getComponentCommon(boltId, bolt);
        try {
            maybeAddCheckpointInputs(common);
            boltSpecs.put(boltId, new Bolt(ComponentObject.serialized_java(Utils.javaSerialize(bolt)), common));
        } catch (RuntimeException wrapperCause) {
            Throwable cause = wrapperCause.getCause();
            // Exact class match (not instanceof) is kept from the original check.
            if (cause != null && NotSerializableException.class.equals(cause.getClass())) {
                throw new IllegalStateException("Bolt '" + boltId + "' contains a non-serializable field of type "
                                                + cause.getMessage() + ", "
                                                + "which was instantiated prior to topology creation. "
                                                + cause.getMessage()
                                                + " "
                                                + "should be instantiated within the prepare method of '"
                                                + boltId
                                                + "' at the earliest.",
                                                wrapperCause);
            }
            throw wrapperCause;
        }
    }
    for (String spoutId : spouts.keySet()) {
        IRichSpout spout = spouts.get(spoutId);
        ComponentCommon common = getComponentCommon(spoutId, spout);
        try {
            spoutSpecs.put(spoutId, new SpoutSpec(ComponentObject.serialized_java(Utils.javaSerialize(spout)), common));
        } catch (RuntimeException wrapperCause) {
            Throwable cause = wrapperCause.getCause();
            if (cause != null && NotSerializableException.class.equals(cause.getClass())) {
                throw new IllegalStateException(
                    "Spout '" + spoutId + "' contains a non-serializable field of type "
                    + cause.getMessage()
                    + ", which was instantiated prior to topology creation. "
                    + cause.getMessage()
                    + " should be instantiated within the open method of '"
                    + spoutId
                    + "' at the earliest.",
                    wrapperCause);
            }
            throw wrapperCause;
        }
    }
    StormTopology stormTopology = new StormTopology(spoutSpecs,
                                                    boltSpecs,
                                                    new HashMap<>());
    stormTopology.set_worker_hooks(workerHooks);
    // Shared-memory settings are only attached when at least one component requested them.
    if (!componentToSharedMemory.isEmpty()) {
        stormTopology.set_component_to_shared_memory(componentToSharedMemory);
        stormTopology.set_shared_memory(sharedMemory);
    }
    return Utils.addVersions(stormTopology);
}
/**
 * Registers a supplier-backed spout under {@code id} without a parallelism hint.
 *
 * <p>Delegates to the three-argument supplier overload with a {@code null} hint.
 *
 * @param id the component id of the new spout
 * @param supplier the supplier the spout is built from
 * @return the declarer returned by the delegated overload
 * @throws IllegalArgumentException as declared by the delegated overload
 */
public SpoutDeclarer setSpout(String id, SerializableSupplier<?> supplier) throws IllegalArgumentException {
    final Number unspecifiedParallelism = null;
    return setSpout(id, supplier, unspecifiedParallelism);
}
/**
 * Registers a supplier-backed spout under {@code id} with the given parallelism hint.
 *
 * <p>The supplier is wrapped in a {@code LambdaSpout}, which is then registered through the
 * spout-accepting {@code setSpout} overload.
 *
 * @param id the component id of the new spout
 * @param supplier the supplier wrapped by the {@code LambdaSpout}
 * @param parallelismHint the parallelism hint forwarded unchanged (may be {@code null})
 * @return the declarer returned by the delegated overload
 * @throws IllegalArgumentException as declared by the delegated overload
 */
public SpoutDeclarer setSpout(String id, SerializableSupplier<?> supplier, Number parallelismHint) throws IllegalArgumentException {
    final LambdaSpout wrappedSupplier = new LambdaSpout(supplier);
    return setSpout(id, wrappedSupplier, parallelismHint);
}
/**
 * Registers a {@code CheckpointSpout} under {@code CHECKPOINT_COMPONENT_ID} with a
 * parallelism hint of 1, but only when {@code hasStatefulBolt} is set.
 */
private void maybeAddCheckpointSpout() {
    if (!hasStatefulBolt) {
        // No stateful bolt: nothing to register.
        return;
    }
    setSpout(CHECKPOINT_COMPONENT_ID, new CheckpointSpout(), 1);
}
/**
 * Adds checkpoint-stream inputs to {@code common} via {@code addCheckPointInputs}, but only
 * when {@code hasStatefulBolt} is set.
 *
 * @param common the component whose inputs may be extended
 */
private void maybeAddCheckpointInputs(ComponentCommon common) {
    if (!hasStatefulBolt) {
        return;
    }
    addCheckPointInputs(common);
}
/**
 * Subscribes {@code component} to the checkpoint stream of each of its upstream sources.
 *
 * <p>For every existing input whose source component id is a key of {@code spouts}, the
 * checkpoint stream of {@code CHECKPOINT_COMPONENT_ID} is used; for any other source, that
 * source's own {@code CHECKPOINT_STREAM_ID} stream is used. Every collected stream is added
 * with an "all" grouping.
 *
 * @param component the component whose inputs are extended in place
 */
private void addCheckPointInputs(ComponentCommon component) {
    // Collected first and applied afterwards, so the inputs map is not modified while
    // its key set is being iterated. The set also de-duplicates repeated sources.
    Set<GlobalStreamId> pendingStreams = new HashSet<>();
    for (GlobalStreamId upstream : component.get_inputs().keySet()) {
        String upstreamId = upstream.get_componentId();
        String checkpointSource = spouts.containsKey(upstreamId) ? CHECKPOINT_COMPONENT_ID : upstreamId;
        pendingStreams.add(new GlobalStreamId(checkpointSource, CHECKPOINT_STREAM_ID));
    }
    for (GlobalStreamId pending : pendingStreams) {
        component.put_to_inputs(pending, Grouping.all(new NullStruct()));
    }
}
public SpoutDeclarer setSpout:
在此拓扑中使用指定的并行度(parallelism)定义一个新的spout。如果spout声明自己是非分布式的,则parallelism_hint将被忽略,并且只有一个任务会被分配给这个组件。
id:这个组件的id。其他想要消费这个spout输出的组件通过该id来引用它。
parallelismHint:应该分配给这个spout的任务数量。每个任务将运行在集群中某个进程的一个线程上。
private void addCheckPointInputs:
对于具有来自spout的传入流的bolt,将来自检查点spout(CheckpointSpout)的检查点流添加到其输入;对于其他bolt,将前一个bolt的检查点流添加到其输入。
TopologyBuilder在创建拓扑(createTopology)的时候,会调用maybeAddCheckpointSpout;如果hasStatefulBolt为true,则会自动创建并添加CheckpointSpout(并行度为1)。
CheckpointSpout.java
如果一个拓扑含有stateful bolt,CheckpointSpout将被自动添加到拓扑中。每个拓扑只有一个checkpoint任务。
public class CheckpointSpout extends BaseRichSpout {
public static final String CHECKPOINT_STREAM_ID = "$checkpoint";
public static final String CHECKPOINT_COMPONENT_ID = "$checkpointspout";
public static final String CHECKPOINT_FIELD_TXID = "txid";
public static final String CHECKPOINT_FIELD_ACTION = "action";
private static final Logger LOG = LoggerFactory.getLogger(CheckpointSpout.class);
private static final String TX_STATE_KEY = "__state";
private TopologyContext context;
private SpoutOutputCollector collector;
private long lastCheckpointTs;
private int checkpointInterval;
private int sleepInterval;
private boolean recoveryStepInProgress;
private boolean checkpointStepInProgress;
private boolean recovering;
private KeyValueState<String, CheckPointState> checkpointState;
private CheckPointState curTxState;
/**
 * Tells whether a tuple arrived on the checkpoint stream.
 *
 * @param input the tuple to inspect
 * @return {@code true} if the tuple's source stream id equals {@code CHECKPOINT_STREAM_ID}
 */
public static boolean isCheckpoint(Tuple input) {
    final String sourceStream = input.getSourceStreamId();
    return CHECKPOINT_STREAM_ID.equals(sourceStream);
}
/**
 * Loads the checkpoint interval and the checkpoint state from the configuration, then
 * delegates to the package-private {@code open} overload.
 *
 * @param conf the topology configuration
 * @param context the topology context of this task
 * @param collector the collector used to emit tuples
 */
@Override
public void open(Map<String, Object> conf, TopologyContext context, SpoutOutputCollector collector) {
    // Interval is resolved before the state, matching the original argument evaluation order.
    final int intervalMillis = loadCheckpointInterval(conf);
    final KeyValueState<String, CheckPointState> loadedState = loadCheckpointState(conf, context);
    open(context, collector, intervalMillis, loadedState);
}
/**
 * Initializes all fields of this spout. Package access for unit test.
 *
 * <p>The sleep interval is one tenth of the checkpoint interval (integer division), the
 * current transaction state is read from {@code checkpointState} under {@code TX_STATE_KEY},
 * and the spout starts out in recovering mode with no step in progress.
 *
 * @param context the topology context of this task
 * @param collector the collector used to emit tuples
 * @param checkpointInterval the checkpoint interval
 * @param checkpointState the key-value state holding the transaction state
 */
void open(TopologyContext context, SpoutOutputCollector collector,
          int checkpointInterval, KeyValueState<String, CheckPointState> checkpointState) {
    this.context = context;
    this.collector = collector;

    this.checkpointInterval = checkpointInterval;
    this.sleepInterval = checkpointInterval / 10;

    this.checkpointState = checkpointState;
    this.curTxState = checkpointState.get(TX_STATE_KEY);

    this.lastCheckpointTs = 0;
    this.recoveryStepInProgress = false;
    this.checkpointStepInProgress = false;
    this.recovering = true;
}
/**
 * Performs at most one step per call: a recovery step if {@code shouldRecover()} holds,
 * otherwise a checkpoint step if {@code shouldCheckpoint()} holds, otherwise calls
 * {@code Utils.sleep(sleepInterval)}.
 */
@Override
public void nextTuple() {
    if (shouldRecover()) {
        handleRecovery();
        startProgress();
        return;
    }
    if (shouldCheckpoint()) {
        doCheckpoint();
        startProgress();
        return;
    }
    // Neither recovery nor checkpoint is due.
    Utils.sleep(sleepInterval);
}
/**
 * Handles an ack for an emitted checkpoint tuple.
 *
 * <p>If the acked id matches the txid of the current transaction state, the ack is routed to
 * the recovery or the checkpoint handler depending on {@code recovering}; otherwise a warning
 * is logged. Progress is reset in both cases.
 *
 * @param msgId the acked message id; cast to {@link Number} and compared as a {@code long}
 */
@Override
public void ack(Object msgId) {
    LOG.debug("Got ack with txid {}, current txState {}", msgId, curTxState);
    final boolean txidMatches = curTxState.getTxid() == ((Number) msgId).longValue();
    if (!txidMatches) {
        LOG.warn("Ack msgid {}, txState.txid {} mismatch", msgId, curTxState.getTxid());
    } else if (recovering) {
        handleRecoveryAck();
    } else {
        handleCheckpointAck();
    }
    resetProgress();
}
/**
 * Handles a failed checkpoint tuple.
 *
 * <p>If the spout is not already recovering, it is switched into recovering mode. Progress
 * is reset in either case.
 *
 * @param msgId the failed message id (only logged)
 */
@Override
public void fail(Object msgId) {
    LOG.debug("Got fail with msgid {}", msgId);
    final boolean notYetRecovering = !recovering;
    if (notYetRecovering) {
        LOG.debug("Checkpoint failed, will trigger recovery");
        recovering = true;
    }
    resetProgress();
}
/**
 * Declares the checkpoint stream, whose tuples carry the txid field followed by the
 * action field.
 *
 * @param declarer the declarer on which the stream is registered
 */
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
    final Fields checkpointFields = new Fields(CHECKPOINT_FIELD_TXID, CHECKPOINT_FIELD_ACTION);
    declarer.declareStream(CHECKPOINT_STREAM_ID, checkpointFields);
}
/**
 * Obtains the key-value state of this spout task and makes sure it holds a transaction state.
 *
 * <p>The state namespace is {@code <componentId>-<taskId>}. If no entry exists under
 * {@code TX_STATE_KEY}, an initial {@code CheckPointState(-1, COMMITTED)} is stored and
 * committed.
 *
 * @param conf the topology configuration passed to {@code StateFactory.getState}
 * @param ctx the topology context providing the component and task ids
 * @return the state, holding an entry under {@code TX_STATE_KEY} on return
 */
private KeyValueState<String, CheckPointState> loadCheckpointState(Map<String, Object> conf, TopologyContext ctx) {
    final String stateNamespace = ctx.getThisComponentId() + "-" + ctx.getThisTaskId();
    final KeyValueState<String, CheckPointState> spoutState =
        (KeyValueState<String, CheckPointState>) StateFactory.getState(stateNamespace, conf, ctx);
    if (spoutState.get(TX_STATE_KEY) != null) {
        LOG.debug("Got checkpoint spout state {}", spoutState.get(TX_STATE_KEY));
        return spoutState;
    }
    // First run for this namespace: seed and persist the initial transaction state.
    final CheckPointState initialTxState = new CheckPointState(-1, COMMITTED);
    spoutState.put(TX_STATE_KEY, initialTxState);
    spoutState.commit();
    LOG.debug("Initialized checkpoint spout state with txState {}", initialTxState);
    return spoutState;
}
/**
 * Reads the checkpoint interval from the topology configuration.
 *
 * <p>If {@code Config.TOPOLOGY_STATE_CHECKPOINT_INTERVAL} is absent, or present but mapped to
 * {@code null}, the interval is treated as 0. The result is then clamped to a minimum of 100.
 *
 * @param topoConf the topology configuration
 * @return the checkpoint interval in milliseconds, never less than 100
 * @throws ClassCastException if the configured value is not a {@link Number}
 */
private int loadCheckpointInterval(Map<String, Object> topoConf) {
    int interval = 0;
    // A single get() covers both "key absent" and "key mapped to null"; the previous
    // containsKey()/get() pair threw a NullPointerException in the latter case.
    Object configured = topoConf.get(Config.TOPOLOGY_STATE_CHECKPOINT_INTERVAL);
    if (configured != null) {
        interval = ((Number) configured).intValue();
    }
    // ensure checkpoint interval is not less than a sane low value.
    interval = Math.max(100, interval);
    LOG.info("Checkpoint interval is {} millis", interval);
    return interval;
}
CheckpointSpout从Config.TOPOLOGY_STATE_CHECKPOINT_INTERVAL读取checkpoint的时间间隔(单位为毫秒),defaults.yaml中默认是1000;如果配置中没有指定该项,则interval取0,再经过Math.max(100, interval)后变为100,即最低值为100毫秒。
nextTuple:
首先判断shouldRecover,如果需要恢复则调用handleRecovery进行恢复,然后startProgress;如果需要checkpoint则进行checkpoint,否则sleepInterval再进行下次判断。
如果不需要recover,则调用shouldCheckpoint方法判断是否需要进行checkpoint,如果当前状态不是COMMITTED或者当前时间距离上次checkpoint的时间超过了checkpointInterval,则进行doCheckpoint操作,往CHECKPOINT_STREAM_ID发送下一步的action。
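CheckpointSpout在收到ack之后,如果msgId与当前curTxState的txid一致,则根据recovering标志调用handleRecoveryAck或handleCheckpointAck(其中会进行saveTxState操作,调用checkpointState.commit提交整个checkpoint);如果不一致,则只打印warn日志。最后调用resetProgress重置状态。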
而如果收到的是fail,且当前不处于recovering状态,则先将recovering置为true以触发恢复,然后调用resetProgress重置状态。
参考链接:https://blog.csdn.net/weixin_34214500/article/details/87993668