https://docs.google.com/document/d/1Lr9UYXEz6s6R_3PWg3bZQLF3upGaNEkc0rQCFSzaYDI/edit
// create the original stream DataStream<String> stream = ...; // apply the async I/O transformation DataStream<Tuple2<String, String>> resultStream = AsyncDataStream.unorderedWait(stream, new AsyncDatabaseRequest(), 1000, TimeUnit.MILLISECONDS, 100);
AsyncDataStream
有一组接口,
unorderedWait
orderedWait
最终都是调用到,
addOperator(in, func, timeUnit.toMillis(timeout), capacity, OutputMode.ORDERED)
是否 ordered,区别只在最后一个参数:orderedWait 传 OutputMode.ORDERED,unorderedWait 传 OutputMode.UNORDERED
private static <IN, OUT> SingleOutputStreamOperator<OUT> addOperator( DataStream<IN> in, AsyncFunction<IN, OUT> func, long timeout, int bufSize, OutputMode mode) { TypeInformation<OUT> outTypeInfo = TypeExtractor.getUnaryOperatorReturnType(func, AsyncFunction.class, false, true, in.getType(), Utils.getCallLocationName(), true); // create transform AsyncWaitOperator<IN, OUT> operator = new AsyncWaitOperator<>( in.getExecutionEnvironment().clean(func), timeout, bufSize, mode); return in.transform("async wait operator", outTypeInfo, operator); }
AsyncWaitOperator
setup 主要是初始化任务队列
@Override public void setup(StreamTask<?, ?> containingTask, StreamConfig config, Output<StreamRecord<OUT>> output) { super.setup(containingTask, config, output); // create the operators executor for the complete operations of the queue entries this.executor = Executors.newSingleThreadExecutor(); //单线程的Executor,用于处理队列 switch (outputMode) { case ORDERED: queue = new OrderedStreamElementQueue( capacity, executor, this); break; case UNORDERED: queue = new UnorderedStreamElementQueue( capacity, executor, this); break; default: throw new IllegalStateException("Unknown async mode: " + outputMode + '.'); } }
看下,OrderedStreamElementQueue
public class OrderedStreamElementQueue implements StreamElementQueue { /** Queue for the inserted StreamElementQueueEntries. */ private final ArrayDeque<StreamElementQueueEntry<?>> queue; //放所有的element @Override public AsyncResult peekBlockingly() throws InterruptedException { //取 lock.lockInterruptibly(); try { while (queue.isEmpty() || !queue.peek().isDone()) { //如果queue的第一个element没有完成 headIsCompleted.await(); //等锁,等他完成 } return queue.peek(); //如果完成就peek出来,注意peek是不会移除这个element的,所以需要poll } finally { lock.unlock(); } } @Override public AsyncResult poll() throws InterruptedException { //单独做poll lock.lockInterruptibly(); try { while (queue.isEmpty() || !queue.peek().isDone()) { //如果第一个没完成,等待 headIsCompleted.await(); } notFull.signalAll(); //poll后,队列一定不满,所以解锁notFull return queue.poll(); } finally { lock.unlock(); } } private <T> void addEntry(StreamElementQueueEntry<T> streamElementQueueEntry) { //put,tryput都是调用这个 queue.addLast(streamElementQueueEntry); //加到queue里面 streamElementQueueEntry.onComplete(new AcceptFunction<StreamElementQueueEntry<T>>() { //给element加上complete的callback,调用onCompleteHandler @Override public void accept(StreamElementQueueEntry<T> value) { try { onCompleteHandler(value); } } }, executor); } private void onCompleteHandler(StreamElementQueueEntry<?> streamElementQueueEntry) throws InterruptedException { lock.lockInterruptibly(); try { if (!queue.isEmpty() && queue.peek().isDone()) { headIsCompleted.signalAll(); //放开锁,告诉大家我完成了 } } finally { lock.unlock(); } } }
对于queue主要就是,读取操作
这里取是分两步,先peek,再poll
open,主要是处理从snapshot中恢复的数据
并启动 Emitter 线程
@Override public void open() throws Exception { super.open(); // process stream elements from state, since the Emit thread will start as soon as all // elements from previous state are in the StreamElementQueue, we have to make sure that the // order to open all operators in the operator chain proceeds from the tail operator to the // head operator. if (recoveredStreamElements != null) { for (StreamElement element : recoveredStreamElements.get()) { //处理从snapshot中恢复出的element if (element.isRecord()) { processElement(element.<IN>asRecord()); } else if (element.isWatermark()) { processWatermark(element.asWatermark()); } else if (element.isLatencyMarker()) { processLatencyMarker(element.asLatencyMarker()); } else { throw new IllegalStateException("Unknown record type " + element.getClass() + " encountered while opening the operator."); } } recoveredStreamElements = null; } // create the emitter this.emitter = new Emitter<>(checkpointingLock, output, queue, this); //创建Emitter // start the emitter thread this.emitterThread = new Thread(emitter, "AsyncIO-Emitter-Thread (" + getOperatorName() + ')'); emitterThread.setDaemon(true); emitterThread.start(); }
Emitter
// Emitter loop (excerpt): while running, fetch the next entry from the queue and output it.
@Override
public void run() {
    try {
        while (running) {
            LOG.debug("Wait for next completed async stream element result.");
            // For the ordered queue shown in this article, this call waits until the head
            // entry is done and returns it without removing it.
            AsyncResult streamElementEntry = streamElementQueue.peekBlockingly();

            output(streamElementEntry);
        }
        // NOTE: the excerpt ends here; the remainder of the try block and of the method is
        // not shown.
从queue中peek数据,对于上面OrderedStreamElementQueue,只有完成的数据会被peek到
private void output(AsyncResult asyncResult) throws InterruptedException { if (asyncResult.isWatermark()) { //...... } else { AsyncCollectionResult<OUT> streamRecordResult = asyncResult.asResultCollection(); synchronized (checkpointLock) { //collect数据需要加checkpoint锁 LOG.debug("Output async stream element collection result."); try { Collection<OUT> resultCollection = streamRecordResult.get(); if (resultCollection != null) { for (OUT result : resultCollection) { timestampedCollector.collect(result); //真正emit数据 } } } // remove the peeked element from the async collector buffer so that it is no longer // checkpointed streamElementQueue.poll(); //emit完可以将数据从queue中删除 // notify the main thread that there is again space left in the async collector // buffer checkpointLock.notifyAll(); } } }
可以看到当数据被emit后,才会从queue删除掉
processElement
@Override public void processElement(StreamRecord<IN> element) throws Exception { final StreamRecordQueueEntry<OUT> streamRecordBufferEntry = new StreamRecordQueueEntry<>(element); //封装成StreamRecordQueueEntry if (timeout > 0L) { // register a timeout for this AsyncStreamRecordBufferEntry long timeoutTimestamp = timeout + getProcessingTimeService().getCurrentProcessingTime(); final ScheduledFuture<?> timerFuture = getProcessingTimeService().registerTimer( //开个定时器,到时间就会colloct一个超时异常 timeoutTimestamp, new ProcessingTimeCallback() { @Override public void onProcessingTime(long timestamp) throws Exception { streamRecordBufferEntry.collect( new TimeoutException("Async function call has timed out.")); } }); // Cancel the timer once we've completed the stream record buffer entry. This will remove // the register trigger task streamRecordBufferEntry.onComplete(new AcceptFunction<StreamElementQueueEntry<Collection<OUT>>>() { //在StreamRecordQueueEntry完成是触发删除这个定时器,这样就只有未完成的会触发定时器 @Override public void accept(StreamElementQueueEntry<Collection<OUT>> value) { timerFuture.cancel(true); } }, executor); } addAsyncBufferEntry(streamRecordBufferEntry); //把StreamRecordQueueEntry加到queue中去 userFunction.asyncInvoke(element.getValue(), streamRecordBufferEntry); //调用用户定义的asyncInvoke }
StreamRecordQueueEntry
/**
 * Queue entry for a stream record. It also acts as the collector handed to the user function:
 * both {@code collect} variants complete the entry's result future.
 *
 * <p>Abridged excerpt: the constructor that initializes {@code resultFuture} is not shown.
 */
public class StreamRecordQueueEntry<OUT> extends StreamElementQueueEntry<Collection<OUT>>
        implements AsyncCollectionResult<OUT>, AsyncCollector<OUT> {

    /** Future containing the collection result. */
    private final CompletableFuture<Collection<OUT>> resultFuture;

    /** Completes the result future with the given results. */
    @Override
    public void collect(Collection<OUT> result) {
        resultFuture.complete(result);
    }

    /** Completes the result future exceptionally with the given error. */
    @Override
    public void collect(Throwable error) {
        resultFuture.completeExceptionally(error);
    }
}
前面 Emitter 通过 peekBlockingly 取数据时,判断 entry 是否完成,看的就是 resultFuture 是否 isDone
可以看到resultFuture只有在collect的时候才会被complete
当resultFuture.complete时,onComplete callback会被触发,
这个callback在OrderedStreamElementQueue.addEntry被注册上来,做的事也就是告诉大家headIsCompleted;这样随后Emitter可以把结果数据emit出去
最终调用到用户定义的,
userFunction.asyncInvoke
@Override public void asyncInvoke(final String str, final AsyncCollector<Tuple2<String, String>> asyncCollector) throws Exception { // issue the asynchronous request, receive a future for result Future<String> resultFuture = client.query(str); // set the callback to be executed once the request by the client is complete // the callback simply forwards the result to the collector resultFuture.thenAccept( (String result) -> { asyncCollector.collect(Collections.singleton(new Tuple2<>(str, result))); }); } }
首先 client 必须是异步的;如果不是,就没法返回 Future,需要自己用线程池(配合连接池)把同步调用包装成异步
主要逻辑就是在resultFuture完成后,调用asyncCollector.collect把结果返回给element