flink 异步 io(Async I/O) 示例

文章目录

异步 I/O 是 Flink 为了降低与外部系统 (REST Server/HBase/MySQL 等) 频繁交互时产生的延迟而提供的一个特性。

官方文档见

  • https://ci.apache.org/projects/flink/flink-docs-master/dev/stream/operators/asyncio.html
  • https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=65870673
  • https://docs.google.com/document/d/1Lr9UYXEz6s6R_3PWg3bZQLF3upGaNEkc0rQCFSzaYDI/edit#

中文博客也有很多介绍:

  • http://wuchong.me/blog/2017/05/17/flink-internals-async-io/
  • https://blog.icocoro.me/2019/05/26/1905-apache-flinkv2-asyncio/

这里只提供代码示例。

先简单说一下业务逻辑:有一个 scoreDataStream 流,流中的元素是 Score 信息;需要根据 Score 中的 stu_id 发起 HTTP 请求获取对应的 Student 信息,然后将两者组合输出。代码中使用 httpasyncclient 以回调的方式实现异步请求。

  1. AsyncHttpRequest 算子,继承 RichAsyncFunction<IN, OUT>
import com.google.gson.Gson;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.async.ResultFuture;
import org.apache.flink.streaming.api.functions.async.RichAsyncFunction;
import org.apache.flink.util.Preconditions;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.concurrent.FutureCallback;
import org.apache.http.impl.nio.client.CloseableHttpAsyncClient;
import org.apache.http.impl.nio.client.HttpAsyncClients;
import org.apache.http.impl.nio.conn.PoolingNHttpClientConnectionManager;
import org.apache.http.impl.nio.reactor.DefaultConnectingIOReactor;
import org.apache.http.impl.nio.reactor.IOReactorConfig;
import org.apache.http.nio.reactor.ConnectingIOReactor;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.concurrent.CancellationException;

/**
 * An {@code AsyncFunction} that, for every incoming {@code Score}, issues a non-blocking
 * HTTP GET to fetch the matching {@code Student} and emits both as a {@code Tuple2}.
 *
 * <p>The HTTP client is built in {@code open} from the timeout and pool settings held by
 * this instance and is released in {@code close}. Every request completes the supplied
 * {@code ResultFuture} exactly once: with the joined tuple on success, or exceptionally
 * on a non-200 status, an unreadable/unparsable body, a transport failure or a
 * cancellation.
 */
class AsyncHttpRequest extends RichAsyncFunction<Score, Tuple2<Score,Student>> {

    // Static so the logger is never part of the serialized function instance.
    private static final Logger LOG = LoggerFactory.getLogger(AsyncHttpRequest.class);

    // A single shared Gson instead of one allocation per response.
    private static final Gson GSON = new Gson();

    /** Async HTTP client that issues concurrent requests with callbacks; created in open(). */
    private transient CloseableHttpAsyncClient client;

    // Connect timeout in ms (time allowed for the TCP three-way handshake).
    private int connectionTimeOut;
    // Socket timeout in ms (time allowed for the HTTP response to arrive).
    private int socketTimeOut;
    // Timeout for leasing a connection from the pool. -1 is the default set here; per the
    // original author's note this means "unlimited", i.e. callers wait when the pool is exhausted.
    private int connectionRequestTimeOut = -1;
    // Maximum number of connections in the pool.
    private int poolMaxTotal;
    // Maximum number of connections per host (route).
    private int maxPerRoute;
    // Keep-alive is enabled by default.
    private boolean keepAlive = true;

    /**
     * Creates the function with its mandatory settings.
     *
     * @param capacity          used as both the pool's total size and its per-route limit; must be &gt; 0
     * @param connectionTimeOut connect timeout in ms; must be &gt; 0
     * @param socketTimeOut     socket (response) timeout in ms; must be &gt; 0
     * @throws IllegalArgumentException if any argument is not positive
     */
    public AsyncHttpRequest(int capacity, int connectionTimeOut, int socketTimeOut){
        Preconditions.checkArgument(capacity > 0);
        Preconditions.checkArgument(connectionTimeOut > 0);
        Preconditions.checkArgument(socketTimeOut > 0);
        this.maxPerRoute = this.poolMaxTotal = capacity;
        this.connectionTimeOut = connectionTimeOut;
        this.socketTimeOut = socketTimeOut;
    }

    /** Overrides the connect timeout (ms) and returns this instance for chaining. */
    public AsyncHttpRequest setConnectionTimeout(int connectionTimeOut) {
        this.connectionTimeOut = connectionTimeOut;
        return this;
    }

    /** Overrides the socket timeout (ms) and returns this instance for chaining. */
    public AsyncHttpRequest setSocketTimeout(int socketTimeOut) {
        this.socketTimeOut = socketTimeOut;
        return this;
    }

    /** Overrides the pool-lease timeout (ms) and returns this instance for chaining. */
    public AsyncHttpRequest setConnectionRequestTimeOut(int connectionRequestTimeOut) {
        this.connectionRequestTimeOut = connectionRequestTimeOut;
        return this;
    }

    /** Overrides the pool's total connection limit and returns this instance for chaining. */
    public AsyncHttpRequest setPoolMaxTotal(int poolMaxTotal) {
        this.poolMaxTotal = poolMaxTotal;
        return this;
    }

    /** Overrides the per-route connection limit and returns this instance for chaining. */
    public AsyncHttpRequest setMaxPerRoute(int maxPerRoute) {
        this.maxPerRoute = maxPerRoute;
        return this;
    }

    /**
     * Builds and starts the async HTTP client from the configured timeouts and pool sizes.
     *
     * @param parameters not read by this implementation
     * @throws Exception if the I/O reactor or the client cannot be created
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        RequestConfig requestConfig = RequestConfig.custom()
                .setConnectTimeout(connectionTimeOut)
                .setSocketTimeout(socketTimeOut)
                .setConnectionRequestTimeout(connectionRequestTimeOut)
                .build();

        // I/O reactor: one I/O thread per available processor.
        IOReactorConfig ioReactorConfig = IOReactorConfig.custom()
                .setIoThreadCount(Runtime.getRuntime().availableProcessors())
                .setSoKeepAlive(keepAlive)
                .build();

        // Connection pool limits.
        ConnectingIOReactor ioReactor = new DefaultConnectingIOReactor(ioReactorConfig);
        PoolingNHttpClientConnectionManager connManager = new PoolingNHttpClientConnectionManager(ioReactor);
        connManager.setMaxTotal(poolMaxTotal);
        connManager.setDefaultMaxPerRoute(maxPerRoute);

        client = HttpAsyncClients.custom()
                .setConnectionManager(connManager)
                .setDefaultRequestConfig(requestConfig)
                .build();
        client.start();
    }

    /**
     * Closes the HTTP client if one was created.
     *
     * @throws Exception if closing the client fails
     */
    @Override
    public void close() throws Exception {
        // open() may never have run, or may have failed before assigning the client.
        if (client != null) {
            client.close();
        }
    }

    /**
     * Sends the lookup request for one {@code Score} and wires the callback that completes
     * {@code resultFuture}.
     *
     * @param score        the element to enrich; its {@code getStuId()} value is sent as {@code stu_id}
     * @param resultFuture completed with a single {@code (score, student)} tuple, or exceptionally
     * @throws Exception if the request cannot be submitted
     */
    @Override
    public void asyncInvoke(Score score, final ResultFuture<Tuple2<Score,Student>> resultFuture) throws Exception {
        // Build the HTTP request.
        final HttpGet httpGet = new HttpGet("http://localhost:12345?stu_id=" + score.getStuId());
        // Async client: the outcome is delivered through the callback below.
        client.execute(httpGet, new FutureCallback<HttpResponse>() {
            @Override
            public void completed(HttpResponse httpResponse) {
                int statusCode = httpResponse.getStatusLine().getStatusCode();
                if (statusCode != 200) {
                    String message = "got wrong status code" + statusCode;
                    LOG.error(message);
                    resultFuture.completeExceptionally(new RuntimeException(message));
                    return;
                }

                final Student student;
                try {
                    HttpEntity httpEntity = httpResponse.getEntity();
                    // UTF-8 is only the fallback when the response does not declare a charset.
                    String body = EntityUtils.toString(httpEntity, StandardCharsets.UTF_8);
                    student = GSON.fromJson(body, Student.class);
                } catch (IOException | RuntimeException e) {
                    // RuntimeException covers malformed JSON and a missing entity; without this
                    // the future would never be completed and the element would wait for the
                    // operator timeout.
                    LOG.error("failed to convert httpEntity to String", e);
                    resultFuture.completeExceptionally(e);
                    return; // do not fall through to complete()
                }
                resultFuture.complete(Collections.singleton(new Tuple2<>(score, student)));
            }

            @Override
            public void failed(Exception e) {
                LOG.error("failed to get httpresult", e);
                resultFuture.completeExceptionally(e);
            }

            @Override
            public void cancelled() {
                String message = "http request cancelled:" + httpGet;
                LOG.error(message);
                resultFuture.completeExceptionally(new CancellationException(message));
            }
        });
    }
}
  2. 定义 scoreDataStream 流, 这里可以从 kafka 中读取
// Source stream of Score elements; the construction is elided here (e.g. it could be read from Kafka).
DataStream<Score> scoreDataStream = ...
  3. AsyncDataStream.unorderedWait / AsyncDataStream.orderedWait 对 scoreDataStream 流进行处理
// Number of elements each subtask may process asynchronously at the same time.
// NOTE(review): below this value is passed to the AsyncHttpRequest constructor, where it
// sizes the HTTP connection pool — confirm that is the intended use.
int capacity = ...
// Maximum time an element may wait inside the async I/O operator; per the original note,
// an exception is raised if the client has not returned a result by then.
int timeout = ...
// Originally described as the maximum pool size per subtask.
// NOTE(review): below this value is passed as the last argument of unorderedWait, which
// presumably is the operator's in-flight capacity — verify against the Flink API.
int poolMaxTotal = ...
// HTTP connect timeout in ms (time allowed for the TCP three-way handshake).
int connectionTimeOut = ...
// Apply AsyncDataStream.unorderedWait/orderedWait to scoreDataStream to obtain mergedStream.
// Note that `timeout` is used twice: as the constructor's socket timeout and as the operator timeout.
DataStream<Tuple2<Score, Student>> mergedStream =
         AsyncDataStream.unorderedWait(
               scoreDataStream,
               new AsyncHttpRequest(capacity, connectionTimeOut , timeout),
               timeout, TimeUnit.MILLISECONDS,
               poolMaxTotal );
// mergedStream can now be processed further downstream.
  4. 附 Score 和 Student 类
    Score.java
/**
 * A score record: the holder's name, the score value and the id of the student it belongs to.
 *
 * <p>Fields are public and a no-argument constructor is provided, exactly as in the
 * original class, so existing direct field access keeps working.
 */
public class Score {
    public String name;
    public int score;
    public int stu_id;

    /** Creates an empty record with default field values. */
    public Score(){

    }

    /**
     * Creates a fully populated record.
     *
     * @param name   name associated with the score
     * @param score  score value
     * @param stu_id id of the student the score belongs to
     */
    public Score(String name, int score, int stu_id) {
        this.name = name;
        this.score = score;
        this.stu_id = stu_id;
    }

    /** @return the name associated with the score */
    public String getName() {
        return name;
    }

    /** @return the score value */
    public int getScore() {
        return score;
    }

    /** @return the id of the student the score belongs to */
    public int getStuId() {
        return stu_id;
    }

    /**
     * Renders the record as {@code Score{stu_id=..., name='...', score=...}}.
     *
     * <p>The numeric score is printed without quotes; the previous output opened a quote
     * before the score and never closed it.
     */
    @Override
    public String toString(){
        return "Score{" +
                "stu_id=" + stu_id +
                ", name='" + name + '\'' +
                ", score=" + score +
                '}';
    }
}

Student.java

/**
 * Plain data holder for a student: id, name, password and age.
 *
 * <p>All fields are public and mutable, and each also has a getter and a setter.
 */
public class Student {
    public int id;
    public String name;
    public String password;
    public int age;

    /** Creates a student with default field values. */
    public Student() {
    }

    /**
     * Creates a fully populated student.
     *
     * @param id       student id
     * @param name     student name
     * @param password student password
     * @param age      student age
     */
    public Student(int id, String name, String password, int age) {
        this.id = id;
        this.name = name;
        this.password = password;
        this.age = age;
    }

    /** @return the student id */
    public int getId() {
        return this.id;
    }

    /** @param id the new student id */
    public void setId(int id) {
        this.id = id;
    }

    /** @return the student name */
    public String getName() {
        return this.name;
    }

    /** @param name the new student name */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the student password */
    public String getPassword() {
        return this.password;
    }

    /** @param password the new student password */
    public void setPassword(String password) {
        this.password = password;
    }

    /** @return the student age */
    public int getAge() {
        return this.age;
    }

    /** @param age the new student age */
    public void setAge(int age) {
        this.age = age;
    }

    /** Renders all four fields as {@code Student{id=..., name='...', password='...', age=...}}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Student{");
        text.append("id=").append(id);
        text.append(", name='").append(name).append('\'');
        text.append(", password='").append(password).append('\'');
        text.append(", age=").append(age);
        text.append('}');
        return text.toString();
    }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值