java Flink(三十二)Flink的异步IO访问外部数据

 Flink的异步官方的介绍:

当我们使用 Flink 与外部系统进行交互时(例如:使用存储在数据库中的数据来丰富流事件),这时便需要注意 Flink 系统与外部系统之间的通信延迟了。

       当我们使用 MapFunction() 的方式与外部数据库交互时,使用的是同步交互的方式。即:将请求发送到数据库后,MapFunction 会一直等待,直到收到响应。在许多情况下,这种等待占据了函数绝大部分的处理时间。

       与数据库的异步交互,意味着单个并行函数实例可以同时处理许多请求并同时接收响应。这样,等待时间便可以与发送其他请求和接收响应重叠。至少,等待时间将被分摊到多个请求上。在大多数情况下,会使系统有更高的吞吐量。

 我们这里用异步的http请求(异步的前提是外部数据支持异步操作)来记录这个内容

 

 异步请求主程序(注:原文此处标注为 "DataLocation POJO 类",但下方展示的实际是 HttpAsyncMain 主程序代码,DataLocation POJO 类的代码在原文中缺失):

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.AsyncDataStream;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.async.ResultFuture;
import org.apache.flink.streaming.api.functions.async.RichAsyncFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.http.HttpResponse;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.nio.client.CloseableHttpAsyncClient;
import org.apache.http.impl.nio.client.HttpAsyncClients;
import org.apache.http.util.EntityUtils;
import org.apache.kafka.clients.consumer.ConsumerConfig;

import java.util.Collections;
import java.util.Properties;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;

/**
 * Reads location records from Kafka and enriches each one with a province name
 * obtained from a reverse-geocoding HTTP API, using Flink's Async I/O so that
 * the task thread is not blocked while waiting for HTTP responses.
 */
public class HttpAsyncMain {

    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Kafka consumer configuration.
        Properties pro = new Properties();
        pro.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "xx:9092,xx:9092,xx:9092");
        pro.put(ConsumerConfig.GROUP_ID_CONFIG, "group");
        pro.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

        FlinkKafkaConsumer<String> kafkaConsumer =
                new FlinkKafkaConsumer<>("locations", new SimpleStringSchema(), pro);
        DataStreamSource<String> kafkaStream = env.addSource(kafkaConsumer);

        // Async I/O: at most 10 requests in flight; further records backpressure.
        // FIX: the timeout must be a positive duration — Flink does NOT treat 0 as
        // "no timeout"; 0 microseconds would time out every single request.
        DataStream<DataLocation> resultStream = AsyncDataStream.unorderedWait(
                kafkaStream, new AsyncDatabaseRequest(), 5000, TimeUnit.MILLISECONDS, 10);
        resultStream.print("result:");
        env.execute();
    }

    /**
     * Async function that issues a non-blocking HTTP GET per record and completes
     * the Flink {@link ResultFuture} with the enriched {@code DataLocation}.
     * Expected input format: "id,name,date,lng,lat" (comma-separated).
     */
    public static class AsyncDatabaseRequest extends RichAsyncFunction<String, DataLocation> {

        // One shared async HTTP client per parallel subtask; transient because the
        // function instance is serialized when the job is deployed.
        private transient CloseableHttpAsyncClient httpAsyncClient = null;

        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            // Build the async HttpClient once per subtask.
            RequestConfig requestConfig = RequestConfig.custom()
                    .setSocketTimeout(3000)  // socket read timeout (ms)
                    .setConnectTimeout(3000) // connection timeout (ms)
                    .build();
            // FIX: the original discarded the built client, leaving the field null
            // (NPE on first execute()). Assign it and start the I/O reactor.
            httpAsyncClient = HttpAsyncClients.custom()
                    .setMaxConnTotal(20) // max total connections
                    .setDefaultRequestConfig(requestConfig)
                    .build();
            httpAsyncClient.start();
        }

        @Override
        public void close() throws Exception {
            // FIX: release the HTTP client's reactor threads and connections
            // when the subtask shuts down.
            if (httpAsyncClient != null) {
                httpAsyncClient.close();
            }
            super.close();
        }

        @Override
        public void asyncInvoke(String input, final ResultFuture<DataLocation> resultFuture) throws Exception {
            String[] fields = input.split(",");
            if (fields.length < 5) {
                // Malformed record: complete with no result so the async slot is
                // released (never leaving a ResultFuture uncompleted — that would
                // eventually stall the operator).
                resultFuture.complete(Collections.emptyList());
                return;
            }
            final String id = fields[0];
            final String name = fields[1];
            final String date = fields[2];
            String lng = fields[3]; // lng/lat would normally be query parameters of the API URL
            String lat = fields[4];

            // TODO: fill in the real reverse-geocoding endpoint, built from lng/lat.
            String url = "";
            final HttpGet request = new HttpGet(url);
            // Fire the request without blocking; the Future resolves on a reactor thread.
            final Future<HttpResponse> future = httpAsyncClient.execute(request, null);

            // future.get() blocks, so run it on ForkJoinPool.commonPool() rather
            // than on the Flink task thread.
            CompletableFuture.supplyAsync(new Supplier<String>() {
                @Override
                public String get() {
                    try {
                        HttpResponse response = future.get();
                        String province = null;
                        if (response.getStatusLine().getStatusCode() == 200) {
                            // Parse the JSON payload returned by the API.
                            String result = EntityUtils.toString(response.getEntity());
                            JSONObject jsonObject = JSON.parseObject(result);
                            JSONObject regeocode = jsonObject.getJSONObject("regeocode");
                            if (regeocode != null && !regeocode.isEmpty()) {
                                JSONObject address = regeocode.getJSONObject("addressComponent");
                                // FIX: assign to the outer variable — the original
                                // stored into a shadowing local ("province1"), so
                                // the returned province was always null.
                                province = address.getString("province");
                            }
                        }
                        return province;
                    } catch (Exception e) {
                        // Best-effort enrichment: any failure yields an unknown province.
                        return null;
                    }
                }
            }).thenAccept((String dbResult) -> {
                resultFuture.complete(Collections.singleton(DataLocation.of(id, name, date, dbResult)));
            });
        }
    }
}

异步请求主程序(注:以下代码与上方代码完全相同,系原文重复粘贴;按原文保留):

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.AsyncDataStream;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.async.ResultFuture;
import org.apache.flink.streaming.api.functions.async.RichAsyncFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.http.HttpResponse;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.nio.client.CloseableHttpAsyncClient;
import org.apache.http.impl.nio.client.HttpAsyncClients;
import org.apache.http.util.EntityUtils;
import org.apache.kafka.clients.consumer.ConsumerConfig;

import java.util.Collections;
import java.util.Properties;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;

/**
 * Reads location records from Kafka and enriches each one with a province name
 * obtained from a reverse-geocoding HTTP API, using Flink's Async I/O so that
 * the task thread is not blocked while waiting for HTTP responses.
 */
public class HttpAsyncMain {

    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Kafka consumer configuration.
        Properties pro = new Properties();
        pro.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "xx:9092,xx:9092,xx:9092");
        pro.put(ConsumerConfig.GROUP_ID_CONFIG, "group");
        pro.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

        FlinkKafkaConsumer<String> kafkaConsumer =
                new FlinkKafkaConsumer<>("locations", new SimpleStringSchema(), pro);
        DataStreamSource<String> kafkaStream = env.addSource(kafkaConsumer);

        // Async I/O: at most 10 requests in flight; further records backpressure.
        // FIX: the timeout must be a positive duration — Flink does NOT treat 0 as
        // "no timeout"; 0 microseconds would time out every single request.
        DataStream<DataLocation> resultStream = AsyncDataStream.unorderedWait(
                kafkaStream, new AsyncDatabaseRequest(), 5000, TimeUnit.MILLISECONDS, 10);
        resultStream.print("result:");
        env.execute();
    }

    /**
     * Async function that issues a non-blocking HTTP GET per record and completes
     * the Flink {@link ResultFuture} with the enriched {@code DataLocation}.
     * Expected input format: "id,name,date,lng,lat" (comma-separated).
     */
    public static class AsyncDatabaseRequest extends RichAsyncFunction<String, DataLocation> {

        // One shared async HTTP client per parallel subtask; transient because the
        // function instance is serialized when the job is deployed.
        private transient CloseableHttpAsyncClient httpAsyncClient = null;

        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            // Build the async HttpClient once per subtask.
            RequestConfig requestConfig = RequestConfig.custom()
                    .setSocketTimeout(3000)  // socket read timeout (ms)
                    .setConnectTimeout(3000) // connection timeout (ms)
                    .build();
            // FIX: the original discarded the built client, leaving the field null
            // (NPE on first execute()). Assign it and start the I/O reactor.
            httpAsyncClient = HttpAsyncClients.custom()
                    .setMaxConnTotal(20) // max total connections
                    .setDefaultRequestConfig(requestConfig)
                    .build();
            httpAsyncClient.start();
        }

        @Override
        public void close() throws Exception {
            // FIX: release the HTTP client's reactor threads and connections
            // when the subtask shuts down.
            if (httpAsyncClient != null) {
                httpAsyncClient.close();
            }
            super.close();
        }

        @Override
        public void asyncInvoke(String input, final ResultFuture<DataLocation> resultFuture) throws Exception {
            String[] fields = input.split(",");
            if (fields.length < 5) {
                // Malformed record: complete with no result so the async slot is
                // released (never leaving a ResultFuture uncompleted — that would
                // eventually stall the operator).
                resultFuture.complete(Collections.emptyList());
                return;
            }
            final String id = fields[0];
            final String name = fields[1];
            final String date = fields[2];
            String lng = fields[3]; // lng/lat would normally be query parameters of the API URL
            String lat = fields[4];

            // TODO: fill in the real reverse-geocoding endpoint, built from lng/lat.
            String url = "";
            final HttpGet request = new HttpGet(url);
            // Fire the request without blocking; the Future resolves on a reactor thread.
            final Future<HttpResponse> future = httpAsyncClient.execute(request, null);

            // future.get() blocks, so run it on ForkJoinPool.commonPool() rather
            // than on the Flink task thread.
            CompletableFuture.supplyAsync(new Supplier<String>() {
                @Override
                public String get() {
                    try {
                        HttpResponse response = future.get();
                        String province = null;
                        if (response.getStatusLine().getStatusCode() == 200) {
                            // Parse the JSON payload returned by the API.
                            String result = EntityUtils.toString(response.getEntity());
                            JSONObject jsonObject = JSON.parseObject(result);
                            JSONObject regeocode = jsonObject.getJSONObject("regeocode");
                            if (regeocode != null && !regeocode.isEmpty()) {
                                JSONObject address = regeocode.getJSONObject("addressComponent");
                                // FIX: assign to the outer variable — the original
                                // stored into a shadowing local ("province1"), so
                                // the returned province was always null.
                                province = address.getString("province");
                            }
                        }
                        return province;
                    } catch (Exception e) {
                        // Best-effort enrichment: any failure yields an unknown province.
                        return null;
                    }
                }
            }).thenAccept((String dbResult) -> {
                resultFuture.complete(Collections.singleton(DataLocation.of(id, name, date, dbResult)));
            });
        }
    }
}

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值