Big Data with Flink: State (Part 2)

I. State Time-To-Live (TTL)

Any keyed state can be given a time-to-live (TTL). If a TTL is configured and a state value has expired, Flink clears the corresponding value on a best-effort basis.

Setting a TTL on a ValueState:

package cn._51doit.flink.day08;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.state.StateTtlConfig;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

//Set a TTL on a ValueState
public class KeyedStateTTLDemo {

    public static void main(String[] args) throws Exception{

        //Create the Flink streaming execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        env.enableCheckpointing(10000);
        //Set the restart strategy
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 5000));

        //Create the source DataStream
        DataStreamSource<String> lines = env.socketTextStream("localhost", 8888);

        //Apply transformations
        SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndOne = lines.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void flatMap(String line, Collector<Tuple2<String, Integer>> collector) throws Exception {
                String[] words = line.split(" ");
                for (String word : words) {
                    if("error".equals(word)) {
                        throw new RuntimeException("An error occurred!!!");
                    }
                    //new Tuple2<String, Integer>(word, 1)
                    collector.collect(Tuple2.of(word, 1));
                }
            }
        });

        //Key by word
        KeyedStream<Tuple2<String, Integer>, String> keyed = wordAndOne.keyBy(t -> t.f0);

        keyed.map(new RichMapFunction<Tuple2<String, Integer>, Tuple2<String, Integer>>() {

            private transient ValueState<Integer> counter;

            @Override
            public void open(Configuration parameters) throws Exception {
                //Define a StateTtlConfig
                StateTtlConfig ttlConfig = StateTtlConfig.newBuilder(Time.seconds(10))
                        .setUpdateType(StateTtlConfig.UpdateType.OnCreateAndWrite)
                        .setStateVisibility(StateTtlConfig.StateVisibility.NeverReturnExpired)
                        .build();

                //To use state, first define a state descriptor (state type and name)
                ValueStateDescriptor<Integer> stateDescriptor = new ValueStateDescriptor<>("wc-desc", Integer.class);
                //Attach the TTL config to the state descriptor
                stateDescriptor.enableTimeToLive(ttlConfig);
                //Initialize the state or restore it from a previous run
                counter = getRuntimeContext().getState(stateDescriptor);
            }

            @Override
            public Tuple2<String, Integer> map(Tuple2<String, Integer> input) throws Exception {
                //String word = input.f0;
                Integer currentCount = input.f1;
                //Read the historical count from the ValueState
                Integer historyCount = counter.value(); //get the value for the current key
                if(historyCount == null) {
                    historyCount = 0;
                }
                Integer total = historyCount + currentCount; //accumulate
                //Update the state (in memory)
                counter.update(total);
                input.f1 = total; //the accumulated count
                return input;
            }
        }).print();

        //Start execution
        env.execute("StreamingWordCount");

    }

}
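
To try the demo, start a socket server first (e.g. nc -lk 8888) and type words into it. Because the TTL is 10 seconds with UpdateType.OnCreateAndWrite and NeverReturnExpired, repeating a word within 10 seconds keeps accumulating its count, while a word that is not written for more than 10 seconds expires and its count restarts from 1.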

II. Case Study: Counting Events and Distinct Users

Input fields: user ID, activity ID, event type (1: view, 2: participate)
user1, A, 1
user1, A, 1
user1, A, 2
user2, A, 1
user2, B, 1

Goal: for each activity and each event type, count the number of distinct users and the number of events.

package cn._51doit.flink.day08;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;

public class ActivityCount {

    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStreamSource<String> lines = env.socketTextStream("localhost", 8888);
        
        //Parse the input into tuples
        SingleOutputStreamOperator<Tuple3<String, String, String>> tpDataStream = lines.map(new MapFunction<String, Tuple3<String, String, String>>() {
            @Override
            public Tuple3<String, String, String> map(String line) throws Exception {
                String[] fields = line.split(",");
                return Tuple3.of(fields[0], fields[1], fields[2]);
            }
        });

        KeyedStream<Tuple3<String, String, String>, Tuple2<String, String>> keyed = tpDataStream.keyBy(new KeySelector<Tuple3<String, String, String>, Tuple2<String, String>>() {
            @Override
            public Tuple2<String, String> getKey(Tuple3<String, String, String> value) throws Exception {
                return Tuple2.of(value.f1, value.f2);
            }
        });

        //KeyedStream<Tuple3<String, String, String>, Tuple2<String, String>> keyed = tpDataStream.keyBy(t -> Tuple2.of(t.f1, t.f2), TypeInformation.of(new TypeHint<Tuple2<String, String>>() {}));

        SingleOutputStreamOperator<Tuple4<String, String, Long, Long>> result = keyed.process(new ActivityCountFunction());

        result.print();

        env.execute();


    }
}

Define the ActivityCountFunction:

package cn._51doit.flink.day08;

import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;

import java.util.HashSet;

/**
 * Counts the number of events and the number of distinct users per key;
 * the user count must be deduplicated
 */
public class ActivityCountFunction extends KeyedProcessFunction<Tuple2<String, String>, Tuple3<String, String, String>, Tuple4<String, String, Long, Long>> {

    private transient ValueState<Long> actCountState;
    private transient ValueState<HashSet<String>> userDisState;

    @Override
    public void open(Configuration parameters) throws Exception {
        ValueStateDescriptor<Long> stateDescriptor1 = new ValueStateDescriptor<>("ac-count", Long.class);
        actCountState = getRuntimeContext().getState(stateDescriptor1);
        ValueStateDescriptor<HashSet<String>> stateDescriptor2 = new ValueStateDescriptor<HashSet<String>>("dis-ac-count", TypeInformation.of(new TypeHint<HashSet<String>>(){}));
        userDisState = getRuntimeContext().getState(stateDescriptor2);
    }
    @Override
    public void processElement(Tuple3<String, String, String> value, Context ctx, Collector<Tuple4<String, String, Long, Long>> out) throws Exception {

        //Count events
        Long historyCount = actCountState.value();
        if(historyCount == null) {
            historyCount = 0L;
        }
        Long totalCount = historyCount + 1;
        actCountState.update(totalCount);
        //Count distinct users
        HashSet<String> disUserSet = userDisState.value();
        if (disUserSet == null) {
            disUserSet = new HashSet<>();
        }
        disUserSet.add(value.f0);
        userDisState.update(disUserSet);
        //Emit the result
        out.collect(Tuple4.of(value.f1, value.f2, totalCount, (long) disUserSet.size()));
    }
}
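
For the sample input above, keyed by (activity ID, event type), the job emits the following (activityId, eventType, eventCount, distinctUsers) records (print() also prefixes each line with a subtask index):

(A,1,1,1)
(A,1,2,1)
(A,2,1,1)
(A,1,3,2)
(B,1,1,1)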

III. Using Broadcast State

Broadcast state can be used to join a stream of log (fact) data with dimension data from a database in real time, producing enriched records such as:

user1, 浏览, 1, 1
user1, 浏览, 1, 1

package cn._51doit.flink.day08;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.state.*;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.BroadcastStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction;
import org.apache.flink.util.Collector;

import java.util.HashSet;

public class BroadcastStateDemo {

    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        //MySQL dimension table -> Canal -> Kafka -> Flink Kafka source
        //INSERT,1,浏览
        //INSERT,2,参与
        //INSERT,3,消费
        //UPDATE,3,退出
        //DELETE,3,删除
        DataStreamSource<String> dicDataStream = env.socketTextStream("localhost", 8888);

        SingleOutputStreamOperator<Tuple3<String, String, String>> dicTupleStream = dicDataStream.map(new MapFunction<String, Tuple3<String, String, String>>() {
            @Override
            public Tuple3<String, String, String> map(String value) throws Exception {
                String[] fields = value.split(",");
                return Tuple3.of(fields[0], fields[1], fields[2]);
            }
        });

        MapStateDescriptor<String, String> broadcastMapStateDescriptor = new MapStateDescriptor<>("dic-state", String.class, String.class);

        //Returns a broadcast stream
        BroadcastStream<Tuple3<String, String, String>> broadcastStream = dicTupleStream.broadcast(broadcastMapStateDescriptor);

        //user1,A,1
        DataStreamSource<String> actDataStream = env.socketTextStream("localhost", 9999);

        //Parse the input into tuples
        SingleOutputStreamOperator<Tuple3<String, String, String>> tpDataStream = actDataStream.map(new MapFunction<String, Tuple3<String, String, String>>() {
            @Override
            public Tuple3<String, String, String> map(String line) throws Exception {
                String[] fields = line.split(",");
                return Tuple3.of(fields[0], fields[1], fields[2]);
            }
        });

        KeyedStream<Tuple3<String, String, String>, Tuple2<String,String>> keyedStream = tpDataStream.keyBy(t -> Tuple2.of(t.f1, t.f2), TypeInformation.of(new TypeHint<Tuple2<String,String>>() {}));

        keyedStream
                .connect(broadcastStream)
                .process(new MyBroadcastProcessFunc(broadcastMapStateDescriptor))
                .print();

        env.execute();
    }


    private static class MyBroadcastProcessFunc extends KeyedBroadcastProcessFunction<Tuple2<String, String>, Tuple3<String, String, String>, Tuple3<String, String, String>, Tuple4<String, String, Long, Long>> {

        private MapStateDescriptor<String, String> broadcastMapStateDescriptor;

        public MyBroadcastProcessFunc(){}

        public MyBroadcastProcessFunc(MapStateDescriptor<String, String> broadcastMapStateDescriptor) {
            this.broadcastMapStateDescriptor = broadcastMapStateDescriptor;
        }

        private transient ValueState<Long> actCountState;
        private transient ValueState<HashSet<String>> userDisState;

        @Override
        public void open(Configuration parameters) throws Exception {
            ValueStateDescriptor<Long> stateDescriptor1 = new ValueStateDescriptor<>("ac-count", Long.class);
            actCountState = getRuntimeContext().getState(stateDescriptor1);
            ValueStateDescriptor<HashSet<String>> stateDescriptor2 = new ValueStateDescriptor<HashSet<String>>("dis-ac-count", TypeInformation.of(new TypeHint<HashSet<String>>(){}));
            userDisState = getRuntimeContext().getState(stateDescriptor2);
        }

        /**
         * Processes each fact record from the keyedStream, which is keyed by (activity ID, event type),
         * e.g. user1,A,1
         * @param value
         * @param ctx
         * @param out
         * @throws Exception
         */
        @Override
        public void processElement(Tuple3<String, String, String> value, ReadOnlyContext ctx, Collector<Tuple4<String, String, Long, Long>> out) throws Exception {

            //Use the ReadOnlyContext to get the ReadOnlyBroadcastState
            //e.g. 1 -> 参与
            ReadOnlyBroadcastState<String, String> broadcastState = ctx.getBroadcastState(broadcastMapStateDescriptor);

            //Count events
            Long historyCount = actCountState.value();
            if(historyCount == null) {
                historyCount = 0L;
            }
            Long totalCount = historyCount + 1;
            actCountState.update(totalCount);
            //Count distinct users
            HashSet<String> disUserSet = userDisState.value();
            if (disUserSet == null) {
                disUserSet = new HashSet<>();
            }
            disUserSet.add(value.f0);
            userDisState.update(disUserSet);
            //Look up the event name from the broadcast state and emit the result
            String eventId = value.f2;
            String eventName = broadcastState.get(eventId);

            out.collect(Tuple4.of(value.f1, eventName, totalCount, (long) disUserSet.size()));
        }

        /**
         * Processes each dimension record from the BroadcastStream<Tuple3<String, String, String>>,
         * e.g. INSERT,1,浏览
         * @param value
         * @param ctx
         * @param out
         * @throws Exception
         */
        @Override
        public void processBroadcastElement(Tuple3<String, String, String> value, Context ctx, Collector<Tuple4<String, String, Long, Long>> out) throws Exception {
            //INSERT, UPDATE or DELETE
            String type = value.f0;
            String eventId = value.f1;
            String eventName = value.f2;
            //Get the broadcast state
            BroadcastState<String, String> broadcastState = ctx.getBroadcastState(broadcastMapStateDescriptor);
            //Insert, update or delete the entry in the broadcast state
            if("DELETE".equals(type)) {
                broadcastState.remove(eventId);
            } else {
                broadcastState.put(eventId, eventName);
            }
        }
    }
}
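
As a sanity check: send the dimension record INSERT,1,浏览 to port 8888 first, then the fact record user1,A,1 to port 9999; the job emits (A,浏览,1,1). If a fact arrives before its event ID has been broadcast, broadcastState.get(eventId) returns null, so in practice the dimension data should be loaded first.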

IV. Queryable State Basics

Normally a Flink job writes its results to an external database, which a web system then queries. Queryable state also makes it possible to query Flink's keyed state directly, but the feature is still immature.
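
Note: on a real cluster the queryable state proxy must also be enabled on the TaskManagers; in the Flink distribution this typically means putting the flink-queryable-state-runtime jar on the classpath (it ships in opt/) and setting queryable-state.enable to true. The local demo below enables the proxy programmatically instead.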

1. Add the client dependency and write the client
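
A minimal sketch of the Maven dependency for the client (the version is an assumption — it must match your Flink distribution, and older Flink versions append a Scala suffix such as _2.11 to the artifact ID):

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-queryable-state-client-java</artifactId>
    <version>${flink.version}</version>
</dependency>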

package cn._51doit.flink.day08;

import org.apache.flink.api.common.JobID;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.queryablestate.client.QueryableStateClient;

import java.util.concurrent.CompletableFuture;

public class QueryStateClientDemo {
    public static void main(String[] args) throws Exception {
        QueryableStateClient client = new QueryableStateClient("localhost", 9069);
        //State descriptor describing the queried state (same type and name as in the job)
        ValueStateDescriptor<Integer> stateDescriptor = new ValueStateDescriptor<>(
                "wc-state", //指定状态描述器的名称
                Integer.class //存储数据的类型
        );
        CompletableFuture<ValueState<Integer>> resultFuture = client.getKvState(
                JobID.fromHexString("23ba9de14e4135f6eb91d62af859c51e"), //ID of the running job
                "my-query-name", //name of the queryable state
                "spark", //the key to query
                BasicTypeInfo.STRING_TYPE_INFO,
                stateDescriptor);
        resultFuture.thenAccept(response -> {
            try {
                Integer res = response.value();
                System.out.println(res);
            } catch (Exception e) {
                e.printStackTrace();
            }
        });
        Thread.sleep(5000);
    }
}
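
Before running the client, start the QueryableKeyedStateDemo job below, read its job ID from the Flink web UI, and substitute it for the hard-coded JobID above; 9069 is the default port of the queryable state proxy.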

2. QueryableKeyedStateDemo

package cn._51doit.flink.day08;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.configuration.ConfigConstants;
import org.apache.flink.configuration.ConfigOptions;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.QueryableStateOptions;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class QueryableKeyedStateDemo {

    public static class MyQueryStateRichMapFunction extends
            RichMapFunction<Tuple2<String, Integer>, Tuple2<String, Integer>> {
        //Use Flink's ValueState to store the accumulated count for each word
        private transient ValueState<Integer> countState;

        @Override
        public void open(Configuration parameters) throws Exception {
            //Initialize the state or restore it from a previous run
            ValueStateDescriptor<Integer> stateDescriptor = new ValueStateDescriptor<>(
                    "wc-state", //指定状态描述器的名称
                    Integer.class //存储数据的类型
            );
            stateDescriptor.setQueryable("my-query-name"); //make the state queryable under the given name
            //Get the state from the runtime context using the descriptor
            countState = getRuntimeContext().getState(stateDescriptor);
        }

        @Override
        public Tuple2<String, Integer> map(Tuple2<String, Integer> tp) throws Exception {
            Integer current = tp.f1;  //count of the current input word
            Integer counts = countState.value(); //previous count
            if (counts == null) { //no previous count yet
                counts = 0; //initialize to 0
            }
            int total = current + counts; //add the current count to the previous count
            countState.update(total); //update the state
            tp.f1 = total; //put the accumulated count back into the tuple
            return tp; //return the tuple
        }
    }

    public static void main(String[] args) throws Exception {
        ParameterTool params = ParameterTool.fromArgs(args);
        Configuration config = params.getConfiguration();
        //Parameters for enabling the queryable state proxy server
        config.setInteger("rest.port", 8081);
        config.setBoolean(QueryableStateOptions.ENABLE_QUERYABLE_STATE_PROXY_SERVER, true);
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(config);

        //Enable checkpointing (periodically persists the intermediate state of the job)
        env.enableCheckpointing(30000);
        //Set the restart strategy
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, Time.seconds(5)));
        //Create a DataStream from the given socket host and port
        DataStreamSource<String> lines = env.socketTextStream("localhost", 8888);
        //Combine each word with 1 into a Tuple2
        DataStream<Tuple2<String, Integer>> wordAndOne = lines.map(
                new MapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public Tuple2<String, Integer> map(String word) throws Exception {
                        return Tuple2.of(word, 1);
                    }
                }
        );
        //Key by word
        KeyedStream<Tuple2<String, Integer>, String> keyed = wordAndOne.keyBy(t -> t.f0);
        //After keyBy, records with the same key go to the same group, and each group maintains its own state
        DataStream<Tuple2<String, Integer>> result = keyed.map(new MyQueryStateRichMapFunction());
        result.print();
        env.execute();
    }
}

V. Using Async I/O (multi-threaded)

Async I/O is the right tool when the external data cannot be broadcast: the external database is too large, the data is not key-value shaped, or it can only be fetched on demand (e.g. from a credit-scoring API).
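
Flink offers two variants: AsyncDataStream.unorderedWait emits each result as soon as its request completes, while orderedWait buffers results so they leave the operator in the original input order, at the cost of some extra latency. Both appear in the examples below.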

Example 1: calling the Amap (Gaode Maps) API over HTTP:

1. Add the async HTTP client dependency
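
A minimal sketch of the Maven dependencies (versions are assumptions — pick ones that fit your project; fastjson is used below for JSON parsing):

<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpasyncclient</artifactId>
    <version>4.1.4</version>
</dependency>
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.75</version>
</dependency>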

2. AsyncQueryFromHttpDemo2

package cn._51doit.flink.day08.async;

import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.streaming.api.datastream.AsyncDataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import java.util.concurrent.TimeUnit;

public class AsyncQueryFromHttpDemo2 {

    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        //Set the job restart strategy
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 5000));
        DataStreamSource<String> lines = env.socketTextStream("localhost", 8888);
        String url = "https://restapi.amap.com/v3/geocode/regeo"; //Amap reverse-geocoding endpoint
        String key = "4924f7ef5c86a278f5500851541cdcff"; //Amap API key, obtained after registering as an Amap developer
        int capacity = 50; //maximum number of concurrent async requests
        //Call AsyncDataStream.unorderedWait, passing in the async request function
        SingleOutputStreamOperator<LogBean> result = AsyncDataStream.unorderedWait(
                lines, //input stream
                new AsyncHttpGeoQueryFunction(url, key, capacity), //async query function instance
                3000, //timeout
                TimeUnit.MILLISECONDS, //time unit
                capacity);//max size of the async request queue; defaults to 100 if omitted
        result.print();
        env.execute();
    }
}
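
Each input line is expected to be a JSON string whose fields match the LogBean defined below; a hypothetical sample line:

{"uid":"user1","longitude":116.310003,"latitude":39.991957}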

3. AsyncHttpGeoQueryFunction

package cn._51doit.flink.day08.async;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.async.ResultFuture;
import org.apache.flink.streaming.api.functions.async.RichAsyncFunction;
import org.apache.http.HttpResponse;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.nio.client.CloseableHttpAsyncClient;
import org.apache.http.impl.nio.client.HttpAsyncClients;
import org.apache.http.util.EntityUtils;

import java.util.Collections;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Future;
import java.util.function.Supplier;

public class AsyncHttpGeoQueryFunction extends RichAsyncFunction<String, LogBean> {
    private transient CloseableHttpAsyncClient httpclient; //async HTTP client
    private String url; //Amap request URL
    private String key; //Amap API key, obtained after registering as an Amap developer
    private int maxConnTotal; //maximum number of connections for the async HTTP client
    public AsyncHttpGeoQueryFunction(String url, String key, int maxConnTotal) {
        this.url = url;
        this.key = key;
        this.maxConnTotal = maxConnTotal;
    }
    @Override
    public void open(Configuration parameters) throws Exception {
        RequestConfig requestConfig = RequestConfig.custom().build();
        httpclient = HttpAsyncClients.custom() //create the async HTTP client pool
                .setMaxConnTotal(maxConnTotal) //set the maximum number of connections
                .setDefaultRequestConfig(requestConfig).build();
        httpclient.start(); //start the async HTTP client
    }

    @Override
    public void asyncInvoke(String line, ResultFuture<LogBean> resultFuture) throws Exception {
        //Parse the JSON string into a LogBean using fastjson
        LogBean bean = JSON.parseObject(line, LogBean.class);
        double longitude = bean.longitude; //longitude
        double latitude = bean.latitude; //latitude
        //Append the coordinates and the Amap key to the request URL
        HttpGet httpGet = new HttpGet(url + "?location=" + longitude + "," + latitude + "&key=" + key);
        //Send the async request; returns a Future
        Future<HttpResponse> future = httpclient.execute(httpGet, null);
        CompletableFuture.supplyAsync(new Supplier<LogBean>() {
            @Override
            public LogBean get() {
                try {
                    HttpResponse response = future.get();
                    String province = null;
                    String city = null;
                    if (response.getStatusLine().getStatusCode() == 200) {
                        //Parse the response to extract the province, city, etc.
                        String result = EntityUtils.toString(response.getEntity());
                        JSONObject jsonObj = JSON.parseObject(result);
                        JSONObject regeocode = jsonObj.getJSONObject("regeocode");
                        if (regeocode != null && !regeocode.isEmpty()) {
                            JSONObject address = regeocode.getJSONObject("addressComponent");
                            province = address.getString("province");
                            city = address.getString("city");
                        }
                    }
                    bean.province = province; //set the province on the bean
                    bean.city = city; //set the city on the bean
                    return bean;
                } catch (Exception e) {
                    return null;
                }
            }
        }).thenAccept((LogBean result) -> {
            //Emit the result via resultFuture (complete takes a collection; wrap a single element in a singleton collection)
            resultFuture.complete(Collections.singleton(result));
        });
    }
    @Override
    public void close() throws Exception {
        httpclient.close(); //close the async HTTP client
    }
}

4. LogBean

package cn._51doit.flink.day08.async;

public class LogBean {

    public String uid;

    public Double longitude;

    public Double latitude;

    public String province;

    public String city;

    public LogBean(){}

    public LogBean(String uid, Double longitude, Double latitude) {
        this.uid = uid;
        this.longitude = longitude;
        this.latitude = latitude;
    }

    public static LogBean of(String uid, Double longitude, Double latitude) {
        return new LogBean(uid, longitude, latitude);
    }

    @Override
    public String toString() {
        return "LogBean{" +
                "uid='" + uid + '\'' +
                ", longitude=" + longitude +
                ", latitude=" + latitude +
                ", province='" + province + '\'' +
                ", city='" + city + '\'' +
                '}';
    }
}

Example 2: querying MySQL:

1. Add the connection pool dependency
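
A minimal sketch of the Maven dependencies (versions are assumptions — the MySQL driver version must match the com.mysql.jdbc.Driver class name used below):

<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>druid</artifactId>
    <version>1.1.24</version>
</dependency>
<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>5.1.49</version>
</dependency>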

2. AsyncQueryFromMySQL

package cn._51doit.flink.day08.async;

import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.AsyncDataStream;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import java.util.concurrent.TimeUnit;

public class AsyncQueryFromMySQL {


    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 5000)); //set the job restart strategy
        DataStreamSource<String> lines = env.socketTextStream("localhost", 8888);
        int capacity = 50;
        DataStream<Tuple2<String, String>> result = AsyncDataStream.orderedWait(
                lines, //input stream
                new MySQLAsyncFunction(capacity), //async query function instance
                3000, //timeout
                TimeUnit.MILLISECONDS, //time unit
                capacity); //max size of the async request queue; defaults to 100 if omitted
        result.print();
        env.execute();

    }
}

3. MySQLAsyncFunction

package cn._51doit.flink.day08.async;

import com.alibaba.druid.pool.DruidDataSource;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.async.ResultFuture;
import org.apache.flink.streaming.api.functions.async.RichAsyncFunction;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Collections;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.function.Supplier;

public class MySQLAsyncFunction extends RichAsyncFunction<String, Tuple2<String, String>> {
    private transient DruidDataSource dataSource; //Alibaba Druid database connection pool
    private transient ExecutorService executorService; //thread pool used to submit the async queries
    private int maxConnTotal; //maximum number of threads in the pool
    public MySQLAsyncFunction(int maxConnTotal) {
        this.maxConnTotal = maxConnTotal;
    }
    @Override
    public void open(Configuration parameters) throws Exception {
        executorService = Executors.newFixedThreadPool(maxConnTotal); //create a fixed-size thread pool
        dataSource = new DruidDataSource(); //create the connection pool and set its parameters
        dataSource.setDriverClassName("com.mysql.jdbc.Driver");
        dataSource.setUsername("root");
        dataSource.setPassword("123456");
        dataSource.setUrl("jdbc:mysql://localhost:3306/bigdata?characterEncoding=UTF-8");
        dataSource.setMaxActive(maxConnTotal);
    }
    @Override
    public void close() throws Exception {
        dataSource.close(); //close the connection pool
        executorService.shutdown(); //shut down the thread pool
    }
    @Override
    public void asyncInvoke(String id, ResultFuture<Tuple2<String, String>> resultFuture) throws Exception {
        //Submit the query to the thread pool for asynchronous execution; returns a Future
        Future<String> future = executorService.submit(() -> {
            return queryFromMySql(id); //the method that queries the database
        });
        CompletableFuture.supplyAsync(new Supplier<String>() {
            @Override
            public String get() {
                try {
                    return future.get(); //get the query result
                } catch (Exception e) {
                    return null;
                }
            }
        }).thenAccept((String result) -> {
            //Wrap the id and the query result in a Tuple2 and emit it via the ResultFuture
            resultFuture.complete(Collections.singleton(Tuple2.of(id, result)));
        });
    }

    private String queryFromMySql(String param) throws SQLException {
        String sql = "SELECT id, info FROM t_data WHERE id = ?";
        String result = null;
        Connection connection = null;
        PreparedStatement stmt = null;
        ResultSet rs = null;
        try {
            connection = dataSource.getConnection();
            stmt = connection.prepareStatement(sql);
            stmt.setString(1, param); //set the query parameter
            rs = stmt.executeQuery(); //execute the query
            while (rs.next()) {
                result = rs.getString("info"); //read the query result
            }
        } finally {
            if (rs != null) {
                rs.close();
            }
            if (stmt != null) {
                stmt.close();
            }
            if (connection != null) {
                connection.close();
            }
        }
        return result;
    }
}
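
For reference, a hypothetical definition of the t_data table queried above (an assumption — the real schema is not shown in the source):

CREATE TABLE t_data (
    id   VARCHAR(64) PRIMARY KEY,
    info VARCHAR(255)
);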
