I. State time-to-live (TTL)
Any keyed state can be assigned a time-to-live: if a TTL is configured and a state value has expired, Flink clears the corresponding value on a best-effort basis.
Setting the TTL of a ValueState:
package cn._51doit.flink.day08;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.state.StateTtlConfig;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

//Set the time-to-live of a ValueState
public class KeyedStateTTLDemo {

    public static void main(String[] args) throws Exception {
        //Create the Flink streaming execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(10000);
        //Set the restart strategy
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 5000));

        //Source: create a DataStream from a socket
        DataStreamSource<String> lines = env.socketTextStream("localhost", 8888);

        //Transformation
        SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndOne = lines.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void flatMap(String line, Collector<Tuple2<String, Integer>> collector) throws Exception {
                String[] words = line.split(" ");
                for (String word : words) {
                    if ("error".equals(word)) {
                        throw new RuntimeException("An exception occurred!");
                    }
                    collector.collect(Tuple2.of(word, 1));
                }
            }
        });

        //Group by word
        KeyedStream<Tuple2<String, Integer>, String> keyed = wordAndOne.keyBy(t -> t.f0);

        keyed.map(new RichMapFunction<Tuple2<String, Integer>, Tuple2<String, Integer>>() {

            private transient ValueState<Integer> counter;

            @Override
            public void open(Configuration parameters) throws Exception {
                //Define a StateTtlConfig
                StateTtlConfig ttlConfig = StateTtlConfig.newBuilder(Time.seconds(10))
                        .setUpdateType(StateTtlConfig.UpdateType.OnCreateAndWrite)
                        .setStateVisibility(StateTtlConfig.StateVisibility.NeverReturnExpired)
                        .build();
                //To use state, first define a state descriptor (the state's type and name)
                ValueStateDescriptor<Integer> stateDescriptor = new ValueStateDescriptor<>("wc-desc", Integer.class);
                //Attach the TTL config to the state descriptor
                stateDescriptor.enableTimeToLive(ttlConfig);
                //Initialize the state or restore historical state
                counter = getRuntimeContext().getState(stateDescriptor);
            }

            @Override
            public Tuple2<String, Integer> map(Tuple2<String, Integer> input) throws Exception {
                Integer currentCount = input.f1;
                //Read the historical count for the current key from the ValueState
                Integer historyCount = counter.value();
                if (historyCount == null) {
                    historyCount = 0;
                }
                Integer total = historyCount + currentCount; //accumulate
                //Update the state (in memory)
                counter.update(total);
                input.f1 = total; //the accumulated count
                return input;
            }
        }).print();

        //Trigger execution
        env.execute("StreamingWordCount");
    }
}
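The config above only sets the update type and the visibility of expired values. Since expired entries are otherwise only removed on a best-effort basis, StateTtlConfig also offers explicit cleanup strategies. A hedged sketch (same imports as the demo above; these builder methods exist in roughly Flink 1.10+, and each cleanup strategy only applies to the matching state backend):

StateTtlConfig ttlConfig = StateTtlConfig.newBuilder(Time.seconds(10))
        .setUpdateType(StateTtlConfig.UpdateType.OnCreateAndWrite) //refresh TTL on create/write (or OnReadAndWrite)
        .setStateVisibility(StateTtlConfig.StateVisibility.NeverReturnExpired) //or ReturnExpiredIfNotCleanedUp
        .cleanupFullSnapshot()               //drop expired entries when taking a full snapshot/savepoint
        .cleanupIncrementally(10, false)     //heap backends: check a few entries on each state access
        .cleanupInRocksdbCompactFilter(1000) //RocksDB backend: filter expired entries during compaction
        .build();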
II. Case study: counting distinct users
Input fields: user ID, activity ID, event type (1: view, 2: participate)
user1, A, 1
user1, A, 1
user1, A, 2
user2, A, 1
user2, B, 1
Goal: for each activity and each event type, count the occurrences and the number of distinct users. For the sample above, the key (A, event 1) ends with 3 occurrences from 2 distinct users.
package cn._51doit.flink.day08;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class ActivityCount {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> lines = env.socketTextStream("localhost", 8888);

        //Parse each input line into a tuple of (userId, activityId, eventType)
        SingleOutputStreamOperator<Tuple3<String, String, String>> tpDataStream = lines.map(new MapFunction<String, Tuple3<String, String, String>>() {
            @Override
            public Tuple3<String, String, String> map(String line) throws Exception {
                String[] fields = line.split(",");
                return Tuple3.of(fields[0], fields[1], fields[2]);
            }
        });

        //Key by (activityId, eventType)
        KeyedStream<Tuple3<String, String, String>, Tuple2<String, String>> keyed = tpDataStream.keyBy(new KeySelector<Tuple3<String, String, String>, Tuple2<String, String>>() {
            @Override
            public Tuple2<String, String> getKey(Tuple3<String, String, String> value) throws Exception {
                return Tuple2.of(value.f1, value.f2);
            }
        });
        //Equivalent lambda version:
        //KeyedStream<Tuple3<String, String, String>, Tuple2<String, String>> keyed = tpDataStream.keyBy(t -> Tuple2.of(t.f1, t.f2), TypeInformation.of(new TypeHint<Tuple2<String, String>>() {}));

        SingleOutputStreamOperator<Tuple4<String, String, Long, Long>> result = keyed.process(new ActivityCountFunction());
        result.print();
        env.execute();
    }
}
Define ActivityCountFunction:
package cn._51doit.flink.day08;

import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;

import java.util.HashSet;

/**
 * Counts occurrences and users per key;
 * the user count must be deduplicated.
 */
public class ActivityCountFunction extends KeyedProcessFunction<Tuple2<String, String>, Tuple3<String, String, String>, Tuple4<String, String, Long, Long>> {

    private transient ValueState<Long> actCountState;
    private transient ValueState<HashSet<String>> userDisState;

    @Override
    public void open(Configuration parameters) throws Exception {
        ValueStateDescriptor<Long> stateDescriptor1 = new ValueStateDescriptor<>("ac-count", Long.class);
        actCountState = getRuntimeContext().getState(stateDescriptor1);
        ValueStateDescriptor<HashSet<String>> stateDescriptor2 = new ValueStateDescriptor<>("dis-ac-count", TypeInformation.of(new TypeHint<HashSet<String>>() {}));
        userDisState = getRuntimeContext().getState(stateDescriptor2);
    }

    @Override
    public void processElement(Tuple3<String, String, String> value, Context ctx, Collector<Tuple4<String, String, Long, Long>> out) throws Exception {
        //Count the occurrences
        Long historyCount = actCountState.value();
        if (historyCount == null) {
            historyCount = 0L;
        }
        Long totalCount = historyCount + 1;
        actCountState.update(totalCount);

        //Count the distinct users
        HashSet<String> disUserSet = userDisState.value();
        if (disUserSet == null) {
            disUserSet = new HashSet<>();
        }
        disUserSet.add(value.f0);
        userDisState.update(disUserSet);

        //Emit the result
        out.collect(Tuple4.of(value.f1, value.f2, totalCount, (long) disUserSet.size()));
    }
}
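Storing the whole HashSet in a ValueState means the set is serialized and rewritten as one blob on every element, which gets expensive once the per-key user set grows (especially with the RocksDB state backend). A hedged alternative sketch, not from the original course code: keep the seen user IDs in a MapState and the distinct count in a separate ValueState (the class name ActivityCountFunctionV2 is hypothetical).

package cn._51doit.flink.day08;

import org.apache.flink.api.common.state.MapState;
import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;

public class ActivityCountFunctionV2 extends KeyedProcessFunction<Tuple2<String, String>, Tuple3<String, String, String>, Tuple4<String, String, Long, Long>> {

    private transient ValueState<Long> actCountState;       //total occurrences per key
    private transient ValueState<Long> userCountState;      //distinct user count per key
    private transient MapState<String, Boolean> seenUsers;  //user IDs already seen for this key

    @Override
    public void open(Configuration parameters) throws Exception {
        actCountState = getRuntimeContext().getState(new ValueStateDescriptor<>("ac-count", Long.class));
        userCountState = getRuntimeContext().getState(new ValueStateDescriptor<>("user-count", Long.class));
        seenUsers = getRuntimeContext().getMapState(new MapStateDescriptor<>("seen-users", String.class, Boolean.class));
    }

    @Override
    public void processElement(Tuple3<String, String, String> value, Context ctx, Collector<Tuple4<String, String, Long, Long>> out) throws Exception {
        Long total = actCountState.value();
        total = (total == null ? 0L : total) + 1;
        actCountState.update(total);

        Long users = userCountState.value();
        if (users == null) {
            users = 0L;
        }
        if (!seenUsers.contains(value.f0)) { //only a previously unseen user increases the distinct count
            seenUsers.put(value.f0, true);
            users += 1;
            userCountState.update(users);
        }
        out.collect(Tuple4.of(value.f1, value.f2, total, users));
    }
}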
III. Using Broadcast State
Join log data with dimension data from a database in real time, e.g. enriching each log record with the event name from the dictionary:
user1, view, 1, 1
user1, view, 1, 1
package cn._51doit.flink.day08;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.state.*;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.BroadcastStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction;
import org.apache.flink.util.Collector;

import java.util.HashSet;

public class BrocastStateDemo {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        //Dimension table in MySQL -> canal -> Kafka -> Flink Kafka source
        //Sample dictionary records:
        //INSERT,1,view
        //INSERT,2,participate
        //INSERT,3,purchase
        //UPDATE,3,quit
        //DELETE,3,delete
        DataStreamSource<String> dicDataStream = env.socketTextStream("localhost", 8888);

        SingleOutputStreamOperator<Tuple3<String, String, String>> dicTupleStream = dicDataStream.map(new MapFunction<String, Tuple3<String, String, String>>() {
            @Override
            public Tuple3<String, String, String> map(String value) throws Exception {
                String[] fields = value.split(",");
                return Tuple3.of(fields[0], fields[1], fields[2]);
            }
        });

        MapStateDescriptor<String, String> broadcastMapStateDescriptor = new MapStateDescriptor<>("dic-state", String.class, String.class);
        //Broadcast the dictionary stream
        BroadcastStream<Tuple3<String, String, String>> broadcastStream = dicTupleStream.broadcast(broadcastMapStateDescriptor);

        //Fact stream, e.g. user1,A,1
        DataStreamSource<String> actDataStream = env.socketTextStream("localhost", 9999);

        //Parse each input line into a tuple
        SingleOutputStreamOperator<Tuple3<String, String, String>> tpDataStream = actDataStream.map(new MapFunction<String, Tuple3<String, String, String>>() {
            @Override
            public Tuple3<String, String, String> map(String line) throws Exception {
                String[] fields = line.split(",");
                return Tuple3.of(fields[0], fields[1], fields[2]);
            }
        });

        KeyedStream<Tuple3<String, String, String>, Tuple2<String, String>> keyedStream = tpDataStream.keyBy(t -> Tuple2.of(t.f1, t.f2), TypeInformation.of(new TypeHint<Tuple2<String, String>>() {}));

        keyedStream
                .connect(broadcastStream)
                .process(new MyBroadcastProcessFunc(broadcastMapStateDescriptor))
                .print();

        env.execute();
    }

    private static class MyBroadcastProcessFunc extends KeyedBroadcastProcessFunction<Tuple2<String, String>, Tuple3<String, String, String>, Tuple3<String, String, String>, Tuple4<String, String, Long, Long>> {

        private MapStateDescriptor<String, String> broadcastMapStateDescriptor;

        public MyBroadcastProcessFunc() {}

        public MyBroadcastProcessFunc(MapStateDescriptor<String, String> broadcastMapStateDescriptor) {
            this.broadcastMapStateDescriptor = broadcastMapStateDescriptor;
        }

        private transient ValueState<Long> actCountState;
        private transient ValueState<HashSet<String>> userDisState;

        @Override
        public void open(Configuration parameters) throws Exception {
            ValueStateDescriptor<Long> stateDescriptor1 = new ValueStateDescriptor<>("ac-count", Long.class);
            actCountState = getRuntimeContext().getState(stateDescriptor1);
            ValueStateDescriptor<HashSet<String>> stateDescriptor2 = new ValueStateDescriptor<>("dis-ac-count", TypeInformation.of(new TypeHint<HashSet<String>>() {}));
            userDisState = getRuntimeContext().getState(stateDescriptor2);
        }

        /**
         * Processes each element of the fact stream, i.e. the keyedStream keyed
         * by (activityId, eventType), e.g. user1,A,1
         */
        @Override
        public void processElement(Tuple3<String, String, String> value, ReadOnlyContext ctx, Collector<Tuple4<String, String, Long, Long>> out) throws Exception {
            //Use the ReadOnlyContext to get the ReadOnlyBroadcastState, e.g. 1 -> view
            ReadOnlyBroadcastState<String, String> broadcastState = ctx.getBroadcastState(broadcastMapStateDescriptor);

            //Count the occurrences
            Long historyCount = actCountState.value();
            if (historyCount == null) {
                historyCount = 0L;
            }
            Long totalCount = historyCount + 1;
            actCountState.update(totalCount);

            //Count the distinct users
            HashSet<String> disUserSet = userDisState.value();
            if (disUserSet == null) {
                disUserSet = new HashSet<>();
            }
            disUserSet.add(value.f0);
            userDisState.update(disUserSet);

            //Emit the result, replacing the event ID with the event name from the broadcast state
            String eventId = value.f2;
            String eventName = broadcastState.get(eventId);
            out.collect(Tuple4.of(value.f1, eventName, totalCount, (long) disUserSet.size()));
        }

        /**
         * Processes each element of the broadcast dictionary stream,
         * e.g. INSERT,1,view
         */
        @Override
        public void processBroadcastElement(Tuple3<String, String, String> value, Context ctx, Collector<Tuple4<String, String, Long, Long>> out) throws Exception {
            //The operation type: INSERT, UPDATE or DELETE
            String type = value.f0;
            String eventId = value.f1;
            String eventName = value.f2;
            //Get the (writable) broadcast state
            BroadcastState<String, String> broadcastState = ctx.getBroadcastState(broadcastMapStateDescriptor);
            //Insert, update or delete the dictionary entry
            if ("DELETE".equals(type)) {
                broadcastState.remove(eventId);
            } else {
                broadcastState.put(eventId, eventName);
            }
        }
    }
}
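The comment at the top of main sketches the intended pipeline: the MySQL dimension table is captured by Canal into Kafka, and Flink reads the changelog with a Kafka source. A minimal sketch of replacing the 8888 socket with such a source, assuming the flink-connector-kafka dependency is on the classpath (the topic and group names are hypothetical):

//Additional imports: org.apache.flink.api.common.serialization.SimpleStringSchema,
//org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer, java.util.Properties
Properties props = new Properties();
props.setProperty("bootstrap.servers", "localhost:9092");
props.setProperty("group.id", "dic-consumer");
FlinkKafkaConsumer<String> dicSource = new FlinkKafkaConsumer<>(
        "dic-topic", new SimpleStringSchema(), props);
DataStreamSource<String> dicDataStream = env.addSource(dicSource);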
IV. Basics of Queryable State
Normally Flink writes its results to an external database and web systems read from there, but queryable state also lets a client query a running job's keyed state directly from Flink. The feature is still not mature.
1. Add the dependency and write the client (the QueryableStateClient below comes from the flink-queryable-state-client-java artifact):
package cn._51doit.flink.day08;

import org.apache.flink.api.common.JobID;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.queryablestate.client.QueryableStateClient;

import java.util.concurrent.CompletableFuture;

public class QueryStateClientDemo {

    public static void main(String[] args) throws Exception {
        //Connect to the queryable state proxy (default port 9069)
        QueryableStateClient client = new QueryableStateClient("localhost", 9069);

        //The state descriptor must match the one used in the job
        ValueStateDescriptor<Integer> stateDescriptor = new ValueStateDescriptor<>(
                "wc-state",   //the name of the state descriptor
                Integer.class //the type of the stored data
        );

        CompletableFuture<ValueState<Integer>> resultFuture = client.getKvState(
                JobID.fromHexString("23ba9de14e4135f6eb91d62af859c51e"), //the ID of the running job
                "my-query-name", //the name under which the state was made queryable
                "spark",         //the key to query
                BasicTypeInfo.STRING_TYPE_INFO,
                stateDescriptor);

        resultFuture.thenAccept(response -> {
            try {
                Integer res = response.value();
                System.out.println(res);
            } catch (Exception e) {
                e.printStackTrace();
            }
        });

        Thread.sleep(5000);
    }
}
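Note: getKvState only succeeds if the cluster running the job exposes the queryable state proxy. On a standalone cluster that typically means putting the flink-queryable-state-runtime jar on the classpath and enabling queryable state in the cluster configuration; the local-environment demo in item 2 below enables the proxy programmatically via QueryableStateOptions.ENABLE_QUERYABLE_STATE_PROXY_SERVER.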
2. QueryableKeyedStateDemo
package cn._51doit.flink.day08;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.QueryableStateOptions;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class QueryableKeyedStateDemo {

    public static class MyQueryStateRichMapFunction extends
            RichMapFunction<Tuple2<String, Integer>, Tuple2<String, Integer>> {

        //Use Flink's ValueState to store the count of each word
        private transient ValueState<Integer> countState;

        @Override
        public void open(Configuration parameters) throws Exception {
            //Initialize the state or restore historical state
            ValueStateDescriptor<Integer> stateDescriptor = new ValueStateDescriptor<>(
                    "wc-state",   //the name of the state descriptor
                    Integer.class //the type of the stored data
            );
            stateDescriptor.setQueryable("my-query-name"); //make the state queryable under the given name
            //Get the state from the runtime context using the state descriptor
            countState = getRuntimeContext().getState(stateDescriptor);
        }

        @Override
        public Tuple2<String, Integer> map(Tuple2<String, Integer> tp) throws Exception {
            Integer current = tp.f1; //the count of the current input word
            Integer counts = countState.value(); //the historical count
            if (counts == null) { //no historical count yet
                counts = 0; //initialize it to 0
            }
            int total = current + counts; //add the current count to the historical count
            countState.update(total); //update the state
            tp.f1 = total; //put the accumulated count back into the tuple
            return tp;
        }
    }

    public static void main(String[] args) throws Exception {
        ParameterTool params = ParameterTool.fromArgs(args);
        Configuration config = params.getConfiguration();
        //Enable the queryable state proxy server (and pin the web UI port)
        config.setInteger("rest.port", 8081);
        config.setBoolean(QueryableStateOptions.ENABLE_QUERYABLE_STATE_PROXY_SERVER, true);
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(config);

        //Enable checkpointing so intermediate results (state) are saved
        env.enableCheckpointing(30000);
        //Set the restart strategy
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, Time.seconds(5)));

        //Create a DataStream from the given socket address and port
        DataStreamSource<String> lines = env.socketTextStream("localhost", 8888);

        //Pair each word with 1 in a Tuple2
        DataStream<Tuple2<String, Integer>> wordAndOne = lines.map(
                new MapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public Tuple2<String, Integer> map(String word) throws Exception {
                        return Tuple2.of(word, 1);
                    }
                }
        );

        //Group by word; records with the same key go to the same group,
        //and each group maintains its own state
        KeyedStream<Tuple2<String, Integer>, String> keyed = wordAndOne.keyBy(t -> t.f0);

        DataStream<Tuple2<String, Integer>> result = keyed.map(new MyQueryStateRichMapFunction());
        result.print();
        env.execute();
    }
}
V. Using async I/O (multithreading)
Use async I/O when the external data cannot be broadcast: the database is too large, the data is not key-value shaped, or it cannot be fetched ahead of time at all (e.g. data behind a credit-reporting API). Each record instead triggers an asynchronous lookup.
Sending HTTP requests to the Amap (Gaode Maps) API:
1. Add the async HTTP client dependency (the code below uses Apache HttpAsyncClient, i.e. org.apache.httpcomponents:httpasyncclient).
2. AsyncQueryFromHttpDemo2
package cn._51doit.flink.day08.async;

import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.streaming.api.datastream.AsyncDataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import java.util.concurrent.TimeUnit;

public class AsyncQueryFromHttpDemo2 {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        //Set the job's restart strategy
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 5000));

        DataStreamSource<String> lines = env.socketTextStream("localhost", 8888);

        String url = "https://restapi.amap.com/v3/geocode/regeo"; //the Amap reverse-geocoding endpoint
        String key = "4924f7ef5c86a278f5500851541cdcff"; //the Amap API key, obtained by registering as an Amap developer
        int capacity = 50; //maximum number of concurrent async requests

        //Call AsyncDataStream.unorderedWait, passing in the async request Function
        SingleOutputStreamOperator<LogBean> result = AsyncDataStream.unorderedWait(
                lines, //the input stream
                new AsyncHttpGeoQueryFunction(url, key, capacity), //the async query Function instance
                3000, //timeout
                TimeUnit.MILLISECONDS, //time unit
                capacity); //maximum size of the async request queue; defaults to 100 if omitted

        result.print();
        env.execute();
    }
}
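This demo uses unorderedWait, which emits each result as soon as its future completes. orderedWait takes the same arguments but buffers completed results so the output order matches the input order, at the cost of extra latency and memory:

//Ordered variant with the same arguments as above
SingleOutputStreamOperator<LogBean> orderedResult = AsyncDataStream.orderedWait(
        lines, new AsyncHttpGeoQueryFunction(url, key, capacity),
        3000, TimeUnit.MILLISECONDS, capacity);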
3. AsyncHttpGeoQueryFunction
package cn._51doit.flink.day08.async;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.async.ResultFuture;
import org.apache.flink.streaming.api.functions.async.RichAsyncFunction;
import org.apache.http.HttpResponse;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.nio.client.CloseableHttpAsyncClient;
import org.apache.http.impl.nio.client.HttpAsyncClients;
import org.apache.http.util.EntityUtils;

import java.util.Collections;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Future;
import java.util.function.Supplier;

public class AsyncHttpGeoQueryFunction extends RichAsyncFunction<String, LogBean> {

    private transient CloseableHttpAsyncClient httpclient; //the async HttpClient
    private String url; //the Amap request URL
    private String key; //the Amap API key, obtained by registering as an Amap developer
    private int maxConnTotal; //maximum number of connections of the async HttpClient

    public AsyncHttpGeoQueryFunction(String url, String key, int maxConnTotal) {
        this.url = url;
        this.key = key;
        this.maxConnTotal = maxConnTotal;
    }

    @Override
    public void open(Configuration parameters) throws Exception {
        RequestConfig requestConfig = RequestConfig.custom().build();
        httpclient = HttpAsyncClients.custom() //create the async HttpClient connection pool
                .setMaxConnTotal(maxConnTotal) //set the maximum number of connections
                .setDefaultRequestConfig(requestConfig).build();
        httpclient.start(); //start the async HttpClient
    }

    @Override
    public void asyncInvoke(String line, ResultFuture<LogBean> resultFuture) throws Exception {
        //Parse the JSON string into a LogBean with fastjson
        LogBean bean = JSON.parseObject(line, LogBean.class);
        double longitude = bean.longitude; //the longitude
        double latitude = bean.latitude; //the latitude

        //Build the request URL from the coordinates and the Amap key
        HttpGet httpGet = new HttpGet(url + "?location=" + longitude + "," + latitude + "&key=" + key);
        //Send the async request; a Future is returned
        Future<HttpResponse> future = httpclient.execute(httpGet, null);

        CompletableFuture.supplyAsync(new Supplier<LogBean>() {
            @Override
            public LogBean get() {
                try {
                    HttpResponse response = future.get();
                    String province = null;
                    String city = null;
                    if (response.getStatusLine().getStatusCode() == 200) {
                        //Parse the response to extract the province, city, etc.
                        String result = EntityUtils.toString(response.getEntity());
                        JSONObject jsonObj = JSON.parseObject(result);
                        JSONObject regeocode = jsonObj.getJSONObject("regeocode");
                        if (regeocode != null && !regeocode.isEmpty()) {
                            JSONObject address = regeocode.getJSONObject("addressComponent");
                            province = address.getString("province");
                            city = address.getString("city");
                        }
                    }
                    bean.province = province; //fill in the province
                    bean.city = city; //fill in the city
                    return bean;
                } catch (Exception e) {
                    return null;
                }
            }
        }).thenAccept((LogBean result) -> {
            //Put the result into the ResultFuture to emit it (complete takes a collection;
            //for a single element, pass a singleton collection)
            resultFuture.complete(Collections.singleton(result));
        });
    }

    @Override
    public void close() throws Exception {
        httpclient.close(); //close the async HttpClient connection pool
    }
}
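By default, a request that exceeds the timeout passed to AsyncDataStream fails the whole job. A hedged sketch of a method that could be added to AsyncHttpGeoQueryFunction: overriding timeout (a default method of Flink's AsyncFunction) and completing with an empty collection drops the timed-out record instead.

@Override
public void timeout(String input, ResultFuture<LogBean> resultFuture) throws Exception {
    //Drop the record on timeout instead of failing the job
    resultFuture.complete(Collections.emptyList());
}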
4. LogBean
package cn._51doit.flink.day08.async;
public class LogBean {
public String uid;
public Double longitude;
public Double latitude;
public String province;
public String city;
public LogBean(){}
public LogBean(String uid, Double longitude, Double latitude) {
this.uid = uid;
this.longitude = longitude;
this.latitude = latitude;
}
public static LogBean of(String uid, Double longitude, Double latitude) {
return new LogBean(uid, longitude, latitude);
}
@Override
public String toString() {
return "LogBean{" +
"uid='" + uid + '\'' +
", longitude=" + longitude +
", latitude=" + latitude +
", province='" + province + '\'' +
", city='" + city + '\'' +
'}';
}
}
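Since asyncInvoke parses each socket line with JSON.parseObject(line, LogBean.class), the lines sent to port 8888 must be JSON objects matching LogBean's fields. A hypothetical example input line (the coordinates are made up):

{"uid":"user1","longitude":116.310003,"latitude":39.991957}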
Sending queries to MySQL:
1. Add the connection pool dependency (the code below uses Alibaba Druid, com.alibaba:druid, plus the MySQL JDBC driver).
2. AsyncQueryFromMySQL
package cn._51doit.flink.day08.async;

import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.AsyncDataStream;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import java.util.concurrent.TimeUnit;

public class AsyncQueryFromMySQL {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 5000)); //set the job's restart strategy

        DataStreamSource<String> lines = env.socketTextStream("localhost", 8888);

        int capacity = 50;
        DataStream<Tuple2<String, String>> result = AsyncDataStream.orderedWait(
                lines, //the input stream
                new MySQLAsyncFunction(capacity), //the async query Function instance
                3000, //timeout
                TimeUnit.MILLISECONDS, //time unit
                capacity); //maximum size of the async request queue; defaults to 100 if omitted

        result.print();
        env.execute();
    }
}
3. MySQLAsyncFunction
package cn._51doit.flink.day08.async;

import com.alibaba.druid.pool.DruidDataSource;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.async.ResultFuture;
import org.apache.flink.streaming.api.functions.async.RichAsyncFunction;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Collections;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.function.Supplier;

public class MySQLAsyncFunction extends RichAsyncFunction<String, Tuple2<String, String>> {

    private transient DruidDataSource dataSource; //Alibaba's Druid database connection pool
    private transient ExecutorService executorService; //thread pool for running multiple concurrent queries
    private int maxConnTotal; //maximum number of threads in the pool

    public MySQLAsyncFunction(int maxConnTotal) {
        this.maxConnTotal = maxConnTotal;
    }

    @Override
    public void open(Configuration parameters) throws Exception {
        executorService = Executors.newFixedThreadPool(maxConnTotal); //create a fixed-size thread pool
        dataSource = new DruidDataSource(); //create the connection pool and set its parameters
        dataSource.setDriverClassName("com.mysql.jdbc.Driver");
        dataSource.setUsername("root");
        dataSource.setPassword("123456");
        dataSource.setUrl("jdbc:mysql://localhost:3306/bigdata?characterEncoding=UTF-8");
        dataSource.setMaxActive(maxConnTotal);
    }

    @Override
    public void close() throws Exception {
        dataSource.close(); //close the connection pool
        executorService.shutdown(); //shut down the thread pool
    }

    @Override
    public void asyncInvoke(String id, ResultFuture<Tuple2<String, String>> resultFuture) throws Exception {
        //Submit the query to the thread pool to run asynchronously; a Future is returned
        Future<String> future = executorService.submit(() -> {
            return queryFromMySql(id); //the method that queries the database
        });

        CompletableFuture.supplyAsync(new Supplier<String>() {
            @Override
            public String get() {
                try {
                    return future.get(); //get the query result
                } catch (Exception e) {
                    return null;
                }
            }
        }).thenAccept((String result) -> {
            //Wrap the id and the query result in a Tuple2 and put it into the ResultFuture to emit it
            resultFuture.complete(Collections.singleton(Tuple2.of(id, result)));
        });
    }

    private String queryFromMySql(String param) throws SQLException {
        String sql = "SELECT id, info FROM t_data WHERE id = ?";
        String result = null;
        Connection connection = null;
        PreparedStatement stmt = null;
        ResultSet rs = null;
        try {
            connection = dataSource.getConnection();
            stmt = connection.prepareStatement(sql);
            stmt.setString(1, param); //set the query parameter
            rs = stmt.executeQuery(); //run the query
            while (rs.next()) {
                result = rs.getString("info"); //read the query result
            }
        } finally {
            if (rs != null) {
                rs.close();
            }
            if (stmt != null) {
                stmt.close();
            }
            if (connection != null) {
                connection.close();
            }
        }
        return result;
    }
}
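Note that JDBC is a blocking API, so this example only simulates asynchrony: the blocking query runs on a dedicated thread pool sized to match the Druid connection pool. The Flink operator thread stays free, but each in-flight query still occupies one pool thread and one database connection.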