KeyedState 状态的存活时间:Time To Live(TTL)
KeyedState_Only
可以设置状态存活指定的时间,超过指定的时间,状态会被清除
ValueState的TTL
package cn._51doit.flink.day07;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.state.StateTtlConfig;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
/**
 * Word count over a socket stream that demonstrates time-to-live (TTL) on a keyed
 * {@code ValueState}.
 *
 * <ol>
 *   <li>TTL can only be configured for keyed state.</li>
 *   <li>Every key held in the ValueState has its own, independent TTL.</li>
 *   <li>With checkpointing enabled, a failed job restores the state of the last checkpoint
 *       as long as that state's TTL has not expired.</li>
 * </ol>
 */
public class ValueStateTTLDemo {

    /**
     * Builds and runs the job: socket text -> words -> keyBy(word) -> running count kept in
     * TTL-enabled ValueState -> stdout.
     *
     * @param args unused
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Enable checkpointing with an interval of 10000 ms.
        env.enableCheckpointing(10000);
        // Restart strategy: fixed delay, at most 5 restart attempts, 1000 ms between attempts.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(5, 1000));

        DataStreamSource<String> lines = env.socketTextStream("localhost", 8888);

        SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndOne = lines.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void flatMap(String line, Collector<Tuple2<String, Integer>> out) throws Exception {
                // A line starting with "error" deliberately fails the task so that
                // restart and state recovery can be observed.
                if (line.startsWith("error")) {
                    throw new RuntimeException("有错误数据出现,抛出异常!");
                }
                for (String word : line.split(" ")) {
                    // Blank lines and consecutive spaces yield empty tokens; they are not
                    // words and must not become a key with its own counter.
                    if (!word.isEmpty()) {
                        out.collect(Tuple2.of(word, 1));
                    }
                }
            }
        });

        KeyedStream<Tuple2<String, Integer>, String> keyedStream = wordAndOne.keyBy(t -> t.f0);

        SingleOutputStreamOperator<Tuple2<String, Integer>> res = keyedStream.map(new RichMapFunction<Tuple2<String, Integer>, Tuple2<String, Integer>>() {

            /** Running count for the current key; configured with a TTL in {@link #open}. */
            private ValueState<Integer> valueState;

            /** Initializes (or restores) the state; the TTL is attached via the state descriptor. */
            @Override
            public void open(Configuration parameters) throws Exception {
                StateTtlConfig ttlConfig = StateTtlConfig.newBuilder(Time.seconds(15))
                        // Default: the TTL of a key is refreshed when its state is created or written.
                        // Alternative: UpdateType.OnReadAndWrite additionally refreshes it on reads.
                        .setUpdateType(StateTtlConfig.UpdateType.OnCreateAndWrite)
                        // Expiry and physical removal do not happen at the same moment, so an
                        // expired value may still be stored for a while.
                        // Default: an expired value is never returned, even if not yet removed.
                        // Alternative: StateVisibility.ReturnExpiredIfNotCleanedUp returns an
                        // expired value as long as it has not been cleaned up.
                        .setStateVisibility(StateTtlConfig.StateVisibility.NeverReturnExpired)
                        .build();

                // The descriptor defines the state's name and type.
                ValueStateDescriptor<Integer> stateDescriptor = new ValueStateDescriptor<>("wc-state", Integer.class);
                // Attach the TTL configuration to the descriptor.
                stateDescriptor.enableTimeToLive(ttlConfig);
                // Initialize or restore the state.
                valueState = getRuntimeContext().getState(stateDescriptor);
            }

            /**
             * Adds the incoming count to the count stored for the current key.
             *
             * @param input (word, count) record
             * @return the same tuple instance with {@code f1} replaced by the running total
             */
            @Override
            public Tuple2<String, Integer> map(Tuple2<String, Integer> input) throws Exception {
                // value() is scoped to the current key; null means no (unexpired) state exists.
                Integer history = valueState.value();
                int total = input.f1 + (history == null ? 0 : history);
                // Writing the state also refreshes the TTL (OnCreateAndWrite).
                valueState.update(total);
                input.f1 = total;
                return input;
            }
        });

        res.print();
        env.execute();
    }
}
需要注意:
-
只能为KeyedState设置TTL
-
ValueState中的每个Key都有独立的TTL,同一个分区不同组(key)的数据都有自己的TTL。
-
存留时间默认(OnCreateAndWrite)是从同组(同一个key)最近一次创建或更新状态时开始计算的;只要同组数据不断到来并更新状态,TTL就会被不断刷新,不会超过状态存留时间,状态也就不会被清掉。
-
开启checkpointing,程序出错,只要TTL没有超时,可以恢复上一次checkpoint的数据
MapState的TTL
package cn._51doit.flink.day07;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.common.state.MapState;
import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.common.state.StateTtlConfig;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
 * Demonstrates TTL on a keyed {@code MapState} by accumulating money per city within each
 * province.
 *
 * <p>Conceptually the state is {@code Map<K, Map<k, v>>}: the outer K is the stream key and the
 * inner k is the map key. TTL on a MapState is applied to every inner key individually.
 */
public class MapStateTTLDemo {

    /**
     * Builds and runs the job: socket text -> (province, city, money) -> keyBy(province) ->
     * per-city running total kept in TTL-enabled MapState -> stdout.
     *
     * @param args unused
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Enable checkpointing with an interval of 10000 ms.
        env.enableCheckpointing(10000);

        // Expected input, one record per line: province,city,money
        //   Liaoning,Shenyang,3000
        //   Liaoning,Dalian,4000
        //   Liaoning,Anshan,4000
        //   Hebei,Langfang,2000
        //   Hebei,Xingtai,3000
        //   Hebei,Shijiazhuang,2000
        DataStreamSource<String> lines = env.socketTextStream("localhost", 8888, "\n", 5);

        // Parse each line into a (province, city, money) tuple.
        SingleOutputStreamOperator<Tuple3<String, String, Integer>> parsed = lines.map(new MapFunction<String, Tuple3<String, String, Integer>>() {
            @Override
            public Tuple3<String, String, Integer> map(String line) throws Exception {
                String[] parts = line.split(",");
                return Tuple3.of(parts[0], parts[1], Integer.parseInt(parts[2]));
            }
        });

        // Key by province so that all records of one province share the same keyed state,
        // then accumulate the money per city.
        KeyedStream<Tuple3<String, String, Integer>, String> byProvince = parsed.keyBy(t -> t.f0);
        byProvince.map(new CityMoneyFunction2()).print();

        env.execute();
    }

    /** Keeps a running money total per city in a MapState whose descriptor has a 15-second TTL. */
    private static class CityMoneyFunction2 extends RichMapFunction<Tuple3<String, String, Integer>, Tuple3<String, String, Integer>> {

        /** city -> accumulated money, scoped to the current key. */
        private MapState<String, Integer> mapState;

        /** Creates the descriptor, attaches the TTL configuration and initializes/restores the state. */
        @Override
        public void open(Configuration parameters) throws Exception {
            MapStateDescriptor<String, Integer> descriptor = new MapStateDescriptor<>("city-money-state", String.class, Integer.class);
            descriptor.enableTimeToLive(StateTtlConfig.newBuilder(Time.seconds(15)).build());
            mapState = getRuntimeContext().getMapState(descriptor);
        }

        /**
         * Adds the record's money to the total stored for its city.
         *
         * @param input (province, city, money) record
         * @return the same tuple instance with {@code f2} replaced by the city's running total
         */
        @Override
        public Tuple3<String, String, Integer> map(Tuple3<String, String, Integer> input) throws Exception {
            String city = input.f1;
            // Look up the inner value by the inner key; null means nothing is stored for it.
            Integer previous = mapState.get(city);
            int total = input.f2 + (previous == null ? 0 : previous);
            mapState.put(city, total);
            input.f2 = total;
            return input;
        }
    }
}
-
MapState 的结构相当于 Map<K, Map<k, v>>。区别于redis只能给大K设置TTL,MapState设置TTL时,是为里面的每个小k单独设置TTL。
ListState的TTL
package cn._51doit.flink.day07;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.state.StateTtlConfig;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
 * Demonstrates TTL on a keyed {@code ListState}.
 *
 * <p>Conceptually the state is {@code Map<K, List<v>>}. TTL on a ListState is applied to every
 * element v of the list individually.
 */
public class ListStateTTLDemo {

    /**
     * Builds and runs the job: socket text -> (uid, event) -> keyBy(uid) -> events collected in
     * TTL-enabled ListState -> stdout.
     *
     * @param args unused
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Enable checkpointing with an interval of 5000 ms.
        env.enableCheckpointing(5000);

        // Stated goal: keep the most recent 10 events of each user.
        // NOTE(review): nothing in this class enforces a limit of 10; the list is only
        // bounded by the TTL configured on the state descriptor.
        // Expected input, one record per line: uid,event
        //   u001,view
        //   u001,pay
        //   u002,view
        //   u002,view
        DataStreamSource<String> lines = env.socketTextStream("localhost", 8888, "\n", 5);

        SingleOutputStreamOperator<Tuple2<String, String>> parsed = lines.map(new MapFunction<String, Tuple2<String, String>>() {
            @Override
            public Tuple2<String, String> map(String value) throws Exception {
                // A line starting with "error" deliberately fails the task.
                if (value.startsWith("error")) {
                    throw new RuntimeException("数据出问题了!");
                }
                String[] parts = value.split(",");
                return Tuple2.of(parts[0], parts[1]);
            }
        });

        KeyedStream<Tuple2<String, String>, String> byUser = parsed.keyBy(t -> t.f0);

        // Store each user's events in arrival order and emit the list after every record.
        keyedEvents(byUser).print();

        env.execute();
    }

    /** Applies {@link UserEventFunction2} to the keyed stream. */
    private static SingleOutputStreamOperator<Tuple2<String, List<String>>> keyedEvents(
            KeyedStream<Tuple2<String, String>, String> byUser) {
        return byUser.map(new UserEventFunction2());
    }

    /** Appends each event to a ListState whose descriptor has a 15-second TTL. */
    private static class UserEventFunction2 extends RichMapFunction<Tuple2<String, String>, Tuple2<String, List<String>>> {

        /** Events of the current key. */
        private ListState<String> listState;

        /** Creates the descriptor, attaches the TTL configuration and initializes/restores the state. */
        @Override
        public void open(Configuration parameters) throws Exception {
            ListStateDescriptor<String> descriptor = new ListStateDescriptor<>("event-state", String.class);
            descriptor.enableTimeToLive(StateTtlConfig.newBuilder(Time.seconds(15)).build());
            listState = getRuntimeContext().getListState(descriptor);
        }

        /**
         * Appends the record's event to the state and returns everything the state yields.
         *
         * @param input (uid, event) record
         * @return (uid, events read back from the state after the append)
         */
        @Override
        public Tuple2<String, List<String>> map(Tuple2<String, String> input) throws Exception {
            listState.add(input.f1);
            // The Iterable returned by a TTL-enabled ListState cannot be cast to ArrayList,
            // so the elements are copied into a fresh list.
            List<String> snapshot = new ArrayList<>();
            listState.get().forEach(snapshot::add);
            return Tuple2.of(input.f0, snapshot);
        }
    }
}