1. 基本方式
自定义flatmap类,在open方法中获取状态。flatMap方法中更新状态。
/**
 * Minimal value-state example: obtains a {@code ValueState<Events>} handle in
 * {@link #open} and reads/overwrites it for every incoming element.
 * Emits nothing through the collector; results are printed to stdout only.
 */
public static class MyFlatMap extends RichFlatMapFunction<Events, String> {

    /** State handle; assigned in {@link #open} from the runtime context. */
    ValueState<Events> myStatu;

    /**
     * Registers the state descriptor and fetches the state handle.
     *
     * @param parameters configuration passed in by the runtime (unused here)
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        ValueStateDescriptor<Events> descriptor =
                new ValueStateDescriptor<Events>("my-statu", Events.class);
        myStatu = getRuntimeContext().getState(descriptor);
    }

    /**
     * Prints the stored value, replaces it with the current element, then prints it again.
     *
     * @param events    the incoming element
     * @param collector output collector (not used by this example)
     */
    @Override
    public void flatMap(Events events, Collector<String> collector) throws Exception {
        // Value held in state before this element is applied.
        Events before = myStatu.value();
        System.out.println(before);

        myStatu.update(events);

        // Read back to show the state now holds the current element.
        Events after = myStatu.value();
        System.out.println("my value" + after);
    }
}
2. 其他状态
keyedState处理流程
先注册描述器
调用富函数的getRuntimeContext
使用状态需要定义成属性(声明)
使用不同的getstate方法(在open生命周期中创建)
状态读写操作
清空状态
/**
 * Demonstrates the five state primitives used in this example (value, list, map,
 * reducing, aggregating). All handles are obtained in {@link #open} and then
 * read/updated for every element in {@link #flatMap}. Nothing is emitted through the
 * collector; the state contents are printed to stdout.
 */
public static class MyFlatMap extends RichFlatMapFunction<Events, String> {

    /** Last element seen. */
    ValueState<Events> myStatu;
    /** Every element seen. */
    ListState<Events> myListStatu;
    /** Per-user event counter (user -> number of events). */
    MapState<String, Long> myMapStatu;
    /** Running reduction of elements (see the reduce function registered in open). */
    ReducingState<Events> myReducingStatu;
    /** Element counter that is exposed as a formatted string. */
    AggregatingState<Events, String> myAggStatu;

    /**
     * Registers one descriptor per state kind and fetches the matching handle.
     *
     * @param parameters configuration passed in by the runtime (unused here)
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        myStatu = getRuntimeContext()
                .getState(new ValueStateDescriptor<Events>("my-statu", Events.class));

        myListStatu = getRuntimeContext()
                .getListState(new ListStateDescriptor<Events>("my-list", Events.class));

        myMapStatu = getRuntimeContext()
                .getMapState(new MapStateDescriptor<String, Long>("my-Map", String.class, Long.class));

        // Reducing state additionally needs the reduce function that folds elements together.
        myReducingStatu = getRuntimeContext()
                .getReducingState(new ReducingStateDescriptor<Events>(
                        "MY-reduce",
                        new ReduceFunction<Events>() {
                            @Override
                            public Events reduce(Events events, Events t1) throws Exception {
                                // Keep user/url of the accumulated element, take the newest timestamp.
                                return new Events(events.user, events.url, t1.timestamp);
                            }
                        },
                        Events.class));

        myAggStatu = getRuntimeContext()
                .getAggregatingState(new AggregatingStateDescriptor<Events, Long, String>(
                        "my-agg",
                        new AggregateFunction<Events, Long, String>() {
                            @Override
                            public Long createAccumulator() {
                                return 0L;
                            }

                            @Override
                            public Long add(Events events, Long aLong) {
                                return aLong + 1;
                            }

                            @Override
                            public String getResult(Long aLong) {
                                return "count" + aLong;
                            }

                            @Override
                            public Long merge(Long aLong, Long acc1) {
                                // Two partial counts combine by addition; returning null here
                                // would break any caller that merges accumulators.
                                return aLong + acc1;
                            }
                        },
                        Long.class));
    }

    /**
     * Updates every state with the incoming element and prints the resulting contents.
     *
     * @param events    the incoming element
     * @param collector output collector (not used by this example)
     */
    @Override
    public void flatMap(Events events, Collector<String> collector) throws Exception {
        // Value state: print old value, overwrite, print new value.
        System.out.println(myStatu.value());
        myStatu.update(events);
        System.out.println("my value" + myStatu.value());

        // List state: append.
        myListStatu.add(events);

        // Map state: read once; the first event of a user counts as 1, later ones increment.
        Long seenSoFar = myMapStatu.get(events.user);
        myMapStatu.put(events.user, seenSoFar == null ? 1L : seenSoFar + 1);
        System.out.println("my map value " + events.user + "" + myMapStatu.get(events.user));

        // Aggregating state: add the element, then read the formatted result.
        myAggStatu.add(events);
        System.out.println(myAggStatu.get());

        // Reducing state: add the element, then read the reduced value.
        myReducingStatu.add(events);
        System.out.println("reduce statu" + events.user + "" + myReducingStatu.get());
    }
}
3. 值状态
案例:周期性输出pv——不开窗定时统计PV。
// Count visits per user: partition the stream by user, then let
// PeriodPVResult process each partition and print whatever it emits.
stream
        .keyBy(event -> event.user)
        .process(new PeriodPVResult())
        .print();
process function
/**
 * Keeps a running element count per key and emits it from an event-time timer that
 * re-arms itself every 10 seconds. The count is never reset, so each emission is the
 * cumulative total for the key.
 */
public static class PeriodPVResult extends KeyedProcessFunction<String, Events, String> {

    /** Running count of elements for the current key. */
    ValueState<Long> countState;
    /** Timestamp of the currently registered timer; {@code null} when none is registered. */
    ValueState<Long> timerState;

    /**
     * Fetches both state handles from the runtime context.
     *
     * @param parameters configuration passed in by the runtime (unused here)
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        ValueStateDescriptor<Long> countDescriptor =
                new ValueStateDescriptor<Long>("count", Long.class);
        ValueStateDescriptor<Long> timerDescriptor =
                new ValueStateDescriptor<Long>("timer", Long.class);
        countState = getRuntimeContext().getState(countDescriptor);
        timerState = getRuntimeContext().getState(timerDescriptor);
    }

    /**
     * Increments the count and, if no timer is pending, registers one 10 seconds after
     * the element's timestamp.
     *
     * @param events    the incoming element
     * @param context   gives access to the timer service
     * @param collector output collector (not used here; output happens in onTimer)
     */
    @Override
    public void processElement(Events events, Context context, Collector<String> collector) throws Exception {
        // Bump the counter, starting from 1 for the first element of a key.
        Long previous = countState.value();
        long updated;
        if (previous == null) {
            updated = 1;
        } else {
            updated = previous + 1;
        }
        countState.update(updated);

        // A timer is already pending for this key: nothing more to do.
        if (timerState.value() != null) {
            return;
        }

        long fireAt = events.timestamp + 10 * 1000L;
        context.timerService().registerEventTimeTimer(fireAt);
        timerState.update(fireAt);
    }

    /**
     * Emits the current count for the key and schedules the next firing 10 seconds later.
     *
     * @param timestamp the timestamp of the timer that fired
     * @param ctx       gives access to the current key and the timer service
     * @param out       collector receiving the formatted count
     */
    @Override
    public void onTimer(long timestamp, OnTimerContext ctx, Collector<String> out) throws Exception {
        String report = ctx.getCurrentKey() + "pv" + countState.value();
        out.collect(report);

        // Drop the fired timer's marker; the count is kept so it keeps accumulating.
        timerState.clear();

        // Re-arm for the next period and remember the new timer.
        long nextFireAt = timestamp + 10 * 1000L;
        ctx.timerService().registerEventTimeTimer(nextFireAt);
        timerState.update(nextFireAt);
    }
}
4.列表状态
使用列表状态分别保存两条流中已到达的数据。
案例:双流join
// Two-stream join: both streams are keyed by field f0 and connected. Every arriving
// element is paired with all elements buffered so far from the OTHER stream, then
// buffered itself so that later elements of the other stream can pair with it.
stream.keyBy(data -> data.f0)
        .connect(stream2.keyBy(data -> data.f0))
        .process(new CoProcessFunction<Tuple3<String, String, Long>, Tuple3<String, String, Long>, String>() {
            // Elements received so far from the first stream.
            private ListState<Tuple3<String, String, Long>> streamListState;
            // Elements received so far from the second stream.
            private ListState<Tuple3<String, String, Long>> stream2ListState2;

            @Override
            public void open(Configuration parameters) throws Exception {
                streamListState = getRuntimeContext().getListState(
                        new ListStateDescriptor<Tuple3<String, String, Long>>(
                                "stream1.list", Types.TUPLE(Types.STRING, Types.STRING, Types.LONG)));
                stream2ListState2 = getRuntimeContext().getListState(
                        new ListStateDescriptor<Tuple3<String, String, Long>>(
                                "stream2.list", Types.TUPLE(Types.STRING, Types.STRING, Types.LONG)));
            }

            @Override
            public void processElement1(Tuple3<String, String, Long> left, Context context, Collector<String> collector) throws Exception {
                // Pair the new left element with everything buffered from the second stream.
                for (Tuple3<String, String, Long> right : stream2ListState2.get()) {
                    collector.collect("left:" + left.f0 + "" + left.f2 + "=>" + right);
                }
                streamListState.add(Tuple3.of(left.f0, left.f1, left.f2));
            }

            @Override
            public void processElement2(Tuple3<String, String, Long> right, Context context, Collector<String> collector) throws Exception {
                // Pair the new right element with everything buffered from the FIRST stream.
                // (Reading the second stream's own buffer here would join right with right.)
                for (Tuple3<String, String, Long> left : streamListState.get()) {
                    collector.collect("right:" + right.f0 + "" + right.f2 + "=>" + left);
                }
                stream2ListState2.add(Tuple3.of(right.f0, right.f1, right.f2));
            }
        })
        .print();
5.映射状态
mapstate 模拟滚动窗口。
// Partition by url and count per 10000-unit window using FakeWindowResult
// (the argument is the window size handed to its constructor).
stream
        .keyBy(event -> event.url)
        .process(new FakeWindowResult(10000L))
        .print();
/**
 * Emulates a tumbling window with map state: each element is assigned to the window
 * containing its timestamp, a per-window counter is kept in a map keyed by window
 * start, and an event-time timer at {@code windowEnd - 1} emits and drops that entry.
 */
public static class FakeWindowResult extends KeyedProcessFunction<String, Events, String> {

    /** Window length, in the same unit as the element timestamps. */
    private Long windowSize;

    /**
     * @param windowSize length of each emulated window
     */
    public FakeWindowResult(Long windowSize) {
        this.windowSize = windowSize;
    }

    /** Window start -> number of elements counted in that window. */
    MapState<Long, Long> windowUrlCountMapState;

    /**
     * Fetches the map state handle from the runtime context.
     *
     * @param parameters configuration passed in by the runtime (unused here)
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        MapStateDescriptor<Long, Long> descriptor =
                new MapStateDescriptor<Long, Long>("WINDOW-COUNT", Long.class, Long.class);
        windowUrlCountMapState = getRuntimeContext().getMapState(descriptor);
    }

    /**
     * Assigns the element to its window, registers the window's closing timer and
     * increments the window's counter.
     *
     * @param events    the incoming element
     * @param context   gives access to the timer service
     * @param collector output collector (not used here; output happens in onTimer)
     */
    @Override
    public void processElement(Events events, Context context, Collector<String> collector) throws Exception {
        // Window assignment: round the timestamp down to a multiple of the window size.
        Long windowStart = events.timestamp / windowSize * windowSize;
        Long windowEnd = windowStart + windowSize;

        // The timer fires at the last timestamp that still belongs to the window.
        context.timerService().registerEventTimeTimer(windowEnd - 1);

        // Incremental aggregation: 1 for a new window, otherwise previous count + 1.
        long updatedCount = windowUrlCountMapState.contains(windowStart)
                ? windowUrlCountMapState.get(windowStart) + 1
                : 1L;
        windowUrlCountMapState.put(windowStart, updatedCount);
    }

    /**
     * Emits the count of the window that ends right after the timer timestamp and
     * removes that window's entry from the map.
     *
     * @param timestamp the timestamp of the timer that fired ({@code windowEnd - 1})
     * @param ctx       gives access to the current key
     * @param out       collector receiving the formatted window result
     */
    @Override
    public void onTimer(long timestamp, OnTimerContext ctx, Collector<String> out) throws Exception {
        // Recover the window bounds from the timer timestamp.
        long windowEnd = timestamp + 1;
        long windowStart = windowEnd - windowSize;

        Long count = windowUrlCountMapState.get(windowStart);
        String windowLabel = "窗口" + new Timestamp(windowStart) + "-" + new Timestamp(windowEnd);
        out.collect(windowLabel + "url" + ctx.getCurrentKey() + "count" + count);

        // Emulate closing the window: forget its counter.
        windowUrlCountMapState.remove(windowStart);
    }
}
6.聚合状态
案例:每隔5个数据统计平均时间戳检验疏密程度
// Partition by user and report through AvgTsResult; the argument 5L is the
// element count passed to its constructor.
stream
        .keyBy(event -> event.user)
        .flatMap(new AvgTsResult(5L))
        .print();
/**
 * Custom rich flat-map function that, for each key, emits the average timestamp of
 * every {@code count} consecutive elements and then starts over.
 */
public static class AvgTsResult extends RichFlatMapFunction<Events, String> {

    /** Number of elements after which a result is emitted. */
    private Long count;

    /**
     * @param count number of elements per emitted average
     */
    public AvgTsResult(Long count) {
        this.count = count;
    }

    /** Aggregating state holding (timestamp sum, element count) and exposing their quotient. */
    AggregatingState<Events, Long> avgTsAggState;
    /** Number of elements seen since the last emission. */
    ValueState<Long> countState;

    /**
     * Registers the aggregating state (with its aggregate function) and the counter state.
     *
     * @param parameters configuration passed in by the runtime (unused here)
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        avgTsAggState = getRuntimeContext().getAggregatingState(
                new AggregatingStateDescriptor<Events, Tuple2<Long, Long>, Long>(
                        "avg-ts",
                        new AggregateFunction<Events, Tuple2<Long, Long>, Long>() {
                            @Override
                            public Tuple2<Long, Long> createAccumulator() {
                                // f0 = sum of timestamps, f1 = number of elements.
                                return Tuple2.of(0L, 0L);
                            }

                            @Override
                            public Tuple2<Long, Long> add(Events events, Tuple2<Long, Long> longLongTuple2) {
                                return Tuple2.of(longLongTuple2.f0 + events.timestamp, longLongTuple2.f1 + 1);
                            }

                            @Override
                            public Long getResult(Tuple2<Long, Long> longLongTuple2) {
                                // Integer division: the average is truncated.
                                return longLongTuple2.f0 / longLongTuple2.f1;
                            }

                            @Override
                            public Tuple2<Long, Long> merge(Tuple2<Long, Long> longLongTuple2, Tuple2<Long, Long> acc1) {
                                // Combine two partial accumulators component-wise; returning
                                // null here would break any caller that merges accumulators.
                                return Tuple2.of(longLongTuple2.f0 + acc1.f0, longLongTuple2.f1 + acc1.f1);
                            }
                        },
                        Types.TUPLE(Types.LONG, Types.LONG)));

        countState = getRuntimeContext().getState(new ValueStateDescriptor<Long>("count", Long.class));
    }

    /**
     * Counts the element, folds it into the average and, once {@code count} elements
     * have been collected, emits the average and clears both states.
     *
     * @param events    the incoming element
     * @param collector collector receiving the formatted average
     */
    @Override
    public void flatMap(Events events, Collector<String> collector) throws Exception {
        // Increment the per-key counter, starting at 1 when the state is empty.
        Long currCount = countState.value();
        if (currCount == null) {
            currCount = 1L;
        } else {
            currCount++;
        }

        countState.update(currCount);
        avgTsAggState.add(events);

        // Threshold reached: emit and reset so the next batch starts from scratch.
        if (currCount.equals(count)) {
            collector.collect(events.user + "过去" + count + "次" + "访问平均时间戳是:" + avgTsAggState.get());
            countState.clear();
            avgTsAggState.clear();
        }
    }
}
7.状态生存时间(TTL)
如果状态的存在时间已经超过 TTL,就将其清除。
// Descriptor of the value state that the TTL settings are attached to.
ValueStateDescriptor<Events> valueStateDescriptor =
        new ValueStateDescriptor<>("my-statu", Events.class);

// Assemble the TTL configuration one option at a time; the time-to-live is one hour.
StateTtlConfig.Builder ttlBuilder = StateTtlConfig.newBuilder(Time.hours(1));
// Update type OnReadAndWrite: selects which accesses refresh the expiry timestamp
// (per its name, reads as well as writes).
ttlBuilder = ttlBuilder.setUpdateType(StateTtlConfig.UpdateType.OnReadAndWrite);
// Visibility ReturnExpiredIfNotCleanedUp: an expired value may still be returned
// as long as it has not been cleaned up yet.
ttlBuilder = ttlBuilder.setStateVisibility(StateTtlConfig.StateVisibility.ReturnExpiredIfNotCleanedUp);
StateTtlConfig ttlConfig = ttlBuilder.build();

// Enable TTL on the descriptor with the configuration built above.
valueStateDescriptor.enableTimeToLive(ttlConfig);