flink状态管理和容错机制
State
Keyed State
顾名思义,就是基于KeyedStream上的状态。这个状态是跟特定的key绑定的,对KeyedStream流上的每一个key,都对应一个state。
ValueState<T>:即类型为T的单值状态。这个状态与对应的key绑定,是最简单的状态。它可以通过update()方法更新状态值,通过value()方法获取状态值。
ListState:即key上的状态值为一个列表。可以通过add方法往列表中附加值;也可以通过get()方法返回一个Iterable来遍历状态值
ReducingState:这种状态通过用户传入的reduceFunction,每次调用add方法添加值的时候,会调用reduceFunction,最后合并到一个单一的状态值
MapState<UK, UV>:即状态值为一个map。用户通过put或putAll方法添加元素
valueState
object StateValue {
  /**
   * Uses ValueState to track the minimum value seen for each word.
   *
   * Bug fixed: the original descriptor set 1000000 as a default value to act
   * as an "unset" sentinel, so the first value for a key larger than 1000000
   * was silently reported as 1000000. The state is now a boxed
   * java.lang.Integer with no default, so null unambiguously means "no value
   * seen yet for this key".
   *
   * @param args unused command-line arguments
   */
  def main(args: Array[String]): Unit = {
    val environment: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // Each input line is expected to be "<word> <number>", '\n'-delimited.
    val dataStream: DataStream[String] = environment.socketTextStream("192.168.169.128", 9000, '\n')
    dataStream.map(_.split(" "))
      .map(k => (k(0), k(1).toInt))
      .keyBy(0)
      .map(new RichMapFunction[(String, Int), (String, Int)] {
        // Boxed so an unset state reads back as null instead of a magic sentinel.
        var leastValueState: ValueState[java.lang.Integer] = _

        override def open(parameters: Configuration): Unit = {
          // Build the ValueState descriptor; no default value (null == unset).
          val leastValueDescriptor = new ValueStateDescriptor[java.lang.Integer]("leastValue", classOf[java.lang.Integer])
          // Obtain the keyed state handle from the runtime context.
          leastValueState = getRuntimeContext.getState(leastValueDescriptor)
        }

        override def map(in: (String, Int)): (String, Int) = {
          val previous = leastValueState.value()
          // First value for this key, or a new minimum: persist and emit it.
          if (previous == null || in._2 < previous.intValue()) {
            leastValueState.update(in._2)
            (in._1, in._2)
          } else {
            (in._1, previous.intValue())
          }
        }
      }).print()
    environment.execute("ValuesState")
  }
}
ListState
object StateList {
  /**
   * Uses ListState to accumulate every (word, num) pair seen for a word and
   * emits the complete list each time a new element arrives.
   *
   * @param args unused command-line arguments
   */
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    val lines: DataStream[String] = env.socketTextStream("192.168.169.128", 9000, '\n')
    lines
      .map(_.split(" "))
      .map(fields => (fields(0), fields(1).toInt))
      .keyBy(0)
      .map(new RichMapFunction[(String, Int), lang.Iterable[(String, Int)]] {
        var wordListState: ListState[(String, Int)] = _

        override def open(parameters: Configuration): Unit = {
          // Register the per-key list state under the name "listState".
          val descriptor = new ListStateDescriptor[(String, Int)]("listState", classOf[Tuple2[String, Int]])
          wordListState = getRuntimeContext.getListState(descriptor)
        }

        override def map(in: (String, Int)): lang.Iterable[(String, Int)] = {
          // Append the new element, then return everything stored so far.
          wordListState.add(in)
          wordListState.get()
        }
      }).print()
    env.execute("ListState")
  }
}
ReducingState
object StateReducing {
  /**
   * Uses ReducingState to maintain a running sum of num for each word.
   *
   * @param args unused command-line arguments
   */
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    val source: DataStream[String] = env.socketTextStream("192.168.169.128", 9000, '\n')
    source
      .map(_.split(" "))
      .map(parts => (parts(0), parts(1).toInt))
      .keyBy(0)
      .map(new RichMapFunction[(String, Int), (String, Int)] {
        var wordNumReduce: ReducingState[(String, Int)] = _

        override def open(parameters: Configuration): Unit = {
          // Merges two partial (word, sum) pairs by adding the counts.
          val sumFunction = new ReduceFunction[(String, Int)] {
            override def reduce(a: (String, Int), b: (String, Int)): (String, Int) =
              (a._1, a._2 + b._2)
          }
          val descriptor = new ReducingStateDescriptor[(String, Int)]("reducingState", sumFunction, classOf[Tuple2[String, Int]])
          wordNumReduce = getRuntimeContext.getReducingState(descriptor)
        }

        override def map(in: (String, Int)): (String, Int) = {
          // Fold the new element into the state, then emit the current aggregate.
          wordNumReduce.add(in)
          wordNumReduce.get()
        }
      }).print()
    env.execute("ReducingState")
  }
}
AggregatingState
object StateAggregating {
  /**
   * Uses AggregatingState to compute a running sum of num for each word.
   *
   * @param args unused command-line arguments
   */
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    val source: DataStream[String] = env.socketTextStream("192.168.169.128", 9000, '\n')
    source
      .map(_.split(" "))
      .map(parts => (parts(0), parts(1).toInt))
      .keyBy(0)
      .map(new RichMapFunction[(String, Int), (String, Int)] {
        var aggregatingState: AggregatingState[(String, Int), (String, Int)] = _

        override def open(parameters: Configuration): Unit = {
          val sumAggregate = new AggregateFunction[(String, Int), (String, Int), (String, Int)] {
            // The accumulator's key slot is overwritten by the first add(),
            // so any placeholder works as the initial value.
            override def createAccumulator(): (String, Int) = ("aaa", 0)

            override def add(in: (String, Int), acc: (String, Int)): (String, Int) =
              (in._1, in._2 + acc._2)

            override def getResult(acc: (String, Int)): (String, Int) = acc

            override def merge(acc: (String, Int), acc1: (String, Int)): (String, Int) =
              (acc._1, acc._2 + acc1._2)
          }
          val descriptor = new AggregatingStateDescriptor[(String, Int), Tuple2[String, Int], (String, Int)]("aggregating", sumAggregate, classOf[Tuple2[String, Int]])
          aggregatingState = getRuntimeContext.getAggregatingState(descriptor)
        }

        override def map(in: (String, Int)): (String, Int) = {
          // Add the element to the aggregate, then emit the current result.
          aggregatingState.add(in)
          aggregatingState.get()
        }
      }).print()
    env.execute("AggregatingState")
  }
}
MapState
object StateMap {
  /**
   * Uses MapState to compute a running sum of num for each word.
   *
   * Bug fixed: the original read `wordMapState.get(in._1)` before the key had
   * an entry; MapState.get returns null for a missing key, and unboxing that
   * null into a Scala Int throws a NullPointerException on the first element
   * of every key. A missing key is now treated as 0.
   *
   * @param args unused command-line arguments
   */
  def main(args: Array[String]): Unit = {
    val environment: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    val dataStream: DataStream[String] = environment.socketTextStream("192.168.169.128", 9000, '\n')
    dataStream.map(_.split(" "))
      .map(k => (k(0), k(1).toInt))
      .keyBy(0)
      .map(new RichMapFunction[(String, Int), (String, Int)] {
        var wordMapState: MapState[String, Int] = _

        override def open(parameters: Configuration): Unit = {
          val mapState = new MapStateDescriptor[String, Int]("mapState", classOf[String], classOf[Int])
          wordMapState = getRuntimeContext.getMapState(mapState)
        }

        override def map(in: (String, Int)): (String, Int) = {
          // First occurrence of a key has no entry yet; start the sum at 0.
          val previous = if (wordMapState.contains(in._1)) wordMapState.get(in._1) else 0
          val total = previous + in._2
          wordMapState.put(in._1, total)
          (in._1, total)
        }
      }).print()
    environment.execute("MapState")
  }
}
Operator State
- 与Key无关的State,与Operator绑定的state,整个operator只对应一个state
- 保存state的数据结构
- ListState
- 举例来说,Flink中的Kafka Connector,就使用了operator state。它会在每个connector实例中,保存该实例中消费topic的所有(partition, offset)映射