文章目录
用于测试的CustomSourceFunction
/**
 * Test source that emits one random `(key, value)` pair roughly every second
 * until it is cancelled.
 *
 * The key is picked at random from the letters "a" to "g"; the value is a
 * random integer in the range [0, 10).
 */
class CustomSourceFunction extends RichSourceFunction[(String, Int)] {
  // Flink calls cancel() from a different thread than the one running run().
  // @volatile guarantees the emitting loop observes the write and terminates.
  @volatile var flag = true

  /**
   * Emits random pairs through `ctx` while the source has not been cancelled.
   *
   * @param ctx source context used to emit the generated records
   */
  override def run(ctx: SourceFunction.SourceContext[(String, Int)]): Unit = {
    val arr: Array[String] = Array("a", "b", "c", "d", "e", "f", "g")
    val random: Random = new Random()
    while (flag) {
      Thread.sleep(1000)
      // Pick a random element of the key array.
      val key: String = arr(random.nextInt(arr.length))
      val rightNow: Int = random.nextInt(10)
      ctx.collect((key, rightNow))
    }
  }

  /** Requests the emitting loop in `run` to stop. */
  override def cancel(): Unit = {
    flag = false
  }
}
State
OperatorState 算子状态的作用范围限定为算子任务
ListState
UnionListState
BroadcastState
KeyedState 根据输入数据流中定义的键(key)来维护和访问
ValueState
/**
 * Demonstrates keyed `ValueState`: for every key, only records whose second
 * field exceeds the largest value seen so far for that key are emitted.
 */
object ValueStateDemo {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // NOTE(review): CustomSourceFunction2 is not defined in this section; it is
    // expected to emit (String, Long) pairs - confirm against its definition.
    val customSourceFunction: CustomSourceFunction2 = new CustomSourceFunction2
    val stream: DataStream[(String, Long)] = env.addSource(customSourceFunction)
    stream.keyBy(0)
      .flatMap(flatMapWithState)
      .print("stream")
    env.execute()
  }

  /**
   * Builds a stateful flat-map function that tracks the maximum second field
   * per key.
   *
   * @return a function that emits "key----value" whenever a new per-key
   *         maximum arrives and prints a notice otherwise
   */
  def flatMapWithState: RichFlatMapFunction[(String, Long), String] = {
    new RichFlatMapFunction[(String, Long), String] {
      // The state holds a boxed java.lang.Long on purpose: reading an unset
      // state yields null, and a Scala Long would silently unbox that null to
      // 0, making "no value yet" indistinguishable from a stored 0.
      var timeState: ValueState[java.lang.Long] = _

      override def open(parameters: Configuration): Unit = {
        // Initialise the state handle.
        timeState = getRuntimeContext.getState(
          new ValueStateDescriptor[java.lang.Long]("maxTime", classOf[java.lang.Long]))
      }

      override def flatMap(value: (String, Long), out: Collector[String]): Unit = {
        val (key, time) = value
        // None means no maximum has been stored for the current key yet.
        val previousMax: Option[Long] = Option(timeState.value()).map(_.longValue())
        // Update when this is the first record for the key or a larger value.
        if (previousMax.forall(time > _)) {
          timeState.update(Long.box(time))
          out.collect(key + "----" + time)
        } else {
          // Otherwise leave the state untouched.
          println("没有更新")
        }
      }
    }
  }
}
ListState
/**
 * Demonstrates keyed `ListState`: readings greater than 5 are appended to a
 * per-key list, and the current content of that list is emitted for every
 * incoming record.
 */
object ListStateDemo {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    val source: CustomSourceFunction = new CustomSourceFunction
    val input: DataStream[(String, Int)] = env.addSource(source)
    input
      .keyBy(0)
      .flatMap(flatMapWithState)
      .print("stream")
    env.execute()
  }

  /**
   * Builds the stateful flat-map function used by this demo.
   *
   * @return a function that emits, for each input record, the list of
   *         "key---value" entries accumulated so far for that record's key
   */
  def flatMapWithState: RichFlatMapFunction[(String, Int), List[String]] = {
    new RichFlatMapFunction[(String, Int), List[String]] {
      var timeState: ListState[String] = _

      override def open(parameters: Configuration): Unit = {
        // Obtain the list-state handle.
        val descriptor = new ListStateDescriptor[String]("listState", classOf[String])
        timeState = getRuntimeContext.getListState(descriptor)
      }

      override def flatMap(value: (String, Int), out: Collector[List[String]]): Unit = {
        val (key, reading) = value
        if (reading > 5) {
          // Readings above 5 are recorded in the state.
          timeState.add(s"$key---$reading")
        } else {
          println(s"$key===$reading" + "非异常数据")
        }

        // Copy the current state content into an immutable list and emit it.
        val snapshot = ListBuffer.empty[String]
        val it = timeState.get().iterator()
        while (it.hasNext) {
          snapshot += it.next()
        }
        out.collect(snapshot.toList)
      }
    }
  }
}
MapState
用法类似于ListState,区别在于状态以键值对(key-value)的形式存取。
ReducingState&AggregatingState
/**
 * Demonstrates keyed `ReducingState`: every incoming value is folded into a
 * per-key running maximum, and that maximum is emitted for each record.
 */
object ReducingStateDemo {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    val source: CustomSourceFunction = new CustomSourceFunction
    val input: DataStream[(String, Int)] = env.addSource(source)
    input
      .keyBy(0)
      .map(mapWithState)
      .print("stream")
    env.execute()
  }

  /**
   * Builds the stateful map function used by this demo.
   *
   * @return a function that adds each record's second field to the reducing
   *         state and returns the state's current value
   */
  def mapWithState: RichMapFunction[(String, Int), Int] = {
    new RichMapFunction[(String, Int), Int] {
      var timeState: ReducingState[Int] = _

      /**
       * Creates the reducing state.
       *
       * A ReducingState is a container that carries its own ReduceFunction:
       * the function is applied whenever a value is added, so the business
       * logic lives in the state itself. With ValueState, by contrast, the
       * business logic has to be written outside the state initialisation.
       *
       * @param parameters configuration passed to this function on open
       */
      override def open(parameters: Configuration): Unit = {
        // Reducer that keeps the larger of the two values.
        val keepLarger = new ReduceFunction[Int] {
          override def reduce(value1: Int, value2: Int): Int = math.max(value1, value2)
        }
        val descriptor = new ReducingStateDescriptor[Int]("reducingState", keepLarger, classOf[Int])
        timeState = getRuntimeContext.getReducingState(descriptor)
      }

      override def map(value: (String, Int)): Int = {
        timeState.add(value._2)
        timeState.get()
      }
    }
  }
}
State Backend 状态后端
概念
State Backend 状态后端
生产配置
// State backend configuration (this fragment relies on an `env` defined outside the snippet).
// There are two levels: application-level and cluster-level configuration.
// Application-level configuration:
// MemoryStateBackend does not need to be configured explicitly.
// Configure FsStateBackend; the placeholder string stands for a local file:// URI or an HDFS hdfs:// URI.
// NOTE(review): the two setStateBackend calls below look like alternatives - presumably only one should be kept; confirm.
env.setStateBackend(new FsStateBackend("本地文件系统file:// 或者 HDFS文件系统hdfs://"))
// Configure RocksDBStateBackend.
env.setStateBackend(new RocksDBStateBackend("hdfs://"))
// Cluster-level configuration goes into flink-conf.yaml. Production clusters are usually operated by someone else rather than set up yourself, so application-level configuration is the practical choice.