package com.idea.fault.code
import java.lang
import java.time.Duration
import org.apache.flink.api.common.eventtime.{SerializableTimestampAssigner, WatermarkStrategy}
import org.apache.flink.api.common.functions.RichMapFunction
import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.KeyedProcessFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.watermark.Watermark
import org.apache.flink.util.Collector
/**
 * One input record of the demo job: a word together with the time it occurred.
 *
 * @param word the word itself; the job keys the stream by this field
 * @param ts   timestamp of the occurrence; the job uses it as the record's event time
 *             (milliseconds, since the job adds 3000 to it to express a 3-second gap)
 */
case class WordEntity(
  word: String,
  ts: Long
)
/**
 * Emits a word once it has not re-appeared for 3 seconds of event time.
 *
 * Input lines are read from a socket in the form `word,timestampMillis`. Records are keyed by
 * word; for every key a single `ValueState` holds the deadline (last occurrence + 3 s) of the
 * currently pending event-time timer. When that timer fires without having been superseded by
 * a newer occurrence, the word is emitted together with the timestamp of its last occurrence.
 *
 * Note: timers are event-time timers, so they fire only when the watermark (max seen timestamp
 * minus the 2 s out-of-orderness bound) passes the deadline, i.e. when newer records arrive.
 */
object ValueStateDemo {

  /** Event-time gap, in milliseconds, after which a word counts as "not repeated". */
  private final val QuietPeriodMs = 3000L

  /**
   * Builds and runs the streaming job.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // 1. Create the execution environment; parallelism 1 keeps the demo output ordered.
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // 2. Source: text lines from a socket.
    val inputDs: DataStream[String] = env.socketTextStream("hadoop102", 9999)

    // 3. Parse each `word,timestamp` line into a WordEntity.
    //    A line without a comma or with a non-numeric timestamp still fails the job.
    val mapDs: DataStream[WordEntity] = inputDs.map(new RichMapFunction[String, WordEntity] {
      override def map(value: String): WordEntity = {
        val fields: Array[String] = value.split(",")
        // trim tolerates "word, 1000" and a trailing '\r' from the socket client
        WordEntity(fields(0), fields(1).trim.toLong)
      }
    })

    // 4. Use WordEntity.ts as event time and tolerate up to 2 s of out-of-order records.
    val watermarkStrategy: WatermarkStrategy[WordEntity] =
      WatermarkStrategy
        .forBoundedOutOfOrderness[WordEntity](Duration.ofSeconds(2))
        .withTimestampAssigner(new SerializableTimestampAssigner[WordEntity] {
          override def extractTimestamp(element: WordEntity, recordTimestamp: Long): Long = element.ts
        })
    val watermarkDs: DataStream[WordEntity] = mapDs.assignTimestampsAndWatermarks(watermarkStrategy)

    // 5. Key by word so that state and timers are scoped per word.
    val keyedbyDs: KeyedStream[WordEntity, String] = watermarkDs.keyBy(_.word)

    // 6. Stateful processing: one pending timer per word.
    val quietWords: DataStream[WordEntity] =
      keyedbyDs.process(new KeyedProcessFunction[String, WordEntity, WordEntity] {

        // Deadline of the pending timer for the current key; null when no timer is pending.
        // A boxed java.lang.Long is used so that "no state" is distinguishable from 0.
        var valueState: ValueState[lang.Long] = _

        // Obtain the state handle once per task instance.
        override def open(parameters: Configuration): Unit = {
          super.open(parameters)
          val valueStateDescriptor: ValueStateDescriptor[lang.Long] =
            new ValueStateDescriptor[lang.Long]("value-state", classOf[lang.Long])
          valueState = getRuntimeContext.getState(valueStateDescriptor)
        }

        // For every occurrence of the word, push the deadline to (occurrence time + 3 s).
        override def processElement(
            value: WordEntity,
            ctx: KeyedProcessFunction[String, WordEntity, WordEntity]#Context,
            out: Collector[WordEntity]): Unit = {
          // ctx.timestamp() is null when the record carries no timestamp; fall back to the payload.
          val recordTs: lang.Long = ctx.timestamp()
          val lastSeen: Long = if (recordTs != null) recordTs.longValue() else value.ts
          val deadline: Long = lastSeen + QuietPeriodMs

          val pending: lang.Long = valueState.value()
          // Only a later deadline replaces the pending one, so a late (out-of-order) record
          // cannot pull the deadline backwards and cause a premature emission.
          if (pending == null || deadline > pending.longValue()) {
            if (pending != null) {
              // The superseded timer would be ignored in onTimer anyway; delete it to save work.
              ctx.timerService().deleteEventTimeTimer(pending.longValue())
            }
            valueState.update(lang.Long.valueOf(deadline))
            ctx.timerService().registerEventTimeTimer(deadline)
          }
        }

        // Fires when the watermark passes a registered deadline.
        override def onTimer(
            timestamp: Long,
            ctx: KeyedProcessFunction[String, WordEntity, WordEntity]#OnTimerContext,
            out: Collector[WordEntity]): Unit = {
          val pending: lang.Long = valueState.value()
          // Emit only if this timer is still the current one for the word.
          if (pending != null && pending.longValue() == timestamp) {
            // Report the word with the timestamp of its last occurrence.
            out.collect(WordEntity(ctx.getCurrentKey, timestamp - QuietPeriodMs))
            // The word is done until it appears again; release its state.
            valueState.clear()
          }
        }
      })

    // 7. Sink: without one the emitted words would be silently discarded.
    quietWords.print()

    // 8. Launch the job.
    env.execute("ValueStateDemo")
  }
}
// ValueState
// (page footer captured with the listing) Latest recommended article published 2023-10-08 15:45:45