**示例**
编写代码,计算每个用户在5秒时间窗口内的订单总额。
订单数据包含四个字段(订单ID——UUID、用户ID、订单金额、时间戳),要求`添加水印`来解决网络延迟导致的数据迟到问题。
import java.util.UUID
import java.util.concurrent.TimeUnit
import org.apache.commons.lang.time.FastDateFormat
import org.apache.flink.api.scala._
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks
import org.apache.flink.streaming.api.functions.source.{RichSourceFunction, SourceFunction}
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.watermark.Watermark
import org.apache.flink.streaming.api.windowing.time.Time
import scala.util.Random
/**
 * Flink streaming demo: generates one random order per second, assigns
 * event-time timestamps and periodic watermarks that lag the largest seen
 * timestamp by 2 seconds, then sums the order amounts per user over
 * 5-second windows and prints each windowed result.
 */
object WaterMarkDemo {

  /**
   * A single order event.
   *
   * @param orderId   order identifier (a random UUID string in this demo)
   * @param userId    user identifier (0-2 in this demo)
   * @param money     order amount (0-100 in this demo)
   * @param timestamp event time in epoch milliseconds
   */
  case class Order(orderId: String, userId: Int, money: Long, timestamp: Long)

  /**
   * Builds the streaming pipeline and runs it until the job is cancelled.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // 1. Create the stream execution environment.
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // 2. Use event time as the stream time characteristic.
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    // 4. Custom source that emits one random order per second.
    val orderDataStream: DataStream[Order] = env.addSource(new RichSourceFunction[Order] {
      // Written by cancel() and read by the run() loop. Flink invokes cancel()
      // from a different thread than run(), so the flag must be volatile for
      // the loop to reliably observe the cancellation.
      @volatile var isRunning = true

      override def run(ctx: SourceFunction.SourceContext[Order]): Unit = {
        while (isRunning) {
          // - order ID: random UUID
          // - user ID: random in 0-2
          // - order amount: random in 0-100
          // - timestamp: current system time
          val order = Order(
            UUID.randomUUID().toString,
            Random.nextInt(3),
            Random.nextInt(101),
            new java.util.Date().getTime
          )
          ctx.collect(order)
          // Emit one order per second.
          TimeUnit.SECONDS.sleep(1)
        }
      }

      override def cancel(): Unit = isRunning = false
    })

    // 5. Assign timestamps and periodic watermarks.
    val watermarkDataStream = orderDataStream.assignTimestampsAndWatermarks(
      new AssignerWithPeriodicWatermarks[Order] {
        // Largest event timestamp seen so far by this assigner instance.
        var currentTimestamp = 0L
        // Allowed lateness in milliseconds (2 seconds).
        val delayTime = 2000L

        override def getCurrentWatermark: Watermark = {
          // The watermark trails the largest seen event time by delayTime.
          val watermark = new Watermark(currentTimestamp - delayTime)
          // Print watermark time, current event time and current system time.
          val dateFormat = FastDateFormat.getInstance("HH:mm:ss")
          println(s"当前水印时间:${dateFormat.format(watermark.getTimestamp)}, 当前事件时间: ${dateFormat.format(currentTimestamp)}, 当前系统时间: ${dateFormat.format(System.currentTimeMillis())}")
          watermark
        }

        override def extractTimestamp(element: Order, previousElementTimestamp: Long): Long = {
          val timestamp = element.timestamp
          // Keep the running maximum so the watermark never moves backwards.
          currentTimestamp = Math.max(currentTimestamp, timestamp)
          // NOTE(review): this returns the running maximum rather than the
          // element's own timestamp; the two are equal as long as the source
          // emits non-decreasing timestamps, as the demo source above does.
          currentTimestamp
        }
      }
    )

    // 6. Key the stream by user.
    // 7. Apply a 5-second time window.
    // 8. Aggregate: sum the order amounts within each window.
    // 9. Print the results.
    watermarkDataStream
      .keyBy(_.userId)
      .timeWindow(Time.seconds(5))
      .reduce { (order1, order2) =>
        // The aggregate carries the latest order's ID and user, the summed
        // amount, and a timestamp of 0.
        Order(order2.orderId, order2.userId, order1.money + order2.money, 0)
      }
      .print()

    // 10. Start the streaming job.
    env.execute("WarkMarkDemoJob")
  }
}
Flink水印案例及解析
最新推荐文章于 2024-06-17 11:18:26 发布