package com.hotitems_analysis
import java.util.Properties
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011
import org.apache.flink.table.api.{EnvironmentSettings, Slide, Table}
import org.apache.flink.table.api.scala._
import org.apache.flink.types.Row
object HotItemsWithSql {

  /**
   * Flink streaming job: reads user-behavior events from Kafka, counts "pv"
   * events per item over 1-hour sliding windows (5-minute slide) using the
   * Table API, then selects the top 5 items per window with a SQL
   * ROW_NUMBER() query and prints the retract stream.
   *
   * Fix: the original file used typographic (curly) quotes in every string
   * literal, which does not compile; all quotes are now ASCII `"`.
   */
  def main(args: Array[String]): Unit = {
    // 1. Create the streaming execution environment.
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // 2. Event-time semantics: windows are driven by record timestamps.
    // env.setParallelism(1)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    // Alternative local-file source, kept for reference:
    // val inputStream: DataStream[String] = env.readTextFile("in/User2.csv")

    // 3. Kafka consumer configuration.
    val properties = new Properties()
    properties.setProperty("bootstrap.servers", "hadoop203:9092")
    properties.setProperty("group.id", "consumer-group")
    properties.setProperty("key.deserializer",
      "org.apache.kafka.common.serialization.StringDeserializer")
    properties.setProperty("value.deserializer",
      "org.apache.kafka.common.serialization.StringDeserializer")
    // NOTE(review): topic name "hotitiems" looks like a typo for "hotitems" —
    // kept as-is to match the producer; confirm against the actual topic.
    val inputStream: DataStream[String] = env.addSource(
      new FlinkKafkaConsumer011[String]("hotitiems", new SimpleStringSchema(), properties))

    // 4. Parse each CSV line into a UserBehavior and assign ascending event-time
    //    timestamps. The data is ETL-cleaned and already in order, so ascending
    //    timestamp assignment suffices — no explicit watermark strategy needed.
    val dataStream: DataStream[UserBehavior] = inputStream
      .map(data => {
        val arr = data.split(",")
        UserBehavior(arr(0).toLong, arr(1).toLong, arr(2).toInt, arr(3), arr(4).toLong)
      })
      .assignAscendingTimestamps(_.timestamp * 1000L) // seconds -> milliseconds

    // 5. Create the table environment using the Blink planner in streaming mode.
    val settings: EnvironmentSettings =
      EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build()
    val tableEnv: StreamTableEnvironment = StreamTableEnvironment.create(env, settings)
    // Expose the event timestamp as rowtime attribute 'ts.
    val dataTable: Table =
      tableEnv.fromDataStream(dataStream, 'itemId, 'behavior, 'timestamp.rowtime as 'ts)

    // 6. Windowed aggregation: count "pv" events per item in 1-hour windows
    //    sliding every 5 minutes.
    val aggTable: Table = dataTable
      .filter('behavior === "pv")
      .window(Slide over 1.hour every 5.minute on 'ts as 'sw)
      .groupBy('itemId, 'sw)
      .select('itemId, 'sw.end as 'windowEnd, 'itemId.count as 'cnt)

    // 7. Top-N per window via SQL: rank items by count inside each window and
    //    keep the first 5. (Original called .stripMargin on a string without
    //    `|` margins — a no-op; margins added so it actually strips.)
    tableEnv.createTemporaryView("aggtable", aggTable, 'itemId, 'windowEnd, 'cnt)
    val resultTable: Table = tableEnv.sqlQuery(
      """
        |select *
        |from (
        |  select *,
        |         row_number() over (partition by windowEnd order by cnt desc) as row_num
        |  from aggtable)
        |where row_num <= 5
      """.stripMargin)

    // ROW_NUMBER results are updated as new rows arrive, so a retract stream
    // (not append) is required.
    resultTable.toRetractStream[Row].print("sql")
    env.execute()
  }
}