Flink_电商用户统计——sql

package com.hotitems_analysis

import java.util.Properties

import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011
import org.apache.flink.table.api.{EnvironmentSettings, Slide, Table}
import org.apache.flink.table.api.scala._
import org.apache.flink.types.Row

/**
 * Hot-items Top-N job implemented with the Flink Table API plus SQL.
 *
 * Pipeline, as written below:
 *   1. Read CSV lines from a Kafka topic.
 *   2. Parse each line into a `UserBehavior` and assign ascending event-time timestamps.
 *   3. Keep only "pv" records, count them per item in a 1-hour window sliding every 5 minutes.
 *   4. Rank the per-window counts with `row_number()` in SQL and keep the first five rows
 *      of each window.
 *   5. Print the result as a retract stream.
 *
 * NOTE(review): `UserBehavior` is not defined in this file; it is assumed to be a case class
 * declared elsewhere in this package whose fields include `itemId`, `behavior` and
 * `timestamp` — confirm.
 */
object HotItemsWithSql {

  /**
   * Entry point: builds the streaming job and submits it for execution.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    // 1. Create the stream execution environment.
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // 2. Use event-time semantics.
    // env.setParallelism(1)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    // 3. Alternative input for local runs: read from a file instead of Kafka.
    // val inputStream: DataStream[String] = env.readTextFile("in/User2.csv")

    // 4. Define the Kafka source.
    val properties = new Properties()
    properties.setProperty("bootstrap.servers", "hadoop203:9092")
    properties.setProperty("group.id", "consumer-group")
    properties.setProperty("key.deserializer",
      "org.apache.kafka.common.serialization.StringDeserializer")
    properties.setProperty("value.deserializer",
      "org.apache.kafka.common.serialization.StringDeserializer")
    // NOTE(review): the topic name "hotitiems" looks like a misspelling of "hotitems";
    // it is kept unchanged because it must match the topic that actually exists — confirm.
    val inputStream: DataStream[String] = env.addSource(
      new FlinkKafkaConsumer011[String]("hotitiems", new SimpleStringSchema(), properties))

    // 5. Convert each line to the case class and assign timestamps. Per the original author's
    //    note, the data has been through ETL and contains no out-of-order records, so
    //    ascending timestamps are used and no custom watermark generator is needed.
    //    The timestamp field is multiplied by 1000 (presumably seconds -> milliseconds).
    val dataStream: DataStream[UserBehavior] = inputStream
      .map(data => {
        val arr = data.split(",")
        UserBehavior(arr(0).toLong, arr(1).toLong, arr(2).toInt, arr(3), arr(4).toLong)
      })
      .assignAscendingTimestamps(_.timestamp * 1000L)

    // 6. Create the table environment using the Blink planner in streaming mode.
    val settings: EnvironmentSettings =
      EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build()
    val tableEnv: StreamTableEnvironment = StreamTableEnvironment.create(env, settings)
    // Expose only the needed fields; `timestamp` becomes the rowtime attribute `ts`.
    val dataTable: Table =
      tableEnv.fromDataStream(dataStream, 'itemId, 'behavior, 'timestamp.rowtime as 'ts)

    // 7. Windowed aggregation with the Table API.
    val aggTable: Table = dataTable
      .filter('behavior === "pv") // keep page-view records only
      .window(Slide over 1.hour every 5.minute on 'ts as 'sw) // 1-hour window, 5-minute slide
      .groupBy('itemId, 'sw)
      .select('itemId, 'sw.end as 'windowEnd, 'itemId.count as 'cnt)

    // 8. Top-N selection with SQL: rank items inside each window by count, keep the first 5.
    tableEnv.createTemporaryView("aggtable", aggTable, 'itemId, 'windowEnd, 'cnt)
    val resultTable: Table = tableEnv.sqlQuery(
      """
        |select *
        |from(
        |select
        |*,
        |row_number()
        |over (partition by windowEnd order by cnt desc)
        |as row_num
        |from aggtable)
        |where row_num<=5
        |""".stripMargin)

    // The ranking result is updated as counts change, so it is emitted as a retract stream.
    resultTable.toRetractStream[Row].print("sql")
    env.execute()
  }
}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值