Project scenario:
Registering a custom Flink table UDF with bsTableEnv.createTemporarySystemFunction("split", Split).
Problem description:
Registration fails with an exception: Scala tuples are not supported. Use case classes or 'org.apache.flink.types.Row' instead.
package UDF
import java.time.Duration
import org.apache.flink.api.common.eventtime._
import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.annotation.{DataTypeHint, FunctionHint}
import org.apache.flink.table.api._
import org.apache.flink.table.api.bridge.scala._
import org.apache.flink.table.functions.TableFunction
import org.apache.flink.types.Row
object scalarFunction {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
val bsSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build()
// env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime)
val bsTableEnv = StreamTableEnvironment.create(env, bsSettings)
val value: DataStream[String] = env.readTextFile("C:\\Users\\zhou\\IdeaProjects\\flinktable\\conf\\inputONE.txt")
val unit = value.map(x => {
val strings = x.split(",")
Demo(strings(0), strings(1).toInt, strings(2).toLong)
}
// assign event-time timestamps and watermarks
).assignTimestampsAndWatermarks(WatermarkStrategy.forBoundedOutOfOrderness(Duration.ofMillis(1)).withTimestampAssigner(new SerializableTimestampAssigner[Demo]() {
override def extractTimestamp(element: Demo, recordTimestamp: Long): Long = {
element.timeStr
}
}))
val table: Table = bsTableEnv.fromDataStream(unit, 'id, 'age, 'timeStr.rowtime)
bsTableEnv.createTemporaryView("sensor",table)
// Split takes the delimiter as a constructor argument, so register an
// instance (the "#" delimiter here is illustrative)
bsTableEnv.createTemporarySystemFunction("split", new Split("#"))
val table1 = bsTableEnv.sqlQuery(
"""
|select id,word,length
|from sensor ,
|LATERAL TABLE(split(id)) as t(word,length)
|
|""".stripMargin)
table1.toAppendStream[Row].print("sql word")
env.execute("ss")
}
}
// the @FunctionHint tells the new type system which row type this table
// function emits, since it cannot derive that from TableFunction[Row] alone
@FunctionHint(output = new DataTypeHint("ROW<word STRING, length INT>"))
class Split(separator: String) extends TableFunction[Row] {
def eval(str: String): Unit = {
str.split(separator).foreach(x => collect(Row.of(x, Int.box(x.length))))
}
}
case class Demo(id: String, age: Int, timeStr: Long)
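As an aside, the exception message itself suggests case classes as an alternative to Row. A sketch of that variant of Split (untested here; WordLen and SplitCC are illustrative names):

import org.apache.flink.table.functions.TableFunction

// emit a case class so the type system can read the row type
// (word STRING, length INT) from the fields, without a @FunctionHint
case class WordLen(word: String, length: Int)

class SplitCC(separator: String) extends TableFunction[WordLen] {
  def eval(str: String): Unit =
    str.split(separator).foreach(s => collect(WordLen(s, s.length)))
}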
Cause analysis:
When the function is registered through the new API instead of the legacy bsTableEnv.registerFunction("split", new Split()), the new type system has to derive the result type itself, and that derivation fails here: Scala tuples are rejected, and TableFunction[Row] alone carries no field information.
Solution:
The fix is already in the code above: add the annotation @FunctionHint(output = new DataTypeHint("ROW<word STRING, length INT>")) to spell out the type the function returns. Aggregate functions cannot be registered this way: as the official docs note, until aggregate functions are ported to the new type system, this approach applies only to scalar and table functions.
All user-defined functions follow the same basic implementation principles, so aggregate functions should still be registered with bsTableEnv.registerFunction("split", new Split()). If their return type cannot be inferred, override AggregateFunction#getResultType() and AggregateFunction#getAccumulatorType() to specify the result type and the accumulator type respectively; both return a TypeInformation.
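A minimal sketch of that legacy-API route, assuming a hypothetical counting aggregate (MyCount, CountAcc and the String input type are illustrative, not from the original code):

import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.scala._
import org.apache.flink.table.functions.AggregateFunction

// mutable accumulator type
case class CountAcc(var count: Long)

class MyCount extends AggregateFunction[Long, CountAcc] {
  override def createAccumulator(): CountAcc = CountAcc(0L)
  override def getValue(acc: CountAcc): Long = acc.count
  // invoked reflectively by the planner, once per input row
  def accumulate(acc: CountAcc, in: String): Unit = acc.count += 1
  // declare the types the planner cannot derive on its own
  override def getResultType(): TypeInformation[Long] = createTypeInformation[Long]
  override def getAccumulatorType(): TypeInformation[CountAcc] = createTypeInformation[CountAcc]
}
// registered through the legacy API:
// bsTableEnv.registerFunction("myCount", new MyCount())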
Project scenario:
Using low-level functions, in particular ProcessFunction.
Problem description:
When using low-level functions such as ProcessFunction or KeyedProcessFunction, it is easy to hit a "Cannot resolve overloaded method" error that refuses to go away.
Cause analysis:
Be careful when filling in the type parameters of these functions, especially the key type after a keyBy; otherwise the call never resolves.
If the key comes from a KeySelector that returns a tuple, the key type parameter must be that tuple type, e.g. (String, String); if the key is a single value, use that value's own type, e.g. key: Long below. The compiler cannot work this out for you, so derive the type parameters from the upstream operators, especially on Flink 1.12. A sketch of the matching rule follows.
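A minimal sketch (the Event case class and field names are illustrative):

import org.apache.flink.streaming.api.functions.KeyedProcessFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector

case class Event(id: Long, user: String, page: String)

object KeyTypeSketch {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val events: DataStream[Event] = env.fromElements(Event(1L, "a", "home"))

    // single-value KeySelector: the first type parameter is that value's type
    events.keyBy(_.id)
      .process(new KeyedProcessFunction[Long, Event, String] {
        override def processElement(value: Event,
            ctx: KeyedProcessFunction[Long, Event, String]#Context,
            out: Collector[String]): Unit = out.collect(value.page)
      })

    // tuple KeySelector: the first type parameter is the tuple type
    events.keyBy(e => (e.user, e.page))
      .process(new KeyedProcessFunction[(String, String), Event, String] {
        override def processElement(value: Event,
            ctx: KeyedProcessFunction[(String, String), Event, String]#Context,
            out: Collector[String]): Unit = out.collect(value.user)
      })

    env.execute("key-type sketch")
  }
}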
Solution:
package com.atgui.hotiterms_analysisi
import java.sql.Timestamp
import org.apache.flink.api.common.functions.AggregateFunction
import org.apache.flink.api.common.state.{ListState, ListStateDescriptor, ValueState, ValueStateDescriptor}
import org.apache.flink.api.java.tuple.Tuple
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.{KeyedProcessFunction, ProcessFunction}
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.function.{ProcessWindowFunction, WindowFunction}
import org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.util.Collector
import scala.collection.mutable.ListBuffer
/**
* @author Mr.zhou
* @date 2021/6/23
* @version 1.0
* Top-N hot items
**/
object Hotiterms {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)
val input = env.readTextFile("C:\\Users\\zhou\\IdeaProjects\\User\\HotItermsAnalysis\\src\\main\\resources\\UserBehavior.csv")
val value1 = input.map(x => {
val str = x.split(",")
UserBehavior(str(0).toLong, str(1).toLong, str(2).toInt, str(3), str(4).toLong)
}).assignAscendingTimestamps(_.timestamp * 1000)
.filter(_.behavior == "pv")
.keyBy(x => (x.itermId))
.window(SlidingEventTimeWindows.of(Time.hours(10), Time.minutes(5)))
.aggregate(new CountAgg(), new WindowResultFunctionTwo())
val value: DataStream[ItermViewCount] = value1
val output = value.keyBy(x => x.windowEnd)
.process(new TopNHotItems(3))
output.print("top")
value1.print("count")
env.execute("ss")
}
}
case class UserBehavior(userId: Long, itermId: Long, categoryId: Int, behavior: String, timestamp: Long)
// ItermViewCount: case class for the per-window aggregation result
case class ItermViewCount(item: Long, windowEnd: Long, count: Long)
// the aggregation logic (an incremental count)
class CountAgg extends AggregateFunction[UserBehavior, Long, Long] {
override def createAccumulator(): Long = 0L
override def add(userBehavior: UserBehavior, acc: Long): Long = acc + 1
override def getResult(acc: Long): Long = acc
override def merge(acc1: Long, acc2: Long): Long = acc1 + acc2
}
// the window function (WindowFunction variant)
class WindowResultFunction extends WindowFunction[Long, ItermViewCount, Long, TimeWindow] {
override def apply(key: Long, window: TimeWindow, input: Iterable[Long], out: Collector[ItermViewCount]): Unit = {
val itemId: Long = key.asInstanceOf[Long]
val count = input.iterator.next
out.collect(ItermViewCount(itemId, window.getEnd, count))
}
}
// ProcessWindowFunction
class WindowResultFunctionTwo extends ProcessWindowFunction[Long, ItermViewCount, Long, TimeWindow] {
override def process(key: Long, context: Context, elements: Iterable[Long], out: Collector[ItermViewCount]): Unit = {
val itemId: Long = key
val count = elements.iterator.next
// use the window's end timestamp: downstream, TopNHotItems keys by windowEnd
// and registers an event-time timer at windowEnd + 1, which a processing-time
// value would never line up with
val windowEnd = context.window.getEnd
out.collect(ItermViewCount(itemId, windowEnd, count))
}
}
case class CountWithTimestamp(key: String, count: Long, lastModified: Long)
class CountWithTimeoutFunction extends KeyedProcessFunction[Tuple, (String, String), (String, Long)] {
/** The state that is maintained by this process function */
lazy val state: ValueState[CountWithTimestamp] = getRuntimeContext
.getState(new ValueStateDescriptor[CountWithTimestamp]("myState", classOf[CountWithTimestamp]))
override def processElement(
value: (String, String),
ctx: KeyedProcessFunction[Tuple, (String, String), (String, Long)]#Context,
out: Collector[(String, Long)]): Unit = {
// initialize or retrieve/update the state
val current: CountWithTimestamp = state.value match {
case null =>
CountWithTimestamp(value._1, 1, ctx.timestamp)
case CountWithTimestamp(key, count, lastModified) =>
CountWithTimestamp(key, count + 1, ctx.timestamp)
}
// write the state back
state.update(current)
// schedule the next timer 60 seconds from the current event time
ctx.timerService.registerEventTimeTimer(current.lastModified + 60000)
}
override def onTimer(
timestamp: Long,
ctx: KeyedProcessFunction[Tuple, (String, String), (String, Long)]#OnTimerContext,
out: Collector[(String, Long)]): Unit = {
state.value match {
case CountWithTimestamp(key, count, lastModified) if (timestamp == lastModified + 60000) =>
out.collect((key, count))
case _ =>
}
}
}
// Emit the top N hot items of a window: the key is the window end timestamp, the output is the formatted Top-N string. Note the key type parameter is Long here, matching keyBy(x => x.windowEnd) above.
class TopNHotItems(topSize: Int) extends KeyedProcessFunction[Long, ItermViewCount, String] {
private var itemState: ListState[ItermViewCount] = _
override def open(parameters: Configuration): Unit = {
super.open(parameters)
// descriptor holding the state's name and type
val itemsStateDesc = new ListStateDescriptor[ItermViewCount]("itemState-state", classOf[ItermViewCount])
// obtain the state handle from the runtime context
itemState = getRuntimeContext.getListState(itemsStateDesc)
}
override def processElement(input: ItermViewCount, context: KeyedProcessFunction[Long, ItermViewCount, String]#Context, collector: Collector[String]): Unit = {
// buffer every record in state
itemState.add(input)
// register an event-time timer for windowEnd + 1; when it fires, all
// records belonging to the windowEnd window have been collected
// (i.e. onTimer is called once the watermark passes windowEnd + 1)
context.timerService.registerEventTimeTimer(input.windowEnd + 1)
}
override def onTimer(timestamp: Long, ctx: KeyedProcessFunction[Long, ItermViewCount, String]#OnTimerContext, out: Collector[String]): Unit = {
// collect the click counts of all items received
val allItems: ListBuffer[ItermViewCount] = ListBuffer()
import scala.collection.JavaConversions._
for (item <- itemState.get) {
allItems += item
}
// clear the state early to free space
itemState.clear()
// sort by click count, descending
val sortedItems = allItems.sortBy(_.count)(Ordering.Long.reverse).take(topSize)
// format the ranking as a String for printing
val result: StringBuilder = new StringBuilder
result.append("====================================\n")
result.append("时间: ").append(new Timestamp(timestamp - 1)).append("\n")
for (i <- sortedItems.indices) {
val currentItem: ItermViewCount = sortedItems(i)
// e.g. No1: item ID=12224 views=2413
result.append("No").append(i + 1).append(":")
.append(" item ID=").append(currentItem.item)
.append(" views=").append(currentItem.count)
.append("\n")
}
result.append("====================================\n\n")
// throttle the output rate to simulate a rolling real-time result
Thread.sleep(1000)
out.collect(result.toString)
}
}
Project scenario:
Using @Slf4j with Spring Boot produces a "cannot find symbol" error.
Problem description:
When using @Slf4j in a Spring Boot project, compilation fails with a "cannot find symbol" error (the log field Lombok should generate is missing).
Solution:
1. Check that the Lombok IDE plugin is installed.
2. Check that the pom file declares the Lombok dependency.
3. Check for a duplicate declaration: comment out the pom entry and start the application again; sometimes the project already pulls the jar in transitively, and declaring it again causes the error.
Project scenario:
flink new OutputTag("id") cannot be used.
Problem description:
flink new OutputTag("id") fails because the JVM cannot infer the element type.
Exception thrown: Could not determine TypeInformation for the OutputTag type. The most common reason is forgetting to make the OutputTag an anonymous inner class. It is also not possible to use generic type variables with OutputTags, such as 'Tuple2<A, B>'. Tuple types in particular are not recognized.
Solution:
Option one: use the two-argument OutputTag constructor; the second argument pins the type explicitly.
1. TypeHint, for tuple types:
new OutputTag<Tuple3<String, Integer, Long>>("ok", new TypeHint<Tuple3<String, Integer, Long>>(){}.getTypeInfo());
TypeInformation<Tuple2<String, Long>> info = TypeInformation.of(new TypeHint<Tuple2<String, Long>>(){});
or
TypeInformation<Tuple2<String, Long>> info = new TypeHint<Tuple2<String, Long>>(){}.getTypeInfo();
2. Class, for POJO types: pass the POJO's Class so the TypeInformation can be built from it, as sketched below.
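A minimal sketch of this variant, written in Scala for consistency with the rest of the post (the LateEvent type and tag name are illustrative):

import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.util.OutputTag

object SideOutputSketch {
  // hypothetical POJO-style element type for the side output
  case class LateEvent(id: String, ts: Long)

  // TypeInformation.of(classOf[...]) pins the element type explicitly,
  // so the constructor does not have to infer it through generics
  val lateTag = new OutputTag[LateEvent]("late", TypeInformation.of(classOf[LateEvent]))
}

In the Scala DataStream API, OutputTag[LateEvent]("late") from org.apache.flink.streaming.api.scala achieves the same, since it takes the TypeInformation as an implicit parameter.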
