1. Batch processing example
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.core.fs.FileSystem.WriteMode
import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.api.scala.StreamTableEnvironment
import org.apache.flink.table.api.{Table, Types}
import org.apache.flink.table.sinks.CsvTableSink
import org.apache.flink.table.sources.CsvTableSource

object table_batch {
  def main(args: Array[String]): Unit = {
    // Create the execution environment
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    val tableENV: StreamTableEnvironment = StreamTableEnvironment.create(env)
    // Set the parallelism
    env.setParallelism(1)
    val source = CsvTableSource.builder()
      .path("D:\\ideaProject\\flink-base\\output\\test.csv")
      .field("id", Types.INT)
      .field("name", Types.STRING)
      .field("age", Types.INT)
      .fieldDelimiter(",")   // set the field delimiter
      .ignoreParseErrors()   // ignore parse errors
      .ignoreFirstLine()     // skip the header line
      .build()
    // Register the source as a table
    tableENV.registerTableSource("Users", source)
    val resout: Table = tableENV.scan("Users").filter("age > 23").select("id, age")
    // Configure the sink
    val tableSink = new CsvTableSink("./output/8.txt", "\t", 1, WriteMode.OVERWRITE)
    // Register the sink
    tableENV.registerTableSink(
      "UsersOut",
      Array[String]("f1", "f2"),
      Array[TypeInformation[_]](Types.INT, Types.INT),
      tableSink
    )
    resout.insertInto("UsersOut")
    // Execute the job
    env.execute()
  }
}
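Although the heading says batch, the example above runs on the streaming environment. A minimal sketch of the same pipeline on a true batch environment (assuming a Flink 1.9/1.10 setup with the old planner, where ExecutionEnvironment and BatchTableEnvironment are available; the object name table_batch_dataset is hypothetical):

import org.apache.flink.api.scala._
import org.apache.flink.table.api.scala.BatchTableEnvironment
import org.apache.flink.table.api.{Table, Types}
import org.apache.flink.table.sources.CsvTableSource
import org.apache.flink.types.Row

// Hypothetical object name, not part of the original example
object table_batch_dataset {
  def main(args: Array[String]): Unit = {
    // Bounded (DataSet-based) environment instead of the streaming one used above
    val env: ExecutionEnvironment = ExecutionEnvironment.getExecutionEnvironment
    val tableEnv: BatchTableEnvironment = BatchTableEnvironment.create(env)
    env.setParallelism(1)

    // Same CsvTableSource definition as in the example above
    val source = CsvTableSource.builder()
      .path("D:\\ideaProject\\flink-base\\output\\test.csv")
      .field("id", Types.INT)
      .field("name", Types.STRING)
      .field("age", Types.INT)
      .fieldDelimiter(",")
      .ignoreParseErrors()
      .ignoreFirstLine()
      .build()

    tableEnv.registerTableSource("Users", source)
    val adults: Table = tableEnv.scan("Users").filter("age > 23").select("id, age")

    // toDataSet + print() triggers execution directly, so no env.execute() is needed here
    tableEnv.toDataSet[Row](adults).print()
  }
}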
2. Stream processing
import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.api.Table
import org.apache.flink.table.api.scala.StreamTableEnvironment
import org.apache.flink.types.Row

object table_stream {
  def main(args: Array[String]): Unit = {
    // Create the execution environment
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // Set the parallelism
    env.setParallelism(1)
    // Create the table environment
    val tableEnvironment: StreamTableEnvironment = StreamTableEnvironment.create(env)
    // Create the data source
    val resout: DataStream[Users] = env.socketTextStream("localhost", 9999)
      .map(_.split(","))
      .map(x => Users(x(0).toInt, x(1), x(2).toInt))
    // Register the stream as a table
    tableEnvironment.registerDataStream("Users", resout)
    val table: Table = tableEnvironment.scan("Users").filter("age > 23")
    tableEnvironment.toAppendStream[Row](table).print()
    // Execute the job
    env.execute()
  }

  case class Users(id: Int, name: String, age: Int)
}
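registerDataStream above derives the column names and types from the Users case class. As a sketch under the same setup, the columns could also be named explicitly with field expressions, and the result read back as typed Users objects instead of Row; this snippet would replace the registration and query lines inside main above (the table name UsersNamed is hypothetical):

import org.apache.flink.table.api.scala._   // enables the 'field expression syntax

// Register with explicit column names instead of relying on case-class field derivation
tableEnvironment.registerDataStream("UsersNamed", resout, 'id, 'name, 'age)
val adults: Table = tableEnvironment.scan("UsersNamed").filter("age > 23")
// Read the filtered rows back as typed Users objects rather than Row
tableEnvironment.toAppendStream[Users](adults).print()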
3. Integrating the Table API with windows
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.watermark.Watermark
import org.apache.flink.table.api.Table
import org.apache.flink.table.api.scala._
import org.apache.flink.types.Row

object table_window {
  def main(args: Array[String]): Unit = {
    // Create the execution environment
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // Set the parallelism
    env.setParallelism(1)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    // Create the table environment
    val tableEnvironment: StreamTableEnvironment = StreamTableEnvironment.create(env)
    // Create the data source
    val messageDS: DataStream[Message] = env.socketTextStream("localhost", 9999)
      .map { x =>
        val t = x.split(",")
        Message(t(0), t(1).toLong)
      }
      .assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks[Message] {
        // Maximum allowed out-of-orderness (lateness) in milliseconds
        val maxOutTime = 5000L
        // Largest event time seen so far
        var currentMaxTimestamp: Long = _
        override def getCurrentWatermark: Watermark = {
          // Emit a watermark that lags behind the max event time by maxOutTime
          new Watermark(currentMaxTimestamp - maxOutTime)
        }
        override def extractTimestamp(t: Message, l: Long): Long = {
          val eventTime: Long = t.createTime
          currentMaxTimestamp = Math.max(currentMaxTimestamp, eventTime)
          eventTime
        }
      })
    // Create the table, using createTime as the rowtime attribute
    val table: Table = tableEnvironment.fromDataStream(messageDS, 'word, 'createTime.rowtime)
    // Apply a 5-second tumbling window
    val table_window: Table = table.window(Tumble over 5.second on 'createTime as 'window)
      .groupBy('window, 'word)
      .select('word, 'window.start, 'window.end, 'word.count)
    tableEnvironment.toRetractStream[Row](table_window)
      .filter(_._1).print()
    // Execute the job
    env.execute()
  }

  case class Message(word: String, createTime: Long)
}
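The periodic watermark assigner above tracks the maximum event time by hand. A minimal alternative sketch, assuming the Flink version in use still ships BoundedOutOfOrdernessTimestampExtractor, expresses the same 5-second allowed lateness more compactly and would replace the argument to assignTimestampsAndWatermarks above:

import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
import org.apache.flink.streaming.api.windowing.time.Time

// Built-in bounded-out-of-orderness assigner: watermark = max event time - 5 s
val assigner = new BoundedOutOfOrdernessTimestampExtractor[Message](Time.seconds(5)) {
  override def extractTimestamp(m: Message): Long = m.createTime
}
// Usage: env.socketTextStream(...).map(...).assignTimestampsAndWatermarks(assigner)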
5. Integrating SQL with windows
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.watermark.Watermark
import org.apache.flink.table.api.Table
import org.apache.flink.table.api.scala._
import org.apache.flink.types.Row

object SQL_windows {
  def main(args: Array[String]): Unit = {
    // Create the execution environment
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // Set the parallelism
    env.setParallelism(1)
    val tableEnvironment: StreamTableEnvironment = StreamTableEnvironment.create(env)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    // Create the data source
    val messageDS: DataStream[Message] = env.socketTextStream("localhost", 9999)
      .map { r =>
        val t = r.split(",")
        Message(t(0), t(1).toLong)
      }
      .assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks[Message] {
        // Maximum allowed out-of-orderness (lateness) in milliseconds
        val maxOutTime = 5000L
        // Largest event time seen so far
        var currentMaxTimestamp: Long = _
        override def getCurrentWatermark: Watermark = {
          new Watermark(currentMaxTimestamp - maxOutTime)
        }
        override def extractTimestamp(t: Message, l: Long): Long = {
          val eventTime: Long = t.createTime
          currentMaxTimestamp = Math.max(eventTime, currentMaxTimestamp)
          eventTime
        }
      })
    // Register the stream as a table, using createTime as the rowtime attribute
    tableEnvironment.registerDataStream("t_word", messageDS, 'word, 'createTime.rowtime)
    val table: Table = tableEnvironment.sqlQuery(
      "select word, count(*) from t_word group by tumble(createTime, interval '5' second), word")
    tableEnvironment.toRetractStream[Row](table).filter(_._1).print()
    // Execute the job
    env.execute()
  }

  case class Message(word: String, createTime: Long)
}
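The SQL query above returns only the word and its count. As a sketch under the same setup, the group window's bounds can also be exposed with the TUMBLE_START and TUMBLE_END auxiliary functions, mirroring 'window.start and 'window.end in the Table API example (the alias names window_start, window_end, and cnt are illustrative):

// Same tumbling-window aggregation, additionally selecting the window bounds
val tableWithBounds: Table = tableEnvironment.sqlQuery(
  """
    |SELECT
    |  word,
    |  TUMBLE_START(createTime, INTERVAL '5' SECOND) AS window_start,
    |  TUMBLE_END(createTime, INTERVAL '5' SECOND)   AS window_end,
    |  COUNT(*) AS cnt
    |FROM t_word
    |GROUP BY TUMBLE(createTime, INTERVAL '5' SECOND), word
    |""".stripMargin)
tableEnvironment.toRetractStream[Row](tableWithBounds).filter(_._1).print()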