Flink table SQL

  1. 批处理的案例
object table_batch {
  /**
   * Batch-style example: reads a CSV file through the legacy Table API,
   * filters/projects it, and writes the result back out as CSV.
   *
   * NOTE(review): despite the "batch" label this uses a
   * StreamExecutionEnvironment, mirroring the original article — confirm
   * whether a BatchTableEnvironment was intended.
   */
  def main(args: Array[String]): Unit = {
    // Create the execution environment and its table environment.
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    val tableENV: StreamTableEnvironment = StreamTableEnvironment.create(env)

    // A single parallel task keeps the CSV output in one file.
    env.setParallelism(1)

    // Describe the CSV source schema: id,name,age with a header row.
    // (was `var` — the builder result is never reassigned, so `val`.)
    val source = CsvTableSource.builder()
      .path("D:\\ideaProject\\flink-base\\output\\test.csv")
      .field("id", Types.INT)
      .field("name", Types.STRING)
      .field("age", Types.INT)
      .fieldDelimiter(",") // column delimiter
      .ignoreParseErrors() // skip malformed rows instead of failing the job
      .ignoreFirstLine() // skip the header line
      .build()
    // Register the source under the table name "Users".
    tableENV.registerTableSource("Users", source)

    // Keep only users older than 23 and project the (id, age) columns.
    val result: Table = tableENV.scan("Users").filter("age>23").select("id,age")

    // Sink: tab-separated CSV, single output file, overwrite on re-run.
    val tableSink =
      new CsvTableSink("./output/8.txt", "\t", 1, WriteMode.OVERWRITE)
    // Register the sink with its two INT columns.
    tableENV.registerTableSink(
      "UsersOut",
      Array[String]("f1", "f2"),
      Array[TypeInformation[_]](Types.INT, Types.INT),
      tableSink
    )

    result.insertInto("UsersOut")

    // Launch the job.
    env.execute()
  }
}

2.流式处理

object table_stream {

  /**
   * Streaming example: parses "id,name,age" lines arriving on a socket into
   * Users records, registers them as a table, filters on age, and prints the
   * matching rows as an append stream.
   */
  def main(args: Array[String]): Unit = {

    // Streaming execution environment with a single parallel task.
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // Table environment on top of the streaming environment.
    val tableEnvironment: StreamTableEnvironment = StreamTableEnvironment.create(env)

    // Source: one CSV line per socket message -> Users(id, name, age).
    val userStream: DataStream[Users] = env
      .socketTextStream("localhost", 9999)
      .map { line =>
        val fields = line.split(",")
        Users(fields(0).toInt, fields(1).toString, fields(2).toInt)
      }

    // Register the stream as the "Users" table and keep rows with age > 23.
    tableEnvironment.registerDataStream("Users", userStream)
    val table: Table = tableEnvironment.scan("Users").filter("age>23")
    tableEnvironment.toAppendStream[Row](table).print()

    // Launch the job.
    env.execute()
  }

  case class Users(id: Int, name: String, age: Int)
}

3.Table和window 进行整合

// Section 3: Table API + tumbling-window integration over event time.
// NOTE(review): the enclosing `object` header was lost in extraction — this
// `def main` and the closing brace below belong to an unnamed object.
def main(args: Array[String]): Unit = {

    // Create the streaming execution environment.
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // Single parallel task.
    env.setParallelism(1)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    // Create the table environment.
    val tableEnvironment: StreamTableEnvironment = StreamTableEnvironment.create(env)
    // Build the data source.

    val messageDS: DataStream[Message] = env.socketTextStream("localhost", 9999)
      .map {
        x =>
          // Each socket line is expected to be "word,timestamp".
          var t = x.split(",")
          Message(t(0).toString, t(1).toLong)
      }
      .assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks[Message] {

        // Maximum allowed lateness, in milliseconds.
        var maxOutTime = 5000L
        // Largest event timestamp observed so far.
        var currentMaxTimestamp: Long = _

        override def getCurrentWatermark: Watermark = {

          // Watermark trails the max observed timestamp by maxOutTime ms.
          val watermark = new Watermark(currentMaxTimestamp - maxOutTime)
          watermark
        }

        override def extractTimestamp(t: Message, l: Long): Long = {
          // Use the record's createTime field as its event timestamp.
          val eventTime: Long = t.createTime
          currentMaxTimestamp= Math.max(currentMaxTimestamp, eventTime)
          eventTime

        }
      })
    // Convert the stream into a Table with an event-time (rowtime) attribute.
    import org.apache.flink.table.api.scala._
    val table: Table = tableEnvironment.fromDataStream(messageDS, 'word,'createTime.rowtime)

    // 5-second tumbling window: per-window word counts with window bounds.
    val table_window: Table = table.window(Tumble over 5.second on 'createTime as 'window)
      .groupBy('window, 'word)
      .select('word, 'window.start, 'window.end, 'word.count)

    // Retract stream: keep only insert records (flag == true).
    tableEnvironment.toRetractStream[Row](table_window)
      .filter(x => x._1 == true).print()
    // Launch the job.
    // NOTE(review): both tableEnvironment.execute and env.execute are called;
    // depending on the Flink version the second call may fail because the job
    // was already submitted — confirm which one is intended.
    tableEnvironment.execute("table")
    env.execute()
  }
  case class Message(word: String, createTime: Long)
}

4.SQL和window进行整合

object SQL_windows {
  /**
   * SQL + window example: builds an event-time word stream from a socket,
   * counts words in 5-second tumbling windows via a SQL query, and prints the
   * result as a retract stream (retraction records filtered out).
   */
  def main(args: Array[String]): Unit = {
    // Streaming environment: one parallel task, event-time semantics.
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    val tableEnvironment: StreamTableEnvironment = StreamTableEnvironment.create(env)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    // Source: "word,timestamp" lines -> Message records, with event-time
    // timestamps and a watermark lagging 5s behind the max seen timestamp.
    val events: DataStream[Message] = env
      .socketTextStream("localhost", 9999)
      .map { line =>
        val parts = line.split(",")
        Message(parts(0).toString, parts(1).toLong)
      }
      .assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks[Message] {
        // Allowed lateness, in milliseconds.
        val lagMs = 5000L
        // Largest event timestamp observed so far.
        var maxSeenTimestamp: Long = _

        // Periodic watermark trailing the max observed timestamp by lagMs.
        override def getCurrentWatermark: Watermark =
          new Watermark(maxSeenTimestamp - lagMs)

        // Use the record's createTime as its event timestamp.
        override def extractTimestamp(m: Message, previous: Long): Long = {
          maxSeenTimestamp = Math.max(m.createTime, maxSeenTimestamp)
          m.createTime
        }
      })

    import org.apache.flink.table.api.scala._
    // Register the stream with an event-time (rowtime) attribute.
    tableEnvironment.registerDataStream("t_word", events, 'word, 'createTime.rowtime)
    // Count words per 5-second tumbling window.
    val table: Table = tableEnvironment.sqlQuery("select word,count(*) from t_word group by tumble(createTime,interval '5' second),word")
    // Retract stream: keep only insert records (flag == true).
    tableEnvironment.toRetractStream[Row](table).filter(_._1).print()

    // Launch the job.
    env.execute()
  }

  case class Message(word: String, createTime: Long)
}

 

 

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值