[Scala] Flink Transform Operators & Sinks

**Flink beginner demo:** the transform operators covered are map, flatMap, filter, keyBy, reduce, split, select, connect, and union; the sink methods are the Kafka sink and a JDBC sink, where the JDBC sink is custom-built.
The learning material all comes from 尚硅谷 (Shang Silicon Valley), open-source and free; many thanks to them.

The data source format is:

```
sensor_1, 1547718199, 35.80018327300259
sensor_1, 1547718210, 28.39839108328901
sensor_6, 1547718203, 15.40298439340308
sensor_6, 1547718211, 23.76213902108990
sensor_1, 1547718201, 34.12333412589598
sensor_7, 1547718202, 6.720945201171228
sensor_10, 1547718205, 38.1010676048934
sensor_10, 1547718206, 33.9334534654789
```

See the code comments for more details.
As always, CSDN has no Scala syntax highlighting, so please bear with it.
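
The `SensorReading` case class is imported from `com.ck.readFrom` and not shown in this post; judging from how it is used below, it presumably looks like this (a minimal sketch, assuming the field names match the usage):

```scala
// Assumed shape of SensorReading: sensor id, Unix timestamp (seconds), temperature reading
case class SensorReading(id: String, timestamp: Long, temperature: Double)
```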

```scala
import java.sql._

import com.ck.readFrom.SensorReading
import org.apache.flink.api.common.functions.{RichMapFunction, FilterFunction}
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction
import org.apache.flink.streaming.api.functions.sink.SinkFunction.Context
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011


object TransformTest {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    val streamFromFile = env.readTextFile("src/main/resources/tep.txt")
    val stream: DataStream[SensorReading] = streamFromFile.map(data => {
      val dataArray = data.split(",")
      SensorReading(dataArray(0).trim, dataArray(1).trim.toLong, dataArray(2).trim.toDouble)
    })

    // Rolling aggregation: keyBy then reduce (demo logic: latest timestamp + 1, latest temperature + 10)
    val aggStream = stream.keyBy(_.id).reduce((x, y) => SensorReading(x.id, y.timestamp + 1, y.temperature + 10))

    // split and select: only the two operators together fully split the stream into separate DataStreams
    // (split/select were deprecated in later Flink versions in favor of side outputs)
    val splitStream = stream.split(data => {
      if (data.temperature > 30) Seq("high") else Seq("low")
    })
    val high = splitStream.select("high")
    val low = splitStream.select("low")
    val all = splitStream.select("high", "low")

    // connect with coMap/coFlatMap: the two streams may carry different element types,
    // but only two streams can be connected at a time
    // (the two mapped outputs below have different tuple types; Scala infers their common supertype)
    val warning = high.map(data => (data.id, data.temperature))
    val connected = warning.connect(low)
    val coMap = connected.map (
      warningData => (warningData._1, warningData._2, "warning"),
      lowData => (lowData.id, "healthy")
    )

    // union requires the streams to have the same element type, but can merge more than two streams
    val unionStream = high.union(low)

    // Function class: a self-implemented UDF
    val udfStream = stream.filter(new MyFilter())

    // Anonymous function; note Scala's shorthand syntax
    val lambdaStream = stream.filter(_.temperature > 30)

    // Rich function demo
    val richStream = stream.map(new MyMapper())

    // Print the results (uncomment as needed)
//    aggStream.print("aggStream ")
//    high.print("high ")
//    low.print("low ")
//    all.print("all ")
//    coMap.print("coMap ")
//    unionStream.print("unionStream ")
//    udfStream.print("udfStream ")
//    lambdaStream.print("lambdaStream ")
//    richStream.print("richStream")

    // Sink the results to Kafka
    val kafkaSinkStream: DataStream[String] = streamFromFile.map(data => {
      val dataArray = data.split(",")
      SensorReading(dataArray(0).trim, dataArray(1).trim.toLong, dataArray(2).trim.toDouble).toString
      // Convert to String to simplify serialization
    })
    kafkaSinkStream.addSink(new FlinkKafkaProducer011[String]("localhost:9092", "test", new SimpleStringSchema()))
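    // To verify the Kafka sink, consume the topic with the standard console consumer, e.g.:
    //   kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic test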

    // Sink the results to MySQL via the custom JDBC sink
    stream.addSink(new MyJdbcSink())

    // Execute the job
    env.execute("TransformTest")
  }
}

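// Custom filter function class: keeps readings whose id starts with "sensor_1"
// (note that this also matches sensor_10, sensor_11, and so on)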
class MyFilter() extends FilterFunction[SensorReading] {
  override def filter(t: SensorReading): Boolean = {
    t.id.startsWith("sensor_1")
  }
}

// RichMapFunction type parameters: first the input type, then the output type
class MyMapper() extends RichMapFunction[SensorReading, String] {
  override def map(in: SensorReading): String = {
    in.id
  }
}

class MyJdbcSink() extends RichSinkFunction[SensorReading] {
  var conn: Connection = _
  var insertStmt: PreparedStatement = _
  var updateStmt: PreparedStatement = _

  // Initialization: create the connection and the prepared statements
  override def open(parameters: Configuration): Unit = {
    super.open(parameters)

    conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/test", "root", "000000")
    insertStmt = conn.prepareStatement("INSERT INTO temperatures (sensor, temp) VALUES (?, ?)")
    updateStmt = conn.prepareStatement("UPDATE temperatures SET temp = ? WHERE sensor = ?")
  }

  // Called once per record: run the SQL on the connection
  override def invoke(value: SensorReading, context: Context[_]): Unit = {
    // Execute the update statement first
    updateStmt.setDouble(1, value.temperature)
    updateStmt.setString(2, value.id)
    updateStmt.execute()

    // If the update matched no rows, execute the insert instead
    if (updateStmt.getUpdateCount == 0) {
      insertStmt.setString(1, value.id)
      insertStmt.setDouble(2, value.temperature)
      insertStmt.execute()
    }
  }

  // Clean-up work on close
  override def close(): Unit = {
    insertStmt.close()
    updateStmt.close()
    conn.close()
  }
}
```
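
The JDBC sink assumes a `temperatures (sensor, temp)` table already exists in the `test` database, e.g. with `sensor VARCHAR(20)` and `temp DOUBLE` columns. For reference, here is a minimal sketch of the build dependencies this demo relies on; the Flink version (1.7.x, as used in the 尚硅谷 course) and the exact driver version are assumptions, so adjust them to your environment:

```scala
// build.sbt (sketch): Flink streaming API, Kafka 0.11 connector, MySQL JDBC driver.
// Version numbers are assumptions; match them to your own installation.
libraryDependencies ++= Seq(
  "org.apache.flink" %% "flink-scala"                % "1.7.2",
  "org.apache.flink" %% "flink-streaming-scala"      % "1.7.2",
  "org.apache.flink" %% "flink-connector-kafka-0.11" % "1.7.2",
  "mysql"            %  "mysql-connector-java"       % "5.1.47"
)
```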