Spark Structured Streaming从kafka读数据 并进行wordcount 更新写入mysql

该博客展示了如何使用 SparkSession 和 DataFrame API 从 Kafka 流中读取数据,进行单词计数,然后以 complete 输出模式、通过 Trigger.ProcessingTime(0) 触发器(间隔为 0,即上一批处理完成后立即开始下一批),借助自定义的 ForeachWriter 将结果写入 MySQL 数据库的 wordcount 表中。
摘要由CSDN通过智能技术生成
/**
 * Entry point of the streaming word-count job.
 *
 * Reads records from the Kafka topic `test`, splits each record's value on a single
 * space, counts occurrences per word, sorts by count, and hands every result row to a
 * [[dataToMysql]] writer. The query runs in `complete` output mode with a
 * processing-time trigger of interval 0 and blocks until the query terminates.
 *
 * @param args command-line arguments (not used)
 */
def main(args: Array[String]): Unit = {
  val spark: SparkSession = SparkSession.builder()
    .master("local[*]")
    .appName("wula")
    .getOrCreate()
  spark.sparkContext.setLogLevel("warn")
  import spark.implicits._

  // Streaming source: raw Kafka records from the `test` topic.
  val source: DataFrame = spark.readStream
    .format("kafka")
    .option("kafka.bootstrap.servers", "node01:9092,node02:9092,node03:9092")
    .option("subscribe", "test")
    .load()

  // Kafka delivers key/value as binary; cast both to strings and type them as a pair.
  val records: Dataset[(String, String)] = source
    .selectExpr("CAST(key as string)", "CAST(value as string)")
    .as[(String, String)]

  // Only the record value carries text; the single-column result of flatMap is named "value".
  val counts = records
    .flatMap(record => record._2.split(" "))
    .groupBy("value")
    .count()
    .sort($"count")

  val sink = new dataToMysql("jdbc:mysql://node01:3306/test?characterEncoding=UTF-8", "root", "123456")

  val query = counts.writeStream
    .trigger(Trigger.ProcessingTime(0))
    .foreach(sink)
    .outputMode("complete")
    .start()
  query.awaitTermination()
}

  /**
   * [[ForeachWriter]] that writes word-count rows into the MySQL `wordcount` table over JDBC.
   *
   * Each incoming [[Row]] is read as (word, count) from columns 0 and 1 and written with a
   * `replace into` statement. The connection and the prepared statement are created once in
   * `open`, reused for every row passed to `process`, and released in `close`.
   *
   * @param url      JDBC URL handed to `DriverManager.getConnection`
   * @param user     database user name
   * @param password database password
   */
  class dataToMysql(url: String, user: String, password: String) extends ForeachWriter[Row] with Serializable {
    var connection: Connection = _
    var preparedStatement: PreparedStatement = _

    // The statement text never changes, so it is prepared once per open() instead of once per row.
    private val upsertSql = "replace into `wordcount` (`id`,`word`,`wordcount`) values(null,?,?);"

    /**
     * Opens the JDBC connection and prepares the reusable statement.
     *
     * @param partitionId partition identifier supplied by Spark (not used)
     * @param version     version/epoch identifier supplied by Spark (not used)
     * @return always `true`, i.e. every partition is processed
     */
    override def open(partitionId: Long, version: Long): Boolean = {
      connection = DriverManager.getConnection(url, user, password)
      try {
        preparedStatement = connection.prepareStatement(upsertSql)
      } catch {
        // Per the ForeachWriter contract close() is only invoked after open() returns,
        // so the connection has to be released here if preparing the statement fails.
        case scala.util.control.NonFatal(e) =>
          connection.close()
          throw e
      }
      true
    }

    /**
     * Writes one (word, count) row using the statement prepared in `open`.
     *
     * @param value row whose column 0 is the word and column 1 is its count
     */
    override def process(value: Row): Unit = {
      val word = value.get(0).toString
      val wordC = value.get(1).toString.toInt
      println(word + "____" + wordC)
      preparedStatement.setString(1, word)
      preparedStatement.setInt(2, wordC)
      preparedStatement.executeUpdate()
    }

    /**
     * Releases the JDBC resources: the statement first, then the connection that owns it.
     *
     * @param errorOrNull error reported by Spark for this partition, or `null` (not inspected)
     */
    override def close(errorOrNull: Throwable): Unit = {
      try {
        if (preparedStatement != null) {
          preparedStatement.close()
        }
      } finally {
        // Runs even if closing the statement throws, so the connection is never leaked.
        if (connection != null) {
          connection.close()
        }
      }
    }
  }
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值