Structured Streaming利用withWatermark和window窗口函数实现WordCount
<!-- Maven dependencies for the example below: Spark 2.2.0 artifacts built for Scala 2.11. -->
<dependencies>
<!-- Spark SQL module (SparkSession / DataFrame / Structured Streaming API used by the example). -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<version>2.2.0</version>
</dependency>
<!-- Spark core module (SparkConf is imported from it by the example). -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>2.2.0</version>
</dependency>
<!-- Kafka 0.10 integration for Spark SQL; the example reads its input with format("kafka"). -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql-kafka-0-10_2.11</artifactId>
<version>2.2.0</version>
</dependency>
</dependencies>
package com.test
import java.sql.Timestamp
import org.apache.spark.SparkConf
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.functions.window
import org.apache.spark.sql.streaming.Trigger
/**
 * Sliding-window word count over a Kafka topic using Structured Streaming.
 *
 * Reads records from the Kafka topic `first`, groups them by the record `value`
 * and by 2-minute windows (sliding every minute) over the record `timestamp`
 * column, and prints the counts to the console, ordered by window.
 */
object StructureStreaming_new4 {

  /**
   * Builds and starts the streaming query, then blocks until it terminates.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("StructureStreamin_WordCount")
    val spark = SparkSession.builder().config(sparkConf).getOrCreate()
    // Needed for the $"column" syntax below.
    import spark.implicits._

    // The Kafka source always provides a `timestamp` column, so the socket-source
    // option "includeTimestamp" is not needed here and has been dropped.
    val df: DataFrame = spark.readStream
      .format("kafka")
      .option("kafka.bootstrap.servers", "10.21.13.181:9092")
      .option("subscribe", "first")
      .load()

    // Only `value` and `timestamp` take part in the aggregation. `value` is kept in
    // its raw form for grouping and cast to STRING for display after counting.
    val wordCounts = df
      .select($"value", $"timestamp")
      // NOTE: per the Structured Streaming guide, a watermark only lets Spark drop old
      // aggregation state in append/update output modes. This query uses complete mode
      // (required by the orderBy below), so all window state is retained and the
      // watermark does not bound state here.
      .withWatermark("timestamp", "2 minutes")
      .groupBy(window($"timestamp", "2 minutes", "1 minutes"), $"value")
      .count()
      .selectExpr("window", "CAST(value AS STRING)", "count")
      // Sorting a streaming aggregate is only supported in complete output mode.
      .orderBy("window")

    val query = wordCounts.writeStream
      .outputMode("complete")
      .format("console")
      .option("truncate", "false")
      .start()

    query.awaitTermination()
  }
}
kafka-console-producer.sh --broker-list dn1:9092 --topic first
- 输入数据
- 输出结果
+---------------------------------------------+-----+-----+
|window |value|count|
+---------------------------------------------+-----+-----+
|[2020-06-30 12:28:00.0,2020-06-30 12:30:00.0]|dog |2 |
|[2020-06-30 12:28:00.0,2020-06-30 12:30:00.0]|cat |3 |
|[2020-06-30 12:29:00.0,2020-06-30 12:31:00.0]|cat |3 |
|[2020-06-30 12:29:00.0,2020-06-30 12:31:00.0]|dog |2 |
+---------------------------------------------+-----+-----+