// ETL stage: keep only well-formed "success" call records and re-encode each
// one as a JSON string.
// A record is a comma-separated line with exactly six fields:
//   stationId, callOut, callIn, callStatus, callTime, duration
val ResultStreamDF = InputStreamDF
  .selectExpr("CAST(value AS STRING)")
  .as[String]
  // Trim and split each line exactly once. The pattern below both validates
  // the record (six fields, status "success") and extracts its fields, so the
  // filtering rule and the projection cannot drift apart.
  .flatMap { line =>
    Option(line).map(_.trim.split(",")) match {
      case Some(Array(stationId, callOut, callIn, callStatus @ "success", callTime, duration)) =>
        Seq((stationId, callOut, callIn, callStatus, callTime, duration))
      // null payloads, wrong field counts and non-success statuses are dropped
      case _ =>
        Seq.empty
    }
  }
  .toDF("stationId", "callOut", "callIn", "callStatus", "callTime", "duration")
  // Pack all six columns into a single JSON string column named "value".
  .select(
    to_json(struct($"*")).as("value")
  )
// Sink stage: append the transformed records to a Kafka topic.
val query: StreamingQuery = {
  // Kafka brokers, target topic and checkpoint directory used by the sink.
  val sinkOptions = Map(
    "kafka.bootstrap.servers" -> "node1.itcast.cn:9092",
    "topic" -> "etlTopic",
    "checkpointLocation" -> "data/structured/station-etl-1003"
  )
  ResultStreamDF
    .writeStream
    .format("kafka")
    .queryName("query-state-etl")
    .outputMode(OutputMode.Append())
    // TODO: enable Continuous Processing (no trigger is configured here yet)
    .options(sinkOptions)
    .start()
}
// Wait for the streaming query to terminate, then stop it.
query.awaitTermination()
query.stop()
Structured Streaming 将数据封装成 JSON 数据
最新推荐文章于 2022-10-05 13:50:13 发布