<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql-kafka-0-10_2.11</artifactId>
    <version>2.1.1</version>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.11</artifactId>
    <version>2.1.1</version>
</dependency>
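If the project builds with sbt instead of Maven, the same coordinates would look like this (assuming scalaVersion is set to 2.11.x, so that %% appends the _2.11 suffix):

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-sql" % "2.1.1",
  "org.apache.spark" %% "spark-sql-kafka-0-10" % "2.1.1"
)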
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.StreamingQuery // only used by the commented-out Alternative 1 below
object MyTest {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("test")
      .master("local")
      .getOrCreate()
    spark.sparkContext.setLogLevel("WARN")
    import spark.implicits._

    // Subscribe to the userInfo topic; startingOffsets=earliest replays the
    // topic from the beginning the first time the query runs.
    val df = spark.readStream
      .format("kafka")
      .option("kafka.bootstrap.servers", "10.1.10.97:9092")
      .option("startingOffsets", "earliest")
      .option("subscribe", "userInfo")
      .load()
    df.printSchema()

    // The Kafka source delivers value as binary, so cast it to a string
    // before treating it as JSON.
    val lines = df.selectExpr("CAST(value AS STRING)")
      .as[String]
    lines.createOrReplaceTempView("userInfo")
    // Extract fields from the JSON payload with get_json_object. Note the
    // JSONPath must start with a bare $ -- '\$' makes the path invalid and
    // every column comes back null.
    val querySql =
      """
        |select
        |get_json_object(value, '$.address') as address,
        |get_json_object(value, '$.uname') as uname,
        |get_json_object(value, '$.userId') as userId
        |from userInfo
        |""".stripMargin
    spark.sql(querySql)
      .writeStream
      .outputMode("update")
      .format("console")
      .start()
      .awaitTermination()
    /* Alternative 1: skip the JSON parsing and print the raw value strings.
    lines.printSchema()
    val query: StreamingQuery = lines.writeStream
      .outputMode("update")
      .format("console")
      .start()
    query.awaitTermination()
    */

    /* Alternative 2: keep both key and value as a typed Dataset[(String, String)].
    df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)")
      .as[(String, String)]
      .writeStream
      .outputMode("update")
      .format("console")
      .start()
      .awaitTermination()
    */
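    /* Alternative 3 (a sketch added here, not from the original code): parse the
       JSON once with from_json instead of calling get_json_object per field.
       The schema is an assumption matching the three fields the SQL extracts.
    import org.apache.spark.sql.functions.from_json
    import org.apache.spark.sql.types.{StringType, StructType}

    val userSchema = new StructType()
      .add("address", StringType)
      .add("uname", StringType)
      .add("userId", StringType)

    lines.select(from_json($"value", userSchema).as("u"))
      .select("u.address", "u.uname", "u.userId")
      .writeStream
      .outputMode("update")
      .format("console")
      .start()
      .awaitTermination()
    */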
  }
}
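To feed the stream something to read, a message has to land in the userInfo topic. A minimal producer sketch (not from the original post; the broker address and topic match the reader above, and the record contents are a hypothetical example of the expected JSON shape):

import java.util.Properties
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

object ProduceTestMessage {
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.put("bootstrap.servers", "10.1.10.97:9092")
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)
    // Hypothetical record carrying the three fields the query extracts.
    val json = """{"userId":"1","uname":"alice","address":"beijing"}"""
    producer.send(new ProducerRecord[String, String]("userInfo", json))
    producer.close()
  }
}

The kafka-clients classes used here come in transitively through the spark-sql-kafka-0-10 dependency.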
Console output:
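The first lines come from df.printSchema(). Spark's Kafka source has a fixed schema regardless of topic contents, so the output should look like this:

root
 |-- key: binary (nullable = true)
 |-- value: binary (nullable = true)
 |-- topic: string (nullable = true)
 |-- partition: integer (nullable = true)
 |-- offset: long (nullable = true)
 |-- timestamp: timestamp (nullable = true)
 |-- timestampType: integer (nullable = true)

After that, each micro-batch is printed as a console table with the address, uname, and userId columns extracted from the JSON values.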

