import org.apache.spark.SparkContext
import org.apache.spark.sql.{SaveMode, SparkSession}
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.json.JSONObject
object kafka2HiveStreaming {

  /**
   * Streaming ETL entry point: reads newline-delimited JSON records from a
   * socket source, extracts the embedded "data" payload, parses it into
   * (Id, Name, phone) tuples and appends each 5-second micro-batch into the
   * Hive table `kafka2HiveStreaming`.
   */
  def main(args: Array[String]): Unit = {
    // 1. Build the Spark session (Hive support enabled so we can write to Hive tables)
    //    and a streaming context with a 5-second batch interval.
    val sparkSession = SparkSession
      .builder()
      .master("local[*]")
      .appName("kafka2HiveStreaming")
      .enableHiveSupport()
      .getOrCreate()
    val sc: SparkContext = sparkSession.sparkContext
    val ssc: StreamingContext = new StreamingContext(sc, Seconds(5))

    // Raw text lines from the socket source. Each line is expected to be a JSON
    // object whose "data" field holds the actual record as a JSON string.
    // (Renamed from `DStream`, which shadowed the imported DStream type.)
    val lines = ssc.socketTextStream("192.168.153.137", 9999)

    // Extract the embedded "data" JSON string from each incoming record.
    // A plain map replaces the original transform(rdd.map(...)) — equivalent here.
    // NOTE(review): new JSONObject(...) / getString throw JSONException on
    // malformed input, which will fail the batch — consider a Try if the source
    // is untrusted.
    val payloads: DStream[String] =
      lines.map(line => new JSONObject(line).getString("data"))

    // Parse each payload into an (Id, Name, phone) tuple.
    val records: DStream[(String, String, String)] = payloads.map { payload =>
      val json = new JSONObject(payload)
      val id = json.getString("Id")
      val name = json.getString("Name")
      val phone = json.getString("phone")
      (id, name, phone)
    }

    import sparkSession.implicits._
    // Append every non-empty micro-batch into the Hive table. The empty-RDD
    // guard avoids writing an empty file to the table every 5 seconds.
    // (foreachRDD returns Unit; the original assigned it to an unused val.)
    records.foreachRDD { rdd =>
      if (!rdd.isEmpty()) {
        rdd
          .toDF("ID", "NAME", "PHONE")
          .coalesce(1) // one output file per batch
          .write
          .mode(SaveMode.Append)
          .insertInto("kafka2HiveStreaming")
      }
    }
    // sparkSession.sql("select * from kafka2HiveStreaming").show()
    // sparkSession.sql("insert into kafka2HiveStreaming select * from tmptable")
    ssc.start()
    ssc.awaitTermination()
  }
}