启动如下的代码
Socket流 Linux命令:nc -lk 9999,输入字符查看程序执行结果
Hdfs : hdfs dfs -put ./<文件名> /spark
package org.example.sparkstreaming
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
import org.apache.spark.streaming.{Seconds, StreamingContext, Time}
object Streaming_Sql {
  /** Spark Streaming + Spark SQL demo.
    *
    * Watches an HDFS directory (or, via the commented-out line, a socket) for
    * space-separated "name age phone" records in 5-second micro-batches,
    * registers each batch as a temp view, runs a SQL query over it, prints the
    * result, and appends non-empty batches to a local CSV output directory.
    */
  def main(args: Array[String]): Unit = {
    // Silence noisy framework logging so the per-batch .show() output is readable.
    Logger.getLogger("org.apache.spark").setLevel(Level.OFF)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

    val conf = new SparkConf().setAppName("SQLtest").setMaster("local[2]")
    // 5-second micro-batch interval.
    val ssc = new StreamingContext(conf, Seconds(5))

    // New files dropped into this HDFS directory become the stream's input.
    val lines = ssc.textFileStream("hdfs://192.168.172.131:9000/spark")
    //val lines = ssc.socketTextStream("192.168.172.131",9999)

    lines.foreachRDD { (rdd: RDD[String]) =>
      // Split each line once (the original split the same line three times) and
      // drop malformed records — a short line or non-numeric age previously
      // crashed the entire batch with an uncaught exception.
      val words = rdd.flatMap { line =>
        val fields = line.split(" ")
        if (fields.length >= 3 && fields(1).nonEmpty && fields(1).forall(_.isDigit))
          Some(User(fields(0), fields(1).toInt, fields(2)))
        else
          None
      }

      // getOrCreate returns a singleton, so this is cheap after the first batch.
      val spark = SparkSession.builder.config(conf).getOrCreate()
      import spark.implicits._

      val wordsDataFrame = words.toDF()
      wordsDataFrame.createOrReplaceTempView("table")
      val result: DataFrame = spark.sql("select * from table")
      result.show()

      // Only write out batches that actually contain rows; each non-empty batch
      // is coalesced to a single CSV part file and appended under "output".
      if (!result.rdd.isEmpty()) {
        result.repartition(1).write.format("csv").mode("append").save("output")
      }
    }

    ssc.start()
    ssc.awaitTermination()
  }

  /** One input record: space-separated name, numeric age, phone number. */
  case class User(name: String, age: Int, phone: String)
}
socket/hdfs数据(age必须是数字)
蔡志远 25 18098316705
波波波结衣 18 13698786412
安倍晋三 66 18036963212
工藤新一 21 18696478962
野原新之助 5 16987159638
.show()
保存本地的CSV文件