- 案例一
SparkStreaming和SparkSql结合实现WordCount
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.sql.{DataFrame, SparkSession}
object SparkStreamingStudy_rddtosql {
  /**
   * Streaming word count: reads lines from a socket, splits them into words,
   * and runs a Spark SQL `group by` aggregation on every 5-second micro-batch.
   */
  def main(args: Array[String]): Unit = {
    // 1. Spark configuration: local mode, all available cores.
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("StreamWordCount")
    // 2. StreamingContext with a 5-second batch interval.
    val ssc = new StreamingContext(sparkConf, Seconds(5))
    // 3. DStream from a TCP socket source; each record is one line of text.
    //    NOTE(review): host/port are hard-coded for the tutorial environment.
    val lineStreams = ssc.socketTextStream("10.21.13.181", 9999)
    // Split every line into individual words (wordStreams is a DStream[String]).
    val wordStreams = lineStreams.flatMap(_.split(" "))
    // Reuse (or lazily create) a SparkSession sharing the streaming configuration.
    val spark = SparkSession.builder().config(sparkConf).getOrCreate()
    // Required for the toDF implicit conversion below.
    import spark.implicits._
    // foreachRDD exposes each micro-batch as a plain RDD for SQL processing.
    wordStreams.foreachRDD { rdd =>
      // FIX: removed the redundant identity map `rdd.map(x => (x))` — an
      // RDD[String] converts to a single-column DataFrame directly.
      val wordsDF = rdd.toDF("words")
      wordsDF.createOrReplaceTempView("wordCount")
      val wordCountDF = spark.sql("select words,count(1) from wordCount group by words")
      wordCountDF.show()
    }
    ssc.start()
    ssc.awaitTermination()
  }
}
虚拟机上nc -lk 9999输入消息便可得到如下结果:
- 案例二
SparkStreaming和SparkSql结合实现结构化样例类
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.sql.{DataFrame, SparkSession}
/**
 * Structured record for one socket line of the form "name age".
 * Marked `final`: case classes should not be extended (equals/copy semantics break).
 */
final case class People(name: String, age: Int)
object SparkStreamingStudy_rddtosql1 {
  /**
   * Streaming structured ingestion: parses each socket line as a `People`
   * record ("name age") and queries it with Spark SQL per 5-second batch.
   */
  def main(args: Array[String]): Unit = {
    // 1. Spark configuration: local mode, all available cores.
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("StreamWordCount")
    // 2. StreamingContext with a 5-second batch interval.
    val ssc = new StreamingContext(sparkConf, Seconds(5))
    // 3. DStream from a TCP socket source; each record is one line of text.
    val lineStreams = ssc.socketTextStream("10.21.13.181", 9999)
    // Reuse (or lazily create) a SparkSession sharing the streaming configuration.
    val spark = SparkSession.builder().config(sparkConf).getOrCreate()
    // Required for the toDF implicit conversion below.
    import spark.implicits._
    // Needed to skip malformed records without crashing the job.
    import scala.util.Try
    // foreachRDD exposes each micro-batch as a plain RDD for SQL processing.
    lineStreams.foreachRDD { rdd =>
      // FIX: the original blindly indexed split results and called .toInt,
      // so any line without two fields or with a non-numeric age threw
      // (ArrayIndexOutOfBoundsException / NumberFormatException) and killed
      // the streaming job. Wrap parsing in Try and drop bad records.
      val peopleDF = rdd.flatMap { line =>
        val param = line.split(" ")
        Try(People(param(0), param(1).trim.toInt)).toOption
      }.toDF()
      peopleDF.createOrReplaceTempView("people")
      val peopleSqlDF = spark.sql("select name,age from people")
      peopleSqlDF.show()
    }
    ssc.start()
    ssc.awaitTermination()
  }
}
结果如下: