测试过程:
1.向虚拟机mini1的7777端口发送一个个的单词信息
2.Streaming程序接收7777端口的数据,并做处理。
3.将DStream[(String, Int)]转化成RDD写入到mysql
程序如下:
package spark.SparkStreaming.file
import java.sql.DriverManager
import org.apache.spark.SparkContext
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.{Seconds, StreamingContext}
object streaming2Mysql {
/**
 * Entry point: consumes whitespace-separated words from the TCP socket
 * `mini1:7777` in 2-second micro-batches, maintains a running count per
 * word across batches via `updateStateByKey`, and persists every
 * (word, count) pair through `save2DB`.
 *
 * @param args command-line arguments (unused)
 */
def main(args: Array[String]): Unit = {
  // Build the SparkSession in local mode, using all available cores.
  val spark: SparkSession = SparkSession.builder()
    .appName(streaming2Mysql.getClass.getSimpleName)
    .master("local[*]")
    .getOrCreate()
  val sc: SparkContext = spark.sparkContext
  // 2-second batch interval.
  val ssc: StreamingContext = new StreamingContext(sc, Seconds(2))
  // Checkpoint directory is mandatory for stateful ops (updateStateByKey).
  ssc.checkpoint("file:///C:\\ck")

  // DStream pipeline: split lines into words, drop empties, count, and
  // fold each batch's counts into the accumulated historical state.
  ssc.socketTextStream("mini1", 7777)
    .flatMap(_.split("\\s+"))
    .filter(_.nonEmpty)
    .map((_, 1))
    .updateStateByKey((nowBatch: Seq[Int], historyResult: Option[Int]) =>
      Some(nowBatch.sum + historyResult.getOrElse(0)))
    .foreachRDD { rdd =>
      // No isEmpty guards needed: foreachPartition on an empty RDD and
      // foreach on an empty iterator are both no-ops, and rdd.isEmpty()
      // would trigger an extra Spark job just to check.
      rdd.foreachPartition { itr =>
        // NOTE(review): save2DB is called once per record; if it opens a
        // fresh JDBC connection each time (body not visible here), hoist
        // the connection to partition scope — TODO confirm.
        itr.foreach { case (word, cnt) => save2DB(word, cnt) }
      }
    }

  // Start the streaming application. Side-effecting 0-arity methods are
  // called with () per Scala convention.
  ssc.start()
  // Block the driver thread until the streaming context terminates
  // (required, otherwise main exits immediately).
  ssc.awaitTermination()
}
/**
* 保存到DB中
* @param word
* @param cnt
*/
def save2DB(word: String, cnt: Int) = {
//加载驱动
classOf[com.mysql.