Spark Streaming 本地读取数据示例代码：监控本地目录中新建的文本文件，按逗号切分并进行词频统计（textFileStream word count）。
package com.mydemo
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.{Seconds, StreamingContext}
/** Spark Streaming word-count example.
 *
 * Monitors a local directory for newly created text files, splits each
 * line on commas, and prints per-word counts for every 5-second batch.
 *
 * NOTE(review): the source appears truncated at the end of this view —
 * no `ssc.start()` / `ssc.awaitTermination()` calls and no closing
 * braces are visible. As shown, the streaming job would never actually
 * run; confirm the remainder of the file contains them.
 */
object strDemo {
def main(args: Array[String]): Unit = {
// 1. Initialize the Spark configuration: local mode using all cores.
val sparkConf = new SparkConf().setMaster("local[*]")
.setAppName("StreamWordCount")
// 2. Create the StreamingContext with a 5-second batch interval.
val ssc = new StreamingContext(sparkConf, Seconds(5))
// 3. Monitor a local directory; textFileStream only picks up files
// newly created in the directory after the job starts.
// NOTE(review): the path "C:/User/..." looks like a typo for
// "C:/Users/..." — confirm the intended directory (cannot change the
// string literal in a doc-only review).
val dirStream = ssc.textFileStream("file:///C:/User/IdeaProjects/spark01/")
// 4. Split each input line on commas into individual words.
val wordStreams: DStream[String] = dirStream.flatMap(_.split(","))
// 5. Pair each word with a count of 1: (word, 1).
val wordAndOneStreams = wordStreams.map((_, 1))
// 6. Sum the 1s per word within each batch (per-batch counts only;
// no stateful accumulation across batches is set up here).
val wordAndCountStreams = wordAndOneStreams.reduceByKey(_ + _)
// 7. Print the first elements of each batch's result to stdout.
wordAndCountStreams.print()
/