package sparkstreamday01.SparkStream

import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.{Seconds, StreamingContext}

object SparkStreamTest3 {
  // Set the log level to WARN to reduce console noise
  Logger.getLogger("org").setLevel(Level.WARN)

  def main(args: Array[String]): Unit = {
    // Local-mode configuration: at least two cores are required
    // (one for the receiver, one for processing)
    val conf: SparkConf = new SparkConf()
      .setMaster("local[3]")
      .setAppName(this.getClass.getSimpleName)

    // The batch interval should be chosen to fit the workload, not set arbitrarily
    val ssc: StreamingContext = new StreamingContext(conf, Seconds(5))

    // Create a DStream that connects to host:port over a socket;
    // the Receiver reading from it occupies one core
    val text: ReceiverInputDStream[String] = ssc.socketTextStream("Linux00", 9999)

    // Business logic: word count
    val ds: DStream[(String, Int)] = text.flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _)

    // print() shows the first 10 elements of each batch by default
    ds.print()

    // Start the computation and connect to the socket
    ssc.start()
    // Block until the application is terminated
    ssc.awaitTermination()
  }
}
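
To exercise this program, something must already be listening on Linux00:9999 before the application starts (for example, running nc -lk 9999 on that host) so the socket receiver has a source to connect to. As a minimal sketch only, the same word-count logic can also be sanity-checked without a socket by feeding batches through queueStream, which needs no receiver core; the object name SparkStreamTest3LocalCheck and the sample input lines are illustrative assumptions, not part of the original example.

package sparkstreamday01.SparkStream

import scala.collection.mutable

import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Seconds, StreamingContext}

// Hypothetical local check: replaces the socket receiver with a queueStream
// so the word-count pipeline can be run without a netcat server.
object SparkStreamTest3LocalCheck {
  Logger.getLogger("org").setLevel(Level.WARN)

  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf()
      .setMaster("local[2]")
      .setAppName(this.getClass.getSimpleName)
    val ssc: StreamingContext = new StreamingContext(conf, Seconds(1))

    // queueStream does not use a receiver; each queued RDD becomes one batch
    val queue = mutable.Queue[RDD[String]]()
    val lines = ssc.queueStream(queue)

    // Same word-count logic as the socket version
    val counts = lines.flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _)
    counts.print()

    ssc.start()
    // Sample input lines (illustrative only)
    queue += ssc.sparkContext.makeRDD(Seq("hello spark", "hello streaming"))
    // Wait long enough for at least one batch, then shut down
    ssc.awaitTerminationOrTimeout(5000)
    ssc.stop(stopSparkContext = true, stopGracefully = false)
  }
}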