package com.ws.sparkstreaming
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Minimal Spark Streaming word count.
 *
 * Reads whitespace-separated text from a socket source ("dream1":8888),
 * counts words within each 2-second micro-batch, and prints the counts
 * to stdout. Runs until externally terminated.
 */
object WordCount {

  def main(args: Array[String]): Unit = {
    // Local-mode configuration; app name derived from this class.
    val conf = new SparkConf()
      .setAppName(this.getClass.getSimpleName)
      .setMaster("local[*]")
    val sc = new SparkContext(conf)

    // StreamingContext wraps the SparkContext; Seconds(2) is the
    // micro-batch interval — each batch covers 2 seconds of input.
    val ssc = new StreamingContext(sc, Seconds(2))

    // Receive lines of text from the socket source.
    val lines: DStream[String] = ssc.socketTextStream("dream1", 8888)

    // Per-batch word count. DStream transformations mirror the RDD API
    // and are ultimately executed as RDD operations on each batch.
    val wordCounts: DStream[(String, Int)] = lines
      .flatMap(_.split(" "))
      .map(word => (word, 1))
      .reduceByKey(_ + _)

    // Print the first elements of each batch's result to stdout.
    wordCounts.print()

    // Nothing runs until start() is called; awaitTermination() blocks
    // the driver so the streaming job keeps running.
    ssc.start()
    ssc.awaitTermination()
  }
}
// To test locally, install the netcat client on Linux and run a server
// that this job can connect to:
//   yum install nc -y
//   nc -lk 8888