代码实现如下:
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.{Seconds, StreamingContext}
// Computes the top three most-searched keywords in real time.
object WindowScala {
def main(args: Array[String]): Unit = {
val conf = new SparkConf().setMaster("local[2]").setAppName("WindowScala")
val ssc: StreamingContext = new StreamingContext(conf, Seconds(1))
//获取数据,数据格式为(客户识别信息 搜索内容)
val ReceiverDStreamInput: ReceiverInputDStream[String] = ssc.socketTextStream("Hadoop01", 9999)
//将数据进行转化,转化成(搜索内容,1)
val Dstream = ReceiverDStreamInput.map(line => (line.split(" ")(1), 1))
// 对数据做reduceByKeyAndWindow操作,对相同的key,按照value求和,窗口长度为30s,每隔10s生成一个窗口
val windowResults: DStream[(String, Int)] = Dstream.reduceByKeyAndWind