package com.wl.sparkStreaming.kafkaSparkStreaming
import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord}
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
/*
 * Within one window, find the event with the most invitees and the event
 * with the most "yes" (attending) responses.
 */
object EventAttendeesRawWindow {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("EventAttendeesRawWindow")
    // 3-second batch interval; the window lengths below must be multiples of it.
    val context = new StreamingContext(conf, Seconds(3))
    context.checkpoint("checkpoint")
    val kafkaParams: Map[String, String] = Map(
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "study:9092",
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringDeserializer",
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringDeserializer",
      ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "earliest",
      ConsumerConfig.GROUP_ID_CONFIG -> "userfrienda"
    )
    val kafkaStream: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream[String, String](
      context,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](Set("event_attendees_raw"), kafkaParams)
    )
    /*
     * Record layout: comma-separated fields, each holding space-separated user ids.
     *   event, yes, maybe, invited, no
     *   1,     2 3, 4 5,   6 7,     9 8
     * Within one window, find the event with the most invitees and the event
     * with the most "yes" responses.
     */
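    // Tracing the sample row above through the "yes" branch: fields(1) arrives
    // as " 2 3" (note the leading space), trims to "2 3", and splits into two
    // user ids, so the mapper below emits ("eventInfo yes", ("1", 2)).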
    // For each record, emit (constant key, (event id, count of "yes" user ids)).
    // Trimming before the whitespace split avoids counting a leading blank as an id.
    val yes = kafkaStream.map(line => {
      val fields = line.value().split(",")
      if (fields.length >= 2 && fields(1).trim.nonEmpty)
        ("eventInfo yes", (fields(0), fields(1).trim.split("\\s+").length))
      else
        ("eventInfo yes", (fields(0), 0))
    }).reduceByKeyAndWindow((x, y) => {
      // Keep whichever pair has the larger count: over the window this reduces
      // every record to the single event with the most "yes" ids.
      if (x._2 > y._2) x else y
    }, Seconds(9))
    yes.print()
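    // With a 3-second batch and a 9-second window (the slide interval defaults
    // to the batch interval), print() fires every batch with the top "yes"
    // event over the last three batches.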
    // Same pattern for the "invited" field (index 3).
    val invited = kafkaStream.map(line => {
      val fields = line.value().split(",")
      if (fields.length >= 4 && fields(3).trim.nonEmpty)
        ("eventInfo invited", (fields(0), fields(3).trim.split("\\s+").length))
      else
        ("eventInfo invited", (fields(0), 0))
    }).reduceByKeyAndWindow((x, y) => {
      if (x._2 > y._2) x else y
    }, Seconds(9))
    invited.print()
    context.start()
    context.awaitTermination()
  }
}
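To sanity-check the field handling without a Kafka broker or a running
StreamingContext, here is a minimal sketch that applies the same counting
logic to in-memory sample lines. The object name EventAttendeesParseCheck
and the sample records are made up for illustration; only the field layout
follows the comment in the job above.

object EventAttendeesParseCheck {
  // Count the space-separated user ids in one comma-separated field,
  // tolerating missing fields and surrounding blanks.
  def countUsers(fields: Array[String], idx: Int): Int =
    if (fields.length > idx && fields(idx).trim.nonEmpty)
      fields(idx).trim.split("\\s+").length
    else 0

  def main(args: Array[String]): Unit = {
    // Hypothetical lines in the event_attendees_raw layout: event,yes,maybe,invited,no
    val sample = Seq(
      "1, 2 3, 4 5 , 6 7 ,9 8",
      "2, 10 11 12, , 13,"
    )
    // Mirror the streaming job: count "yes" ids (field index 1) per event,
    // then keep the event with the largest count.
    val bestYes = sample
      .map { line =>
        val fields = line.split(",")
        (fields(0), countUsers(fields, 1))
      }
      .maxBy(_._2)
    println(s"event with most yes responses: $bestYes") // prints (2,3)
  }
}

Producing the same two lines into the event_attendees_raw topic (for example
with kafka-console-producer) should make the streaming job print the matching
winner once a full window has elapsed.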