最近一小时广告点击量
统计各广告最近 1 小时内的点击量趋势:各广告最近 1 小时内各分钟的点击量
Redis存储数据结构如图所示(使用一个 hash:key 为 last:hour:ads:click,field 为广告 id,value 为该广告各分钟点击量的 JSON 字符串):
1 思路分析
使用窗口函数进行最近一小时的数据统计,并将结果写入Redis。
2 代码实现
1)LastHourAdsHandler
/**
  * Computes, for every ad, the number of clicks per minute inside a sliding
  * window and stores the result in Redis.
  *
  * Redis layout: a single hash under the key `last:hour:ads:click`, with one
  * field per ad id whose value is a JSON string of (HH:mm -> count) entries.
  */
object LastHourAdsHandler {
  // Redis hash key that receives one field per ad id.
  private val RedisKey = "last:hour:ads:click"

  /**
    * Aggregates clicks per (ad, minute) over a sliding window and writes the
    * per-ad JSON summaries to Redis on every slide.
    *
    * @param filteredDStream stream of ad click events to aggregate
    * @param windowDuration  length of the sliding window; defaults to one hour,
    *                        which is the stated requirement (pass a smaller
    *                        value for quick local testing)
    * @param slideDuration   how often the window is evaluated; must be a
    *                        multiple of the streaming batch interval
    */
  def statLastHourAds(filteredDStream: DStream[AdsInfo],
                      windowDuration: org.apache.spark.streaming.Duration = Minutes(60),
                      slideDuration: org.apache.spark.streaming.Duration = Seconds(4)): Unit = {
    // 1. Open a sliding window over the stream.
    val dStreamWithWindow: DStream[AdsInfo] = filteredDStream.window(windowDuration, slideDuration)

    // Count clicks per (adsId, "HH:mm"), then re-key by adsId.
    val hourMinutesCount: DStream[(String, (String, Int))] = dStreamWithWindow
      .mapPartitions { adsInfos =>
        // SimpleDateFormat is not thread-safe, so build one per partition
        // instead of sharing an instance captured from the driver.
        val dateFormatter = new SimpleDateFormat("HH:mm")
        adsInfos.map(adsInfo => ((adsInfo.adsId, dateFormatter.format(new Date(adsInfo.ts))), 1))
      }
      .reduceByKey(_ + _)
      .map {
        case ((adsId, hourMinutes), count) => (adsId, (hourMinutes, count))
      }

    // 2. Collapse each ad's (minute, count) pairs into one JSON string.
    val adsIdHourMinutesJson: DStream[(String, String)] = hourMinutesCount.groupByKey.map {
      case (adsId, hourMinutesCountIt) =>
        import org.json4s.JsonDSL._
        (adsId, JsonMethods.compact(JsonMethods.render(hourMinutesCountIt)))
    }

    // 3. Write the per-ad summaries to Redis.
    adsIdHourMinutesJson.foreachRDD { rdd =>
      val result: Array[(String, String)] = rdd.collect()
      result.foreach(println)
      // HMSET with no field/value pairs is rejected by Redis, so skip empty windows.
      if (result.nonEmpty) {
        import scala.collection.JavaConverters._
        val client: Jedis = RedisUtil.getJedisClient
        // Always release the client, even if the write fails.
        try client.hmset(RedisKey, result.toMap.asJava)
        finally client.close()
      }
    }
  }
}
2)RealtimeApp
/**
  * Entry point of the real-time ad statistics job: reads ad click logs from
  * Kafka, parses them into AdsInfo records and runs the individual statistics
  * (blacklist, per-day area/city counts, area top 3, recent per-minute clicks).
  */
object RealtimeApp {
  def main(args: Array[String]): Unit = {
    // Read the data we need from Kafka.
    // 1. Create the SparkConf.
    val conf: SparkConf = new SparkConf()
      .setAppName("RealTimeApp")
      .setMaster("local[*]")
    // 2. Create the SparkContext.
    val sc = new SparkContext(conf)
    // 3. Create the StreamingContext with a 2-second batch interval.
    val ssc = new StreamingContext(sc, Seconds(2))
    // 4. Obtain the input DStream for the "ads_log" topic.
    val recordDStream: InputDStream[ConsumerRecord[String, String]] = MyKafkaUtil.getDStream(ssc, "ads_log")
    // 5. Wrap each consumed comma-separated line in an AdsInfo object to
    //    simplify the downstream computations.
    //    NOTE(review): a record with fewer than 5 fields or a non-numeric first
    //    field will fail the batch — confirm the input is always well-formed.
    val adsInfoDStream: DStream[AdsInfo] = recordDStream.map { record =>
      val split: Array[String] = record.value.split(",")
      AdsInfo(split(0).toLong, split(1), split(2), split(3), split(4))
    }
    // 6. Requirement 5: filter out blacklisted users and update the blacklist.
    val filteredDStream: DStream[AdsInfo] = BlackListApp.filterBlackList(adsInfoDStream, sc)
    BlackListApp.checkUserToBlackList(filteredDStream)
    // 7. Requirement 6: per-day area/city ad counts.
    val dayAreaAdsCityCount: DStream[(String, Int)] = DayAreaCityAdsApp.statAreaCityAdsPerDay(filteredDStream, sc)
    // 8. Requirement 7: top 3 ads per area.
    AreaAdsTop3.statAreaAdsTop3(dayAreaAdsCityCount)
    // 9. Requirement 8: per-minute click counts for the recent window.
    //    The object defined for this requirement is LastHourAdsHandler.
    LastHourAdsHandler.statLastHourAds(filteredDStream)

    ssc.start()
    ssc.awaitTermination()
  }
}