广告点击量实时统计
实时统计每天各地区各城市各广告的点击流量,并将其存入Redis。
1 思路分析
使用有状态的转换(updateStateByKey)对数据按天维度进行聚合统计,并使用 Redis API 将统计结果写入 Redis。
2 代码实现
1)DayAreaCityAdsApp
object DayAreaCityAdsApp {

  /**
   * Keeps a running count of ad clicks per (day, area, city, ad) over the
   * lifetime of the stream via `updateStateByKey`, and writes each batch's
   * cumulative totals into a single Redis hash.
   *
   * @param adsInfoDStream the (already filtered) click-event stream
   * @param sc             SparkContext, used to set the checkpoint directory
   *                       that `updateStateByKey` requires
   * @return the cumulative (key, count) DStream, returned for downstream reuse
   */
  def statAreaCityAdsPerDay(adsInfoDStream: DStream[AdsInfo], sc: SparkContext): DStream[(String, Int)] = {
    // updateStateByKey persists its state here between batches.
    // FIX: the original used typographic quotes (“…”), which do not compile in Scala.
    sc.setCheckpointDir("./sparkmall1128")
    val redisKey = "day:area:city:ads"

    // 1. Count clicks per key inside the batch, then fold into the running state.
    // NOTE(review): the grouping key is AdsInfo.toString — presumably AdsInfo
    // overrides toString to yield "day:area:city:ads"; confirm, otherwise the
    // raw timestamp would leak into the key and break day-level aggregation.
    val resultDStream: DStream[(String, Int)] = adsInfoDStream
      .map(info => (info.toString, 1))
      .reduceByKey(_ + _)
      .updateStateByKey((batchCounts: Seq[Int], state: Option[Int]) =>
        Some(batchCounts.sum + state.getOrElse(0)))

    // 2. Persist each batch's cumulative totals to a Redis hash.
    resultDStream.foreachRDD { rdd =>
      val client: Jedis = RedisUtil.getJedisClient
      try {
        // collect is acceptable here: the keyspace (day x area x city x ad)
        // is small enough to fit on the driver.
        rdd.collect.foreach { case (field, count) =>
          client.hset(redisKey, field, count.toString)
        }
      } finally {
        // FIX: always release the connection, even if hset throws
        // (the original leaked the client on failure).
        client.close()
      }
    }
    resultDStream
  }
}
2)RealtimeApp
object RealtimeApp {

  /**
   * Driver entry point: consumes ad-click logs from Kafka, filters out
   * blacklisted users, and maintains a cumulative per-day/area/city/ad
   * click count in Redis.
   */
  def main(args: Array[String]): Unit = {
    // 1. Spark configuration (local mode for development).
    // FIX: the original used typographic quotes (“…”), which do not compile in Scala.
    val conf: SparkConf = new SparkConf()
      .setAppName("RealTimeApp")
      .setMaster("local[*]")
    // 2. SparkContext
    val sc = new SparkContext(conf)
    // 3. StreamingContext with a 2-second batch interval
    val ssc = new StreamingContext(sc, Seconds(2))
    // 4. Source DStream from the Kafka topic "ads_log"
    val recordDStream: InputDStream[ConsumerRecord[String, String]] =
      MyKafkaUtil.getDStream(ssc, "ads_log")
    // 5. Parse each CSV record into an AdsInfo for easier downstream processing.
    //    Assumed field order: timestamp,area,city,userId,adsId — TODO confirm
    //    against the producer.
    val adsInfoDStream: DStream[AdsInfo] = recordDStream.map { record =>
      val fields: Array[String] = record.value.split(",")
      AdsInfo(fields(0).toLong, fields(1), fields(2), fields(3), fields(4))
    }
    // 6. Requirement 5: drop events from blacklisted users, then update the blacklist.
    val filteredDStream: DStream[AdsInfo] = BlackListApp.filterBlackList(adsInfoDStream, sc)
    BlackListApp.checkUserToBlackList(filteredDStream)
    // 7. Requirement 6: cumulative per-day/area/city/ad counts written to Redis.
    val dayAreaAdsCityCount: DStream[(String, Int)] =
      DayAreaCityAdsApp.statAreaCityAdsPerDay(filteredDStream, sc)
    // Start the streaming job and block until it is terminated.
    ssc.start()
    ssc.awaitTermination()
  }
}