Spark 使用状态函数 updateStateByKey 算子累计统计并实现数据入库

/**
 * Keeps a running click total per (date, province, ad) across streaming batches,
 * ranks ads within each province by that total, and stores the top 3 through
 * `AdProvinceTopDaoImpl.insertBatch`.
 *
 * NOTE(review): `updateStateByKey` is a stateful operator; Spark Streaming requires a
 * checkpoint directory to be configured on the StreamingContext for it — this method
 * assumes the caller has done so.
 *
 * @param dateProvinceCityAdCountsDS stream of (key, clickCount) pairs. The key is split on
 *                                   the delimiter and read positionally as
 *                                   fields(0) = date, fields(1) = province, fields(3) = ad id
 *                                   (fields(2) is presumably the city and is dropped here).
 * @param sqlContext                 SQLContext used to build the DataFrame and run the ranking query.
 */
def calcProvinceClickTop(dateProvinceCityAdCountsDS: DStream[(String, Int)], sqlContext: SQLContext): Unit = {
  // NOTE(review): the delimiter was lost when this snippet was extracted from its web page;
  // "_" is the reconstructed value — confirm it matches how the upstream stage builds its keys.
  val delimiter = "_"

  // Records of the current batch, re-keyed to date/province/ad (the city component is dropped).
  // No per-batch reduceByKey(_ + _) is needed: the state function below sums every value
  // that arrives for a key within the batch.
  val dateProvinceAdCounts: DStream[(String, Int)] = dateProvinceCityAdCountsDS.map {
    case (dateProvinceCity, count) =>
      val fields = dateProvinceCity.split(delimiter)
      val date = fields(0)
      val province = fields(1)
      // toInt also validates that the ad id is numeric before it becomes part of the new key.
      val adId = fields(3).toInt
      (s"$date$delimiter$province$delimiter$adId", count)
  }

  // State function: `batchCounts` holds the new values of this batch for a key,
  // `previousTotal` the accumulated value from earlier batches (None for a new key).
  val usbDStream: DStream[(String, Int)] = dateProvinceAdCounts.updateStateByKey[Int] {
    (batchCounts: Seq[Int], previousTotal: Option[Int]) =>
      Option(batchCounts.sum + previousTotal.getOrElse(0))
  }

  // Compute the top 3 with Spark Streaming + Spark SQL on every batch.
  usbDStream.foreachRDD { rdd =>
    if (!rdd.isEmpty()) {
      val rowRDD = rdd.map { case (dateProvinceAd, count) =>
        val fields = dateProvinceAd.split(delimiter)
        val date = fields(0)
        val province = fields(1)
        val adId = fields(2).toInt
        Row(date, province, adId, count)
      }

      val schema = StructType(List(
        StructField("date", DataTypes.StringType, false),
        StructField("province", DataTypes.StringType, false),
        StructField("ad_id", DataTypes.IntegerType, false),
        StructField("click_count", DataTypes.IntegerType, false)
      ))

      val df = sqlContext.createDataFrame(rowRDD, schema)
      df.registerTempTable("date_province_ad_tmp")

      // A window-function alias cannot be filtered reliably in the same SELECT
      // (the original used `having rank < 4` without GROUP BY), so the ranking is
      // computed in a subquery and filtered in the outer query.
      // NOTE(review): the window partitions by province only, as in the original; if the
      // state spans several dates the ranking mixes them — consider `partition by date, province`.
      val ret = sqlContext.sql(
        "select date, province, ad_id, click_count, rank from (" +
          "select " +
          "date, " +
          "province, " +
          "ad_id, " +
          "click_count, " +
          "row_number() over(partition by province order by click_count desc) rank " +
          "from date_province_ad_tmp" +
          ") tmp " +
          "where rank < 4")
      ret.show()

      // Persist: one DAO and one batched insert per non-empty partition.
      ret.rdd.foreachPartition { partition =>
        if (partition.nonEmpty) {
          val adProvinceTopDao = new AdProvinceTopDaoImpl
          val list = new util.ArrayList[AdProvinceTop]()
          partition.foreach { row =>
            val apt = new AdProvinceTop
            apt.setDate(row.getAs[String]("date"))
            apt.setProvince(row.getAs[String]("province"))
            apt.setClick_count(row.getAs[Int]("click_count"))
            apt.setAd_id(row.getAs[Int]("ad_id"))
            list.add(apt)
          }
          adProvinceTopDao.insertBatch(list)
        }
      }
    }
  }
}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值