Spark RDD: Cumulative Shop Sales Example

Sample input data (src/main/scala/data/shop.csv), one record per sale in the format shop_id,date,amount:

shop1,2019-01-18,500
shop1,2019-02-10,500
shop1,2019-02-10,200
shop1,2019-02-11,600
shop1,2019-02-12,400
shop1,2019-02-13,200
shop1,2019-02-15,100
shop1,2019-03-05,180
shop1,2019-04-05,280
shop1,2019-04-06,220
shop2,2019-02-10,100
shop2,2019-02-11,100
shop2,2019-02-13,100

  • Requirement: for each shop, compute the total sales of each month and the cumulative sales up to and including that month (the expected output is sketched below).
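For the sample data above, each output row carries the shop id, the month, that month's total, and the running total so far. The tuples below are worked out by hand from the data; the actual print order may differ, because foreach(println) runs independently on each partition:

(shop1,2019-01,500.0,500.0)
(shop1,2019-02,2000.0,2500.0)
(shop1,2019-03,180.0,2680.0)
(shop1,2019-04,500.0,3180.0)
(shop2,2019-02,300.0,300.0)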

import doit.day05_t.utils.SparkUtils
import org.apache.spark.rdd.RDD

object RollupMthIncomeRDD {

  def main(args: Array[String]): Unit = {

    // SparkUtils is a project-local helper; a sketch of it is given after the code
    val sc = SparkUtils.createContext(true)
    val lines: RDD[String] = sc.textFile("src/main/scala/data/shop.csv")

    // aggregate the amounts by (shop id, month)
    val reduced: RDD[((String, String), Double)] = lines.map(e => {
      val fields = e.split(",")
      val sid = fields(0)
      val dateStr = fields(1)
      val mth = dateStr.substring(0, 7)
      val money = fields(2).toDouble
      // use (shop id, month) together as the key
      ((sid, mth), money)
    }).reduceByKey(_ + _)

    // group by shop id, then accumulate within each shop in month order
    val result: RDD[(String, String, Double, Double)] = reduced.groupBy(_._1._1).mapValues(it => {
      // pull the iterator's data into a List in memory
      // and sort it by month (lexicographic order works for the yyyy-MM format)
      val sorted: List[((String, String), Double)] = it.toList.sortBy(_._1._2)
      var rollup = 0.0
      // walk the sorted months, keeping a running total
      sorted.map(t => {
        // the shop id is already the key preserved by mapValues, so only month and amount are needed
        val mth = t._1._2
        val mth_sales = t._2
        rollup += mth_sales
        (mth, mth_sales, rollup)
      })
    }).flatMapValues(lst => lst) // flatten each shop's List of months back into one record per month
      .map(t => (t._1, t._2._1, t._2._2, t._2._3)) // (shop id, month, monthly sales, cumulative sales)

    result.foreach(println)

    sc.stop()

  }

}
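SparkUtils is not part of Spark; it is a helper class from the author's own project whose source is not shown in the post. A minimal sketch of what doit.day05_t.utils.SparkUtils.createContext could look like (treating the boolean flag as a local-mode switch is an assumption), just so the example compiles and runs standalone:

package doit.day05_t.utils

import org.apache.spark.{SparkConf, SparkContext}

// Hypothetical stand-in for the author's SparkUtils helper.
object SparkUtils {

  // assumption: `local = true` means "run with a local[*] master for testing"
  def createContext(local: Boolean): SparkContext = {
    val conf = new SparkConf().setAppName("RollupMthIncomeRDD")
    if (local) {
      conf.setMaster("local[*]")
    }
    new SparkContext(conf)
  }
}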
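A side note on the design: groupBy collects all of one shop's monthly totals into a single in-memory List, which is fine here because a shop only has a handful of months. The same rolling sum can also be expressed with the DataFrame API and a window function; the following is only a sketch under assumed column names (sid, dt, money), not code from the original post:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._

object RollupMthIncomeDF {

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("RollupMthIncomeDF")
      .master("local[*]")
      .getOrCreate()
    import spark.implicits._

    // read the same csv and name the columns explicitly (the file has no header)
    val df = spark.read.csv("src/main/scala/data/shop.csv")
      .toDF("sid", "dt", "money")
      .select($"sid", substring($"dt", 1, 7).as("mth"), $"money".cast("double").as("money"))

    // monthly totals per shop
    val monthly = df.groupBy("sid", "mth").agg(sum("money").as("mth_sales"))

    // running total over the months of each shop
    val w = Window.partitionBy("sid").orderBy("mth")
      .rowsBetween(Window.unboundedPreceding, Window.currentRow)

    monthly
      .withColumn("rollup", sum("mth_sales").over(w))
      .orderBy("sid", "mth")
      .show()

    spark.stop()
  }
}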

 
