数据如下:
shop1,2019-01-18,500
shop1,2019-02-10,500
shop1,2019-02-10,200
shop1,2019-02-11,600
shop1,2019-02-12,400
shop1,2019-02-13,200
shop1,2019-02-15,100
shop2,2019-02-10,100
shop2,2019-02-11,100
shop2,2019-02-13,100
shop2,2019-03-15,100
shop2,2019-04-15,100
/**
* 计算店铺的月销售额和累加到当前月的销售额
* shop1,2019-01-18,500
* shop1,2019-02-10,500
* shop1,2019-02-10,200
* shop1,2019-02-11,600
* shop1,2019-02-12,400
* shop1,2019-02-13,200
* shop1,2019-02-15,100
* shop2,2019-02-10,100
* shop2,2019-02-11,100
* shop2,2019-02-13,100
* shop2,2019-03-15,100
* shop2,2019-04-15,100
*
* 思路:先根据sid,月份聚合销售额
* 再根据sid分组,日期排序,累加月销售额
*/
object ShopIncomeSQL {
def main(args: Array[String]): Unit = {
val session = SparkSession.builder()
.appName(this.getClass.getSimpleName)
.master("local[*]")
.getOrCreate()
val df = session.read.option("inferSchema", "true")
.option("header", true)
.csv("data/shop.txt")
df.createTempView("tb_shop")
val tmp1 = session.sql(
"""
|select
| sid,
| mth,
| sum(money) mth_income
|from
|(
| select
| sid,
| substr(ctime,0,7) mth,
| cast(money as DOUBLE) money
| from
| tb_shop
|)
|group by sid, mth
|""".stripMargin)
tmp1.createTempView("v_tmp1")
val res = session.sql(
"""
|select
| sid,
| mth,
| mth_income,
| sum(mth_income) over(partition by sid order by mth rows between unbounded preceding and current row) total_money
|from
| v_tmp1
|""".stripMargin)
res.show()
}
}