group by cumsum: SQL, pandas, Spark, Tableau

 

 


Let's just get straight to it. First, some test data:

 

drop table if exists group_by_cumsum;

create table if not exists group_by_cumsum (
    create_time datetime,
    city        varchar(20),
    sales       bigint
);

-- note the two repeated 上海 rows for 2018-10 and 2018-11;
-- the monthly GROUP BY below will aggregate them
insert into group_by_cumsum values
    ('2018-09-01', '上海', 1),
    ('2018-10-01', '上海', 3),
    ('2018-11-01', '上海', 2),
    ('2018-10-01', '上海', 3),
    ('2018-11-01', '上海', 2),
    ('2018-09-01', '北京', 1),
    ('2018-10-01', '北京', 2),
    ('2018-11-01', '北京', 4),
    ('2018-09-01', '纽约', 1),
    ('2018-10-01', '纽约', 1),
    ('2018-11-01', '纽约', 2);

select * from group_by_cumsum order by city, create_time;

1. Plain SQL (subquery + window)

 

select
    create_month,
    city,
    total_sales,
    sum(total_sales) over (
        partition by city
        order by create_month        -- a running total needs an ORDER BY inside the window
        rows between unbounded preceding and current row
    ) as cum_sales
from (
    select
        date_format(create_time, '%Y-%m') as create_month,   -- %Y, not %y: four-digit year
        city,
        sum(sales) as total_sales
    from group_by_cumsum
    group by date_format(create_time, '%Y-%m'), city
) t1
order by city, create_month;

2. Window function directly on the GROUP BY

 

select
    date_format(create_time, '%Y-%m') as create_month,
    city,
    -- the inner sum(sales) is the GROUP BY aggregate; the outer sum(...) over(...) is the window
    sum(sum(sales)) over (
        partition by city
        order by date_format(create_time, '%Y-%m')
    ) as cum_sales
from group_by_cumsum
group by date_format(create_time, '%Y-%m'), city
order by city, create_month;
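
Both queries return the same running totals, computed by hand from the inserts above (query 2 just omits the total_sales column, and the city order depends on the column collation):

create_month | city | total_sales | cum_sales
2018-09      | 上海 |     1       |     1
2018-10      | 上海 |     6       |     7
2018-11      | 上海 |     4       |    11
2018-09      | 北京 |     1       |     1
2018-10      | 北京 |     2       |     3
2018-11      | 北京 |     4       |     7
2018-09      | 纽约 |     1       |     1
2018-10      | 纽约 |     1       |     2
2018-11      | 纽约 |     2       |     4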

3. pandas

 

monthly = df.groupby(['create_month', 'city'], as_index=False)['sales'].sum().sort_values(['city', 'create_month'])
monthly['cum_sales'] = monthly.groupby('city')['sales'].cumsum()
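
A self-contained sketch with the same toy data (building the DataFrame and the create_month column myself to mirror the SQL example; one reasonable way, not the only one):

import pandas as pd

df = pd.DataFrame({
    'create_time': pd.to_datetime(
        ['2018-09-01', '2018-10-01', '2018-11-01', '2018-10-01', '2018-11-01',
         '2018-09-01', '2018-10-01', '2018-11-01',
         '2018-09-01', '2018-10-01', '2018-11-01']),
    'city': ['上海'] * 5 + ['北京'] * 3 + ['纽约'] * 3,
    'sales': [1, 3, 2, 3, 2, 1, 2, 4, 1, 1, 2],
})
df['create_month'] = df['create_time'].dt.strftime('%Y-%m')

# monthly totals per city first (the GROUP BY step), then a per-city running total
monthly = (df.groupby(['create_month', 'city'], as_index=False)['sales'].sum()
             .sort_values(['city', 'create_month']))
monthly['cum_sales'] = monthly.groupby('city')['sales'].cumsum()
print(monthly)
# cum_sales per city: 上海 1, 7, 11 / 北京 1, 3, 7 / 纽约 1, 2, 4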

4. Spark

 

# PySpark group-by cumsum: per-city running total, ordered by month
from pyspark.sql import Window
from pyspark.sql import functions as F

windowval = (Window.partitionBy('city').orderBy('create_month')
             .rangeBetween(Window.unboundedPreceding, 0))  # unbounded preceding .. current row

df_w_cumsum = df.withColumn('cum_sum', F.sum('sales').over(windowval))
df_w_cumsum.show()
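
The df above is assumed to hold one row per (create_month, city) with the monthly total in sales. A minimal sketch of how to build it from the raw rows (the local SparkSession setup and column names are my assumptions, mirroring the SQL example):

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.master('local[*]').appName('group_by_cumsum').getOrCreate()
raw = spark.createDataFrame(
    [('2018-09-01', '上海', 1), ('2018-10-01', '上海', 3), ('2018-11-01', '上海', 2),
     ('2018-10-01', '上海', 3), ('2018-11-01', '上海', 2),
     ('2018-09-01', '北京', 1), ('2018-10-01', '北京', 2), ('2018-11-01', '北京', 4),
     ('2018-09-01', '纽约', 1), ('2018-10-01', '纽约', 1), ('2018-11-01', '纽约', 2)],
    ['create_time', 'city', 'sales'])

# aggregate to monthly totals first, as the SQL GROUP BY does
df = (raw.withColumn('create_month', F.date_format('create_time', 'yyyy-MM'))
         .groupBy('create_month', 'city')
         .agg(F.sum('sales').alias('sales')))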

5. Tableau

In Tableau this is a built-in quick table calculation: put SUM(Sales) on the view, add a Running Total table calculation computed along the month axis, and partition it by city. The equivalent calculated field is RUNNING_SUM(SUM([Sales])).

(The original post included a screenshot of the Tableau running-total configuration here.)
