pythonmysql数据分析 tableau_python执行mysql 计算复购率+pyechart+Excel+Tableau绘制双Y轴图...

现有某超市的订单数据,内容如下:

先求每个会员在每个月的订单数

sql = ''' --

select MemberID, count(SheetID) AS 每个会员的订单数,substring(STime,1,7) as YearMonth

from OrderList

where MemberID IS NOT NULL

group by MemberID ,YearMonth

order by YearMonth

;

'''

df = pd.read_sql_query(sql, engine)

df.head(8)

给上表新增一列,判断其是否在本月多次购买

select temp.YearMonth,temp.MemberID, temp.每个会员的订单数,

case when temp.每个会员的订单数>1 then 1 else null

end as 是否在本月多次购买

from

(

select MemberID, count(SheetID) AS 每个会员的订单数,substring(STime,1,7) as YearMonth

from OrderList

where MemberID IS NOT NULL

group by MemberID ,YearMonth

order by YearMonth

) temp

;

再对上表进行分组统计多少会员是复购会员

sql = ''' -- 使用 count(temp_out.是否在本月多次购买) 也可以

select temp_out.YearMonth ,count(temp_out.MemberID) AS 本月购物的会员数, sum(temp_out.是否在本月多次购买) as 复购会员人数,

sum(temp_out.是否在本月多次购买)/count(temp_out.MemberID) as 占比

from (

select temp.YearMonth,temp.MemberID, temp.每个会员的订单数,

case when temp.每个会员的订单数>1 then 1 else null

end as 是否在本月多次购买

from

(

select MemberID, count(SheetID) AS 每个会员的订单数,substring(STime,1,7) as YearMonth

from OrderList

where MemberID IS NOT NULL

group by MemberID ,YearMonth

order by YearMonth

) temp

) temp_out

group by temp_out.YearMonth

;

'''

df = pd.read_sql_query(sql, engine)

df

上面的方法嵌套了三层子表:orderlist本身, temp , temp_out,实际上可以不用产生 是否在本月多次购买的子表,即上表的temp表,可以使用 sum(case when 每个会员的订单数 >1 else 0 end )统计,这样就少产生了一个子表。

sql = '''

select temp.YearMonth ,count(temp.MemberID) AS 本月购物的会员数,

sum(case when temp.每个会员的订单数 >1 then 1 else 0 end) as 复购会员人数,

sum(case when temp.每个会员的订单数 >1 then 1 else 0 end)/count(temp.MemberID) as 占比

from

(

select MemberID, count(SheetID) AS 每个会员的订单数,substring(STime,1,7) as YearMonth

from OrderList

where MemberID IS NOT NULL

group by MemberID ,YearMonth

order by YearMonth

) temp

group by temp.YearMonth

;

'''

df = pd.read_sql_query(sql, engine)

df.tail(24).to_csv('每月的复购率.csv',encoding='utf_8_sig')

df.head(8)

使用pyecharts 0.5画图

#0.5版本

import pyecharts

from pyecharts import Overlap, Bar, Line, Grid, EffectScatter

grid = Grid()

v1 = list( df.tail(24)['本月购物的会员数'].values)

v2 = list(df.tail(24)['复购会员人数'].values)

v3 = list( df.tail(24)['占比'].values*100) #

my_attr = list(df.tail(24)['YearMonth'].values) # ["{}号".format(i) for i in range(1, len(v1)+1)] #attr =

bar = Bar(title="DeepWind超市(南沙区)", title_pos="20%")

bar.add("会员人数", my_attr, v1)

bar.add("复购人数",my_attr,v2,yaxis_formatter=" 人",

yaxis_max=5200,

legend_pos="25%",

legend_orient="horizontal",

legend_top="15%",

)

overlap = Overlap(width=1200, height=600)

overlap.add(bar)

line = Line()

line.add("复购人数占比", my_attr, v3, yaxis_formatter=" %",yaxis_max=100)

es = EffectScatter()

#overlap = Overlap(width=1200, height=600)

overlap.add(line, is_add_yaxis=True, yaxis_index=1)

#es.add("", my_attr, v3, effect_scale=8,is_add_yaxis=True, yaxis_index=1,yaxis_max=4000)

#overlap.add(es)

grid.add(overlap, grid_right="20%")

grid.render()

overlap.render()

#bar

grid

使用pyecharts1.6 画图

import pyecharts.options as opts

from pyecharts.charts import Bar, Line

from pyecharts.globals import ThemeType

v1 = list( df.tail(24)['本月购物的会员数'].values)

v2 = list(df.tail(24)['复购会员人数'].values)

v3 = list( df.tail(24)['占比'].values*100) #

x_data= list(df.tail(24)['YearMonth'].values)

v1 = [int(each) for each in v1]

v2 = [int(each) for each in v2]

v3 = [int(each) for each in v3]

x_data= [str(each) for each in x_data]

bar = (

Bar(init_opts=opts.InitOpts(width="800px", height="400px",theme=ThemeType.DARK))

.add_xaxis(xaxis_data=x_data)

.add_yaxis(

series_name="会员人数",

yaxis_data= v1,

label_opts=opts.LabelOpts(is_show=False),

markpoint_opts=opts.MarkPointOpts(

data=[

opts.MarkPointItem(type_="max", name="最大值"),

opts.MarkPointItem(type_="min", name="最小值"),

]

),

)

.add_yaxis(

series_name="复购人数",

yaxis_data= v2,

label_opts=opts.LabelOpts(is_show=False),

markpoint_opts=opts.MarkPointOpts(

data=[

opts.MarkPointItem(type_="max", name="最大值"),

opts.MarkPointItem(type_="min", name="最小值"),

]

),

)

.extend_axis(

yaxis=opts.AxisOpts(

name="占比",

type_="value",

min_=50,

max_=100,

interval=10,

axislabel_opts=opts.LabelOpts(formatter="{value} %"),

)

)

.set_global_opts(

tooltip_opts=opts.TooltipOpts(

is_show=True, trigger="axis", axis_pointer_type="cross"

),

xaxis_opts=opts.AxisOpts(

type_="category",

axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow"),

),

yaxis_opts=opts.AxisOpts(

name="人数",

type_="value",

min_=0,

max_=5500,

interval=500,

axislabel_opts=opts.LabelOpts(formatter="{value}人"),

axistick_opts=opts.AxisTickOpts(is_show=True),

splitline_opts=opts.SplitLineOpts(is_show=True),

),

)

)

line = (

Line()

.add_xaxis(xaxis_data=x_data)

.add_yaxis(

series_name="占比",

yaxis_index=1,

y_axis=v3,

label_opts=opts.LabelOpts(is_show=False),

markpoint_opts=opts.MarkPointOpts(

data=[

opts.MarkPointItem(type_="max", name="最大值"),

opts.MarkPointItem(type_="min", name="最小值"),

]

),

)

)

bar.overlap(line).render("复购人数.html")

bar.overlap(line).render_notebook()#render("mixed_bar_and_line.html")

使用Excel画双Y轴图

此时只有一个Y轴,需要对占比重新生成一个列。

设置数据系列格式22.jpg

得到下图

最终得到

g

使用Tableau画双轴图

将得到

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值