数据分析案例

部分图例:由于不明原因,pyecharts 的图在 Jupyter Notebook 中把代码导出为 HTML 后无法显示(如果有人知道为什么导出的 HTML 只有代码没有图,欢迎留言告诉我,在此先谢谢)。

数据集:链接 https://pan.baidu.com/s/1QsXOGYoURy_N0PbOqkqo9g
提取码:zgf1

代码部分:

#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd 
import os
#绘图
from pyecharts.charts import *
from pyecharts import options as opts
from pyecharts.globals import ThemeType
import numpy as np
import matplotlib.pyplot as plt
from pyecharts.commons.utils import JsCode


# # 数据加载

# In[2]:


# Load every file in ../data into a single DataFrame.
# The frames are collected first and concatenated once: calling pd.concat
# inside the loop re-copies the accumulated data for every file.
data_dir = '../data'
# os.listdir returns names in arbitrary order; sorting makes the row order
# of the combined frame reproducible between runs.
frames = [
    pd.read_csv(os.path.join(data_dir, file_name))
    for file_name in sorted(os.listdir(data_dir))
]
# pd.concat raises on an empty list, so fall back to an empty frame.
df = pd.concat(frames, axis=0, ignore_index=True) if frames else pd.DataFrame()
del frames  # release the per-file frames
df.head()


# ## 添加时间

# In[3]:


# Strip the literal ' UTC' suffix so the column holds plain timestamp strings.
df['event_time'] = df['event_time'].str.replace(' UTC', '', regex=False)


# In[4]:


# Parse the full timestamp once. `time` keeps only the calendar day
# (midnight), which is what the per-day group-bys below key on.
event_ts = pd.to_datetime(df['event_time'])
df['time'] = event_ts.dt.normalize()


# In[5]:


df['year'] = event_ts.dt.year
df['month'] = event_ts.dt.month
df['day'] = event_ts.dt.day
# Taken from the full timestamp: the date-only `time` column would always
# yield hour 0.
df['hour'] = event_ts.dt.hour
# `.dt.week` was removed in pandas 2.0; isocalendar().week gives the same ISO
# week number. Cast back to int64 to keep the dtype `.dt.week` produced.
df['week'] = event_ts.dt.isocalendar().week.astype('int64')
del event_ts  # release the temporary timestamp column


# ##  查看数据情况

# In[87]:


# DataFrame.info() prints its report itself and returns None, so it must not
# be passed to print(): that would emit the report first and then "None".
print('表详情:')
df.info()
print('-'*30)
print('查看缺失值:\n',df.isnull().sum())
print('-'*30)
# Count rows that repeat an earlier row on the listed columns.
print('查看是否有重复:\n',df.duplicated(subset=['event_time', 'event_type', 'product_id', 'category_id','category_code', 'brand', 'price', 'user_id', 'user_session']).value_counts())
print('-'*30)
# Summary statistics, used here to spot abnormal price values.
print('查看销售额异常值:\n',df.describe())
print('-'*30)


# ## price 存在负值异常,数量很少,直接删除

# In[6]:


# Remove, in place, every row whose price is negative.
df.drop(index=df.index[df['price'] < 0], inplace=True)


# # 整体流量分析

# ## UV(独立访客量)

# In[89]:


# Daily unique visitors: keep one row per (day, user), then count rows per day.
day_id = (
    df.drop_duplicates(subset=['time', 'user_id'])
    .groupby(by='time')
    .agg({'user_id': 'size'})
)
# Monthly unique visitors: same idea keyed on the month number.
month_id = (
    df.drop_duplicates(subset=['month', 'user_id'])
    .groupby(by='month')
    .agg({'user_id': 'size'})
)


# ## PV访客量

# In[90]:


# Page views: number of event rows per day and per month (no de-duplication).
day_ids, month_ids = (
    df.groupby(by=period).agg({'user_id': 'size'})
    for period in ('time', 'month')
)


# ## 总销售额

# In[91]:


# Daily total of `price` and the daily total per unique visitor.
day_prices = df.groupby(by=['time']).agg({'price':'sum'})
# Divide column by column, aligned on the shared `time` index. Dividing the
# 2-D `.values` of the frame by a 1-D array broadcasts to an n-by-n matrix,
# and taking row [0] of that matrix is not the per-day ratio.
day_mean_price = (day_prices['price'] / day_id['user_id']).round(2).tolist()
# Monthly total and monthly total per unique visitor; the denominator is the
# monthly visitor count (month_id), not the daily one.
month_prices = df.groupby(by=['month']).agg({'price':'sum'})
month_mean_price = (month_prices['price'] / month_id['user_id']).round(2).tolist()


# ## view-cart-purchase 与时间关系

# In[10]:


# Number of events per (week, event_type), indexed by week.
week_dt = df.groupby(by=['week','event_type'],as_index=False).size()
week_dt.set_index('week',inplace=True)


# In[11]:


# Plain Python list of week numbers (as done for `times_event` further down):
# pyecharts serialises chart options to JSON, so axis data should be native
# Python containers rather than a pandas Index.
times_week = week_dt.index.unique().to_list()
# One count per entry of `times_week` for each event type. Re-indexing on the
# full week list fills 0 for a week that lacks an event type, so every series
# has the same length as the x axis.
week_view, week_cart, week_purchase = (
    week_dt.loc[week_dt['event_type'] == event, 'size']
    .reindex(times_week, fill_value=0)
    .to_list()
    for event in ('view', 'cart', 'purchase')
)


# In[94]:


# Two-stop linear gradient used as the chart background (evaluated by ECharts).
background_color_js = (
   "new echarts.graphic.LinearGradient(0, 0, 0, 1, "
   "[{offset: 0, color: '#c86589'}, {offset: 1, color: '#06a7ff'}], false)"
   )
line = Line(init_opts=opts.InitOpts(bg_color=JsCode(background_color_js)))
line.add_xaxis(times_week)

# (legend name, weekly counts, line style) for the three step series.
# Only the purchase series sets an explicit colour.
weekly_series = (
    ('浏览累计行为', week_view, opts.LineStyleOpts(width=4)),
    ('加入购物车累计行为', week_cart, opts.LineStyleOpts(width=4)),
    ('购买累计行为', week_purchase, opts.LineStyleOpts(color='green',width=4)),
)
for legend_name, weekly_counts, stroke in weekly_series:
    line.add_yaxis(
        series_name=legend_name,
        y_axis=weekly_counts,
        label_opts=opts.LabelOpts(is_show=False),
        linestyle_opts=stroke,
        is_step=True,
    )

line.set_global_opts(
    tooltip_opts=opts.TooltipOpts(trigger='axis'),
    toolbox_opts=opts.ToolboxOpts(is_show=True),
)
line.render_notebook()


# In[95]:


# Drop the weekly intermediates, including the loop temporaries that still
# reference the count lists.
del weekly_series, legend_name, weekly_counts, stroke
del week_cart,week_dt,week_purchase,week_view


# ## 整体网站分析

# In[96]:


#日
def Mapping_Line(xs,x,ys,y,ys_name,y_name,chart_name,price_ys,price_ys_name,price_y,price_y_name):
    """Build a light-themed line chart with four smoothed area series.

    Args:
        xs: x-axis values set before the first series is added.
        x: x-axis values set before the remaining three series are added.
        ys, ys_name: values and legend name of the first series.
        y, y_name: values and legend name of the second series.
        chart_name: chart title.
        price_ys, price_ys_name: values and legend name of the third series.
        price_y, price_y_name: values and legend name of the fourth series.

    Returns:
        The configured ``Line`` chart (not yet rendered).
    """
    def _add_area_series(chart, legend_name, values):
        # Every series shares the same look: smoothed, unlabeled, unclipped,
        # with a translucent area fill.
        chart.add_yaxis(
            series_name=legend_name,
            y_axis=values,
            is_smooth=True,
            label_opts=opts.LabelOpts(is_show=False),
            is_clip=False,
            areastyle_opts=opts.AreaStyleOpts(opacity=0.4),
        )

    chart = Line(init_opts=opts.InitOpts(theme=ThemeType.LIGHT))

    # First series is added against `xs`.
    chart.add_xaxis(xs)
    _add_area_series(chart, ys_name, ys)

    # The x axis is then set again to `x` before the other three series.
    chart.add_xaxis(x)
    remaining = (
        (y_name, y),
        (price_ys_name, price_ys),
        (price_y_name, price_y),
    )
    for legend_name, values in remaining:
        _add_area_series(chart, legend_name, values)

    chart.set_global_opts(
        title_opts=opts.TitleOpts(title=chart_name),
        tooltip_opts=opts.TooltipOpts(trigger="axis"),
        toolbox_opts=opts.ToolboxOpts(is_show=True),
    )
    return chart


# ### 销售额图

# In[97]:


def Price_Line(x,ys,ys_name,y,y_name,chart_name):
    """Build a dark-themed line chart with two series sharing one x axis.

    The legend names and the title come from the arguments; previously they
    were accepted but replaced by hard-coded strings.

    Args:
        x: x-axis values.
        ys: values of the first series.
        ys_name: legend name of the first series.
        y: values of the second series.
        y_name: legend name of the second series.
        chart_name: chart title.

    Returns:
        The configured ``Line`` chart (not yet rendered).
    """
    line = Line(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    line.add_xaxis(x)
    line.add_yaxis(
        series_name=ys_name,
        y_axis=ys
    )
    line.add_yaxis(
        series_name=y_name,
        y_axis=y
    )
    line.set_global_opts(
        title_opts=opts.TitleOpts(title=chart_name),
        tooltip_opts=opts.TooltipOpts(is_show=True),
        toolbox_opts=opts.ToolboxOpts(is_show=True)
    )
    return line


# ###  1

# In[98]:


# Daily chart: event count, unique visitors, total price and price per visitor.
Mapping_Line(
    chart_name='日访客量',
    xs=list(day_ids.index),
    ys=list(day_ids['user_id']),
    ys_name='访客量',
    x=list(day_id.index),
    y=list(day_id['user_id']),
    y_name='独立访客量',
    price_ys=list(day_prices['price']),
    price_ys_name='总购买金额',
    price_y=day_mean_price,
    price_y_name='人均购买金额',
).render_notebook()


# In[99]:


# Monthly chart: same four series; month numbers become string axis labels.
Mapping_Line(
    chart_name='月访客量',
    xs=list(map(str, month_ids.index)),
    ys=month_ids['user_id'].to_list(),
    ys_name='访客量',
    x=list(map(str, month_id.index)),
    y=month_id['user_id'].to_list(),
    y_name='独立访客量',
    price_ys=list(month_prices['price']),
    price_ys_name='总购买金额',
    price_y=month_mean_price,
    price_y_name='人均购买金额',
).render_notebook()


# In[100]:


# Release the aggregates that are no longer needed.
del month_id, month_ids
del day_id, day_ids, day_prices, day_mean_price


# ## 转化率

# In[101]:


# Events per (week, event_type), without the 'remove_from_cart' rows,
# indexed by week.
dt = df.groupby(by=['week','event_type'],as_index=False).size()
dt = dt[dt['event_type'] != 'remove_from_cart'].set_index('week')


# In[102]:


times_event = dt.index.unique().to_list()


# In[103]:


# One funnel per week on a timeline.
time_paining = Timeline()
for week in times_event:
    # `.loc[[week]]` always returns a DataFrame. `.loc[week]` collapses to a
    # Series when the week has a single row, and zip() would then iterate the
    # characters of the event-type string.
    stage_counts = dt.loc[[week]]
    funnel = Funnel()
    funnel.add('数量', list(zip(stage_counts['event_type'], stage_counts['size'])))
    funnel.set_global_opts(
        title_opts=opts.TitleOpts(title='用户行为漏斗图/周')
    )
    time_paining.add(funnel, week)
time_paining.render_notebook()


# In[104]:


# Release memory.
del dt


# # 品牌分析

# ## 各品牌销售情况

# In[105]:


#删除缺少值  由于一个品牌有多个价格  无法根据价格确定品牌  也无更好解决方法  缺少率为:40%多
# Drop rows without a brand (the source note says more than 40% are missing
# and that price cannot be used to infer the brand).
# `subset` is passed as a list: a bare string is only accepted by pandas >= 1.5.
df.dropna(subset=['brand'], inplace=True)


# In[106]:


# Total price per (day, brand) and per (month, brand).
day_brand = df.groupby(by=['time','brand'],as_index=False)['price'].sum()
month_brand = df.groupby(by=['month','brand'],as_index=False)['price'].sum()


# In[107]:


# Timeline labels: distinct days as 'YYYY-MM-DD' strings, formatted with the
# vectorised accessor instead of a per-row str()/split().
times = list(day_brand['time'].dt.strftime('%Y-%m-%d').unique())


# In[108]:


# Index by day so one day's rows can be selected with .loc.
day_brand.set_index('time',inplace=True)


# In[109]:


#数据整理
def Sort_day_brand(time):
    """Return (brand, total price) pairs for one day of `day_brand`.

    Args:
        time: label used to select rows from the module-level `day_brand`
            frame through ``.loc``.

    Returns:
        A ``zip`` iterator of ``(brand, total)`` where ``total`` is the summed
        price of the brand rounded to two decimals.
    """
    per_brand = day_brand.loc[time].groupby('brand').agg({'price': 'sum'})
    rounded_totals = np.around(per_brand.price.to_list(), 2)
    return zip(per_brand.index.to_list(), rounded_totals)


# ### 词云图展示

# In[110]:


# One word cloud of brand totals per day, placed on a timeline.
time_line = Timeline()
for day in times:
    cloud = WordCloud()
    cloud.add(
        series_name='各品牌销售情况',
        data_pair=Sort_day_brand(day),
        shape='star',  # outline of the cloud
        emphasis_shadow_color=True,
    )
    time_line.add(cloud, day)
del day_brand  # release memory
time_line.render_notebook()


# In[ ]:





# ## 品牌月份销售

# ### 数据处理

# In[112]:


# Index the monthly brand totals by month number.
month_brand = month_brand.set_index('month')


# In[113]:


# Timeline labels: the distinct month numbers.
times = list(month_brand.index.unique())


# In[114]:


def Brand_month(Month):
    """Return (brand, total price) pairs for one month of `month_brand`.

    Args:
        Month: month label to select from the index of the module-level
            `month_brand` frame.

    Returns:
        A ``zip`` iterator of ``(brand, total)`` where ``total`` is the summed
        price of the brand rounded to two decimals.

    Raises:
        KeyError: if `Month` is not present in the index.
    """
    # Select by the argument. The previous body read the module-level name
    # `time`, so it only worked while a caller's loop variable happened to be
    # called `time`. The list selector keeps the result a DataFrame even when
    # the month has a single row.
    month_rows = month_brand.loc[[Month]]
    per_brand = month_rows.groupby('brand').agg({'price': 'sum'})
    rounded_totals = np.around(per_brand.price.to_list(), 2)
    return zip(per_brand.index.to_list(), rounded_totals)


# In[115]:


# One word cloud of brand totals per month, placed on a timeline.
# NOTE(review): keep the loop variable named `time`; Brand_month may resolve
# the month through this module-level name rather than through its argument.
time_line = Timeline()
for time in times:
    cloud = WordCloud()
    cloud.add(
        series_name='各品牌销售情况',
        data_pair=Brand_month(time),
        shape='star',  # outline of the cloud
        emphasis_shadow_color=True,
    )
    time_line.add(cloud, time)
del month_brand  # release memory
time_line.render_notebook()


# # 销售转化指标

# ## 购物车指标

# ### 加入购物车次数

# In[116]:


#天
event_type_day = df.groupby(by=['time','event_type'],as_index=False).size()
event_type_day.set_index('time',inplace=True)
#月
event_type_month = df.groupby(by=['month','event_type'],as_index=False).size()
event_type_month.set_index('month',inplace=True)


# In[117]:


#数据处理
#日
dt = event_type_day[event_type_day['event_type']=='cart']
x_day = dt.index.to_list()
y_day = dt['size'].to_list()
del dt
# 月
dt = event_type_month[event_type_month['event_type']=='cart']
x_month = [str(x) for x in dt.index.to_list()]
y_month = dt['size'].to_list()
del dt


# ### 加入购物车 支付转化率

# In[118]:


#日
day_cart_purchase = np.around(event_type_day[event_type_day['event_type']=='cart']['size'].values/event_type_day[event_type_day['event_type']=='purchase']['size'].values,2)
day_x = event_type_day.index.unique().to_list()


# In[119]:


#月
month_cart_purchase = np.around(event_type_month[event_type_month['event_type']=='cart']['size'].values/event_type_month[event_type_month['event_type']=='purchase']['size'].values,2)
month_x = event_type_day.index.unique().to_list()


# #### 绘图  

# In[120]:


def Painting_cart_size(x,y,chart_name,day_cart_purchase):
    """Render a gradient-background line chart with a count and a rate series.

    Args:
        x: x-axis values.
        y: values of the first series (legend '加入购物车次数:').
        chart_name: chart title.
        day_cart_purchase: values of the second series (legend '转化率:'),
            drawn with max/min mark points.

    Returns:
        The result of ``render_notebook()`` on the configured chart.
    """
    gradient = JsCode(
        "new echarts.graphic.LinearGradient(0, 0, 0, 1, "
        "[{offset: 0, color: '#c86589'}, {offset: 1, color: '#06a7ff'}], false)"
    )
    chart = Line(init_opts=opts.InitOpts(bg_color=gradient))
    chart.add_xaxis(x)

    # Count series: smoothed, half-transparent area, no point labels.
    chart.add_yaxis(
        series_name='加入购物车次数:',
        y_axis=y,
        is_smooth=True,
        is_clip=False,
        areastyle_opts=opts.AreaStyleOpts(opacity=0.5),
        label_opts=opts.LabelOpts(is_show=False),
    )

    # Rate series, with its maximum and minimum highlighted.
    extremes = opts.MarkPointOpts(
        data=[
            opts.MarkPointItem(name='最大值:', type_='max'),
            opts.MarkPointItem(name='最小值:', type_='min'),
        ]
    )
    chart.add_yaxis(
        series_name='转化率:',
        y_axis=day_cart_purchase,
        areastyle_opts=opts.AreaStyleOpts(opacity=0.4),
        label_opts=opts.LabelOpts(is_show=False),
        markpoint_opts=extremes,
    )

    chart.set_global_opts(
        title_opts=opts.TitleOpts(title=chart_name),
        toolbox_opts=opts.ToolboxOpts(is_show=True),
        tooltip_opts=opts.TooltipOpts(trigger='axis'),
    )
    return chart.render_notebook()


# In[121]:


#日加购次数
Painting_cart_size(x=x_day,y=y_day,chart_name='日',day_cart_purchase=day_cart_purchase)


# In[122]:


del x_day,y_day


# In[123]:


#月
Painting_cart_size(x=x_month,y=y_month,chart_name='月-加购次数',day_cart_purchase=month_cart_purchase)


# In[124]:


del x_month,y_month


# ## 下单指标

# ### 下单次数

# In[125]:


def _event_counts(events, event_name, periods):
    """Return the `size` of `event_name` for every label in `periods` (0 if absent)."""
    sizes = events.loc[events['event_type'] == event_name, 'size']
    return sizes.reindex(periods, fill_value=0)


def _view_to_purchase_rate(events, periods):
    """Return purchases per view for every label in `periods`, rounded to 2 decimals."""
    views = _event_counts(events, 'view', periods)
    purchases = _event_counts(events, 'purchase', periods)
    # A conversion rate is purchases / views (the ratio used to be inverted).
    # Periods without views give NaN after masking and are reported as 0.
    rate = purchases.div(views.where(views > 0)).fillna(0)
    return np.around(rate.to_numpy(), 2)


# Every series below is aligned on the same period labels, so the x axis,
# the view counts and the rates always have equal length.
day_periods = event_type_day.index.unique()
month_periods = event_type_month.index.unique()

view_size_day = _event_counts(event_type_day, 'view', day_periods).to_list()
day_x = day_periods.to_list()
view_size_month = _event_counts(event_type_month, 'view', month_periods).to_list()
month_x = [str(x) for x in month_periods]


# ### 浏览下单转化率

# In[126]:


# Daily and monthly view -> purchase conversion rate.
day_view_purchase = _view_to_purchase_rate(event_type_day, day_periods)
month_view_purchase = _view_to_purchase_rate(event_type_month, month_periods)


# #### 绘图

# In[127]:


Painting_cart_size(x=day_x,y=view_size_day,chart_name='日',day_cart_purchase=day_view_purchase)


# In[128]:


Painting_cart_size(x=month_x,y=view_size_month,chart_name='月',day_cart_purchase=month_view_purchase)


# # 用户价值挖掘

# In[7]:


#分组
# Event counts per (week, user, event_type), indexed by week so that one
# week's rows can be selected with .loc.
dt_now = (
    df.groupby(by=['week', 'user_id', 'event_type'], as_index=False)
    .size()
    .set_index('week')
)


# In[8]:


def User(time):
    """Score the users of one week and return those scoring above 20.

    Reads the module-level `dt_now` frame (indexed by week, with `user_id`,
    `event_type` and `size` columns). For each user with at least one view:

    * ``view_purchase`` = purchases / views, rounded to 2 decimals
    * ``cart-purchase`` = carts / views, rounded to 2 decimals
    * ``user_score``    = 0.6 * view_purchase + 0.4 * cart-purchase, rounded
      to a whole number

    Args:
        time: week label to select from the index of `dt_now`.

    Returns:
        A list of ``(user_id, user_score)`` tuples for users whose score is
        greater than 20, ordered by ``view_purchase`` then ``cart-purchase``,
        both descending.

    Raises:
        KeyError: if `time` is not present in the index of `dt_now`.
    """
    # The list selector always yields a DataFrame, even for a one-row week.
    week_rows = dt_now.loc[[time]]
    table = pd.pivot_table(week_rows, values='size', index='user_id',
                           columns='event_type', fill_value=0)
    # A week may lack an event type entirely; treat it as zero occurrences
    # instead of failing on the missing column.
    for event in ('view', 'cart', 'purchase'):
        if event not in table.columns:
            table[event] = 0

    # Copy the filtered rows so the new columns are written to an independent
    # frame rather than to a slice of `table`.
    table = table[table['view'] > 0].copy()
    table['view_purchase'] = round(table['purchase'] / table['view'], 2)
    table['cart-purchase'] = round(table['cart'] / table['view'], 2)
    table = table.sort_values(by=['view_purchase', 'cart-purchase'], ascending=False)

    table['user_score'] = round(table['view_purchase'] * 0.6 + table['cart-purchase'] * 0.4, 0)
    table = table[table['user_score'] > 20]

    return list(zip(table.index.to_list(), table.user_score.to_list()))


# In[12]:


# One word cloud of user scores per week, placed on a timeline.
time_line = Timeline()
for week in times_week:
    word_cloud = WordCloud()
    word_cloud.add(
        # This cloud shows user scores; the label used to be the one
        # copy-pasted from the brand sales charts.
        series_name='用户价值评分',
        data_pair=User(week),
        shape='star',  # outline of the cloud
        emphasis_shadow_color=True,
    )
    time_line.add(word_cloud, week)
time_line.render_notebook()




  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值