基于python的电商行业数据分析

 导入库

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import datetime

读取数据 

# Load the order records from gss.csv into the DataFrame used by every step below.
df = pd.read_csv(filepath_or_buffer='gss.csv')

计算指标

# Number of order rows per customer (one row of df is counted as one order line).
buy_num = df.groupby('Customer ID').size().to_frame('number')
# Average number of orders per customer.
print(round(np.mean(buy_num['number']), 1))

# Amount actually paid per row: list price reduced by the discount fraction.
df['realcost'] = df['Sales'] * (1 - df['Discount'])
# Total spend per customer. The column is selected *before* aggregating so
# that only 'realcost' is summed instead of every column in the frame.
buy_cost = df.groupby('Customer ID')['realcost'].sum().to_frame('cost')
# Average spend per customer.
print(round(np.mean(buy_cost['cost']), 2))
# Each aggregate below selects its column before summing, so only the needed
# column is aggregated rather than the whole frame.

# Total profit per product category, highest first.
cae_prof = df.groupby('Category')['Profit'].sum().to_frame()
cae_prof = cae_prof.sort_values(by=['Profit'], ascending=False)

# Total profit per (category, sub-category). Sorted by the 'Category' index
# level and then by profit; both keys are descending.
subcat_prof = df.groupby(['Category', 'Sub-Category'])['Profit'].sum().to_frame()
subcat_prof = subcat_prof.sort_values(by=['Category', 'Profit'], ascending=False)

# Total discounted sales per market, highest first.
mar_sal = df.groupby('Market')['realcost'].sum().to_frame()
mar_sal = mar_sal.sort_values(by=['realcost'], ascending=False)

# Total profit per market, highest first.
mar_prof = df.groupby('Market')['Profit'].sum().to_frame()
mar_prof = mar_prof.sort_values(by=['Profit'], ascending=False)
# Parse the order dates, which are stored as 'YYYY/MM/DD' strings, into
# time.struct_time values.
order_date = df['Order Date']
date_time = [time.strptime(raw, '%Y/%m/%d') for raw in order_date]

# Year of every order as a 4-digit string.
year = pd.DataFrame({'year': [time.strftime('%Y', parsed) for parsed in date_time]})

# Month of every order as a zero-padded 2-digit string.
month = pd.DataFrame({'month': [time.strftime('%m', parsed) for parsed in date_time]})

# Attach by position (.values) rather than by index label, so the columns stay
# aligned with their rows even if df does not carry a default 0..n-1 index.
df['year'] = year['year'].values
df['month'] = month['month'].values
# Number of distinct customers who bought in each year.
# nunique() replaces the size().count(level=...) chain: the `level` argument
# of Series.count was removed in pandas 2.0.
year_customer = df.groupby('year')['Customer ID'].nunique().to_frame('number')

# One sub-frame per year; 'year' holds 4-digit strings, hence the string keys.
df2011 = df[df['year'] == '2011']
df2012 = df[df['year'] == '2012']
df2013 = df[df['year'] == '2013']
df2014 = df[df['year'] == '2014']

# Distinct customers per month within each year. The customer column is
# 'Customer ID' (with a space), matching every other use in this script.
number11 = df2011.groupby('month')['Customer ID'].nunique().to_frame('number11')
number12 = df2012.groupby('month')['Customer ID'].nunique().to_frame('number12')
number13 = df2013.groupby('month')['Customer ID'].nunique().to_frame('number13')
number14 = df2014.groupby('month')['Customer ID'].nunique().to_frame('number14')

# Month-by-year table of distinct buyers. The first assignment fixes the row
# index (the months present in 2011); later columns are aligned to it.
table = pd.DataFrame()
table['2011'] = number11['number11']
table['2012'] = number12['number12']
table['2013'] = number13['number13']
table['2014'] = number14['number14']
print(number11.index)
# Yearly retention (the input for a yearly churn rate).
# First purchase year of every customer; only the 'year' column is reduced
# instead of taking the minimum of every column in the frame.
user_min = df.groupby('Customer ID')['year'].min().reset_index()
# Customers whose first purchase was in 2011.
userid = user_min[user_min['year'] == '2011']
# Customers who placed at least one order in 2012.
user = pd.DataFrame({'Customer ID': df.loc[df['year'] == '2012', 'Customer ID'].unique()})
# pd.merge defaults to an inner join on the shared column ('Customer ID'), so
# `tab` lists the customers first seen in 2011 who bought again in 2012.
tab = pd.merge(userid, user)

# Monthly retention within 2014 (the input for a monthly churn rate).
# First purchase month of every 2014 customer.
user_month = df2014.groupby('Customer ID')['month'].min().reset_index()
# Customers whose first 2014 purchase was in January.
userm = user_month[user_month['month'] == '01']
# Customers who placed at least one order in February 2014.
userm__ = pd.DataFrame(
    {'Customer ID': df2014.loc[df2014['month'] == '02', 'Customer ID'].unique()}
)
# Inner join again: January-2014 first-time buyers who also bought in February.
tabmonth = pd.merge(userm, userm__)
# Years covered by the per-year summaries below (stored as strings in df['year']).
years = ['2011', '2012', '2013', '2014']

# Average basket size per year: mean discounted amount of an order row.
# Only 'realcost' is averaged; a whole-frame mean() fails on non-numeric columns.
year_avg_cost = df.groupby('year')['realcost'].mean().to_frame()

# Number of purchases of every customer in every year (0 where the customer
# did not buy), one column per year.
user_year_num = df.groupby(['Customer ID', 'year']).size().unstack(fill_value=0)
# Average purchases per customer for each year, in the order of `years`.
avg_num = [np.mean(user_year_num[y]) for y in years]

# Profit contributed by every customer in every year (0 where none).
cou_year_pro = df.groupby(['Customer ID', 'year'])['Profit'].sum().unstack(fill_value=0)
# Average profit per customer for each year: one row per year, column 'avg_pro'.
avg_pro = pd.DataFrame(
    {'avg_pro': [np.mean(cou_year_pro[y]) for y in years]},
    index=years,
)
# Shipping duration: time elapsed between the order date and the ship date.

# Order dates parsed from 'YYYY/MM/DD' strings into struct_time values.
order_date1 = df['Order Date']
order_time = [time.strptime(raw, '%Y/%m/%d') for raw in order_date1]

# Ship dates parsed the same way.
send_date = df['Ship Date']
send_time = [time.strptime(raw, '%Y/%m/%d') for raw in send_date]

# The first three struct_time fields are (year, month, day); rebuild datetime
# objects from them and subtract to get one timedelta per order row.
result = [
    datetime.datetime(*shipped[:3]) - datetime.datetime(*ordered[:3])
    for ordered, shipped in zip(order_time, send_time)
]

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值