数据库相连的消费者分析(一)

import pandas as pd
import numpy as np
from sqlalchemy import create_engine

def reader(query,db='data'):
    sql = query
    engine = create_engine('mysql+pymysql://root:messi123@localhost:3306/{0}?charset=utf8'.format(db))
    df = pd.read_sql(sql,engine)
    return df
    
reader(
"""
select
    data(paidTime) as order_dt,
    userId as user_id,
    sum(price) as order_amount,
    count(orderId) as order_products
from data.orderinfo
where isPaid = '已支付' and paidTime > '0000-00-00 00:00:00'
group by data(paidTime),userId
""")

df['order_dt'] = pd.to_datetime(df.order_dt,format="%Y-%m-%d")
df['month'] = df.order_dt.values.astype('datetime64[M]')

user_idorder_dtorder_productsorder_amountmonth
011997-01-01111.771997-01-01
121997-01-12112.001997-01-01
columns = ['user_id','order_dt','order_products','order_amount']
df = pd.read_table('D:\\data\CDNOW_master.txt',names=columns,sep='\s+')
user_idorder_dtorder_productsorder_amount
011997-01-01111.77
121997-01-12112.00
df['order_dt']=pd.to_datetime(df.order_dt,format='%Y%m%d')
df['month']=df.order_dt.values.astype('datetime64[M]')
df.head(2)
user_idorder_dtorder_productsorder_amountmonth
011997-01-01111.771997-01-01
121997-01-12112.001997-01-01
df.describe()
user_idorder_productsorder_amount
count69659.00000069659.00000069659.000000
mean11470.8545922.41004035.893648
std6819.9048482.33392436.281942
min1.0000001.0000000.000000
25%5506.0000001.00000014.490000
50%11410.0000002.00000025.980000
75%17273.0000003.00000043.700000
max23570.00000099.0000001286.010000

1.进行用户消费趋势的分析(按月)

每月的消费总金额
每月的消费次数
每月的产品购买数
每月的消费人数

grouped_month=df.groupby('month')
order_month_amount = grouped_month.order_amount.sum()

month
1997-01-01 299060.17
1997-02-01 379590.03
1997-03-01 393155.27
1997-04-01 142824.49
1997-05-01 107933.30
1997-06-01 108395.87
1997-07-01 122078.88
1997-08-01 88367.69
1997-09-01 81948.80
1997-10-01 89780.77
1997-11-01 115448.64
1997-12-01 95577.35
1998-01-01 76756.78
1998-02-01 77096.96
1998-03-01 108970.15
1998-04-01 66231.52
1998-05-01 70989.66
1998-06-01 76109.30
Name: order_amount, dtype: float64

import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')
order_month_amount.plot()

在这里插入图片描述

# 每月消费次数
order_num = grouped_month.user_id.count()
order_num.plot()

在这里插入图片描述

# 每月产品购买数
grouped_month.order_products.sum().plot()

在这里插入图片描述

# 每月购买人数
df.groupby(['month','user_id']).count().reset_index().groupby('month').count().user_id

month
1997-01-01 7846
1997-02-01 9633
1997-03-01 9524
1997-04-01 2822
1997-05-01 2214
1997-06-01 2339
1997-07-01 2180
1997-08-01 1772
1997-09-01 1739
1997-10-01 1839
1997-11-01 2028
1997-12-01 1864
1998-01-01 1537
1998-02-01 1551
1998-03-01 2060
1998-04-01 1437
1998-05-01 1488
1998-06-01 1506
Name: user_id, dtype: int64

grouped_month.user_id.apply(lambda x:len(x.drop_duplicates())).plot()

在这里插入图片描述

df.pivot_table(index = 'month',
              values = ['order_products','order_amount','user_id'],
              aggfunc={'order_products':'sum',
                      'order_amount':'sum',
                      'user_id':'count'}).reset_index()
monthorder_amountorder_productsuser_id
01997-01-01299060.17194168928
11997-02-01379590.032492111272
21997-03-01393155.272615911598
# 每月用户平均消费金额
user_num = grouped_month.user_id.apply(lambda x:len(x.drop_duplicates()))
average_amount = order_month_amount/user_num
average_amount.plot()

在这里插入图片描述

average_num = order_num/user_num
average_num.plot()

在这里插入图片描述

2.用户个体消费分析

用户消费金额、消费次数的描述统计
用户消费金额和消费次数的散点图
用户消费金额的分布图
用户消费次数的分布图
用户累计消费金额占比(百分之多少的用户占了百分之多少的消费额)

grouped_user = df.groupby('user_id')
grouped_user.sum().describe()
grouped_user.sum().query('order_amount<4000').plot.scatter(x='order_products',y='order_amount')
plt.style.use('ggplot')

在这里插入图片描述

grouped_user.sum().query('order_amount<300').order_amount.plot.hist(bins=20)

在这里插入图片描述

# 用户累计消费金额占比
user_cumsum=grouped_user.sum().sort_values('order_amount').apply(lambda x:x.cumsum()/x.sum())
user_cumsum.reset_index().order_amount.plot()

在这里插入图片描述

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值