**【续上篇】 用Python分析用户消费行为 Student Consumption Analysis ①** https://blog.csdn.net/weixin_44216391/article/details/89309643
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
%matplotlib inline
# Use matplotlib's built-in "ggplot" style sheet for all subsequent figures.
plt.style.use("ggplot")
import warnings
# Silence every warning category for the rest of the session.
warnings.filterwarnings("ignore")
import seaborn as sns
from matplotlib.font_manager import FontProperties
# Load the SimHei font file from the Windows font directory and hand its family
# name to seaborn. NOTE(review): presumably needed so that Chinese text in plot
# labels renders correctly; the path only exists on Windows.
myfont=FontProperties(fname=r'C:\Windows\Fonts\simhei.ttf',size=14)
sns.set(font=myfont.get_name())
# Read the student consumption CSV from a hard-coded local path into `df`.
df=pd.read_csv("D:/2018_BigData/Python/Python_files_Notebook/theme_practice/student_consumption_day.csv")
# Last expression of the notebook cell: displays the first five rows.
df.head()
|
DealTime |
bf_StudentID |
AccName |
PerSex |
MonDeal |
avgMonDeal |
transaction_times |
month |
| 0 |
2018-07-01 |
13983 |
裘某某 |
男 |
-3.7 |
-3.70 |
1 |
2018-07-01 |
| 1 |
2018-07-01 |
14018 |
虞某某 |
男 |
-9.5 |
-9.50 |
1 |
2018-07-01 |
| 2 |
2018-07-01 |
14073 |
刘某某 |
男 |
-8.0 |
-8.00 |
1 |
2018-07-01 |
| 3 |
2018-07-01 |
14074 |
周某某 |
男 |
-14.3 |
-7.15 |
2 |
2018-07-01 |
| 4 |
2018-07-01 |
14097 |
毛某某 |
男 |
-10.0 |
-10.00 |
1 |
2018-07-01 |
# One row per student, one column per month; each cell is the summed
# transaction_times, with 0 where a student has no rows for that month.
pivoted_counts = df.pivot_table(
    index="bf_StudentID",
    columns="month",
    values="transaction_times",
    aggfunc="sum",
).fillna(0)

# Replace the column labels with the sorted, unique month values as strings.
# This relies on pivot_table emitting the month columns in that same order.
columns_month = df["month"].sort_values().astype("str").unique()
pivoted_counts.columns = columns_month

pivoted_counts.head()
|
2018-07-01 |
2018-08-01 |
2018-09-01 |
2018-10-01 |
2018-11-01 |
2018-12-01 |
2019-01-01 |
| bf_StudentID |
|
|
|
|
|
|
|
| 13012 |
0.0 |
0.0 |
10.0 |
10.0 |
15.0 |
7.0 |
9.0 |
| 13564 |
17.0 |
25.0 |
82.0 |
63.0 |
69.0 |
75.0 |
50.0 |
| 13599 |
8.0 |
10.0 |
39.0 |
33.0 |
34.0 |
31.0 |
26.0 |
| 13685 |
8.0 |
12.0 |
28.0 |
33.0 |
34.0 |
39.0 |
17.0 |
| 13947 |
9.0 |
22.0 |
81.0 |
64.0 |
66.0 |
72.0 |
63.0 |
# Turn the monthly transaction counts into 0/1 flags: 1 when a student made
# more than 30 transactions in that month, 0 otherwise.
# A vectorised comparison is used instead of DataFrame.applymap, which is
# deprecated since pandas 2.1 and calls a Python lambda once per cell.
pivoted_purchase = (pivoted_counts > 30).astype("int64")
pivoted_purchase.head()
|
2018-07-01 |
2018-08-01 |
2018-09-01 |
2018-10-01 |
2018-11-01 |
2018-12-01 |
2019-01-01 |
| bf_StudentID |
|
|
|
|
|
|
|
| 13012 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
| 13564 |
0 |
0 |
1 |
1 |
1 |
1 |
1 |
| 13599 |
0 |
0 |
1 |
1 |
1 |
1 |
0 |
| 13685 |
0 |
0 |
0 |
1 |
1 |
1 |
0 |
| 13947 |
0 |
0 |
1 |
1 |
1 |
1 |
1 |
def active_status(data):
    """Label each period of a 0/1 activity sequence with a lifecycle status.

    Args:
        data: Iterable of per-period flags in chronological order (for
            example one row of ``pivoted_purchase``). A value equal to 0
            means "inactive in that period"; any other value means active.
            Every element is processed, so the sequence may have any length.

    Returns:
        A list with one label per period:

        * ``"unreg"`` - inactive, and never active in any earlier period.
        * ``"new"`` - first active period (the previous label is ``"unreg"``
          or there is no previous period).
        * ``"love_canteen"`` - active, and the previous period was active too.
        * ``"unlike_canteen"`` - inactive after having been active before.
        * ``"occasionally_like_canteen"`` - active again right after an
          ``"unlike_canteen"`` period.
    """
    status = []
    # Iterate over the values themselves rather than ``data[i]``: integer
    # look-ups on a label-indexed pandas Series are deprecated, and the number
    # of periods no longer has to be exactly seven.
    for value in data:
        previous = status[-1] if status else None
        never_active = previous is None or previous == "unreg"
        if value == 0:
            status.append("unreg" if never_active else "unlike_canteen")
        elif never_active:
            status.append("new")
        elif previous == "unlike_canteen":
            status.append("occasionally_like_canteen")
        else:
            status.append("love_canteen")
    return status