前段时间集中于理论学习,而代码写少了。因此手、脑就跟着生疏了,看别人写的代码,居然也半天反应不过来。耗时三天。
import pandas as pd
import numpy as np
import scipy.stats as ss
import datetime
############ 1. Data loading ############
# All three sheets live in the same workbook, so parse the file once:
# passing a list as ``sheet_name`` makes read_excel return a dict of
# DataFrames keyed by sheet name.
_sheets = pd.read_excel(
    './dau.xlsx',
    sheet_name=['日活', '日活按城市拆分', '日活按机型拆分'],
)

# Overall daily active users (DAU), then only the rows whose channel is 'App'.
data = _sheets['日活']
dau_app = data[data['渠道'] == 'App']

# DAU split by city, then only the 'App' channel rows.
data_city = _sheets['日活按城市拆分']
dau_city_app = data_city[data_city['渠道'] == 'App']

# DAU split by device model, then only the 'App' channel rows.
dau_manu = _sheets['日活按机型拆分']
dau_manu_app = dau_manu[dau_manu['渠道'] == 'App']
############ 2. Preprocessing ############
def _sorted_with_weekday(frame):
    """Return *frame* sorted by 'date' with a 'weekday' column added.

    The column holds ``weekday() + 1`` for each date, i.e. the 0-based
    weekday number shifted so that it starts at 1.
    """
    ordered = frame.sort_values(by='date')
    ordered['weekday'] = ordered['date'].apply(lambda day: day.weekday() + 1)
    return ordered


dau_app = _sorted_with_weekday(dau_app)
# Sort the App-only frame (not the unfiltered ``dau_manu``) so the channel
# filter applied when the sheet was loaded is preserved.
dau_manu_app = _sorted_with_weekday(dau_manu_app)
dau_city_app = _sorted_with_weekday(dau_city_app)
############ 3. Exploration ############
# Observations recorded while trying different parameters:
# 1. If dau_app_bef_35d turns out empty for the chosen parameters, every
#    value derived from it (deta/mean/std/std_1_lower ...) is NaN
#    (no mean -> nothing to compare against).
# 2. If dau_app_bef_35d has exactly one row, the mean is just that single
#    earlier value, and std/std_1_lower ... are NaN.

# Parameters: the reference date and the start of the 35-day look-back window.
currdate = pd.to_datetime('2022-05-16')
xdaysbefcurr = currdate - datetime.timedelta(days=35)
print('xdaysbefcurr', xdaysbefcurr)
print('currdate', currdate)
# 1-based weekday number of the reference date (same value as weekday() + 1).
currweek = currdate.isoweekday()
# Step through the body of get_his_week_dau_ms inline.
# Purpose: starting from the current date, summarise 'user_num1' over the
# rows of the preceding 35 days that fall on the same weekday.
in_window = (dau_app['date'] >= xdaysbefcurr) & (dau_app['date'] < currdate)
same_weekday = dau_app['weekday'] == currweek
dau_app_bef_35d = dau_app[in_window & same_weekday]

# Compute the statistics once; every band below is derived from them.
hist_values = dau_app_bef_35d['user_num1']
hist_mean = hist_values.mean()
hist_std = hist_values.std()

# NOTE(review): the key 'deta' looks like a typo for 'data'; it is kept
# unchanged because other code may read it.
thred = {
    'deta': dau_app_bef_35d,
    'mean': hist_mean,
    'std': hist_std,
}
# mean -/+ k * std bands for k = 1, 2, 3, inserted in lower/upper order.
for k in (1, 2, 3):
    thred[f'std_{k}_lower'] = hist_mean - k * hist_std
    thred[f'std_{k}_upper'] = hist_mean + k * hist_std
# Bare expression: presumably left over from a notebook cell, where it
# displays the dict; it has no effect when run as a script.
thred
index = 24
# Result 1: copy the thresholds held in thred onto row `index` of dau_app.
tmp = thred
# NOTE(review): assigning through .loc with a label that is not in
# dau_app's index appends a new row instead of updating one -- confirm that
# 24 is the label of the row for currdate.
# Each pair is (dau_app column, thred key); std_3_lower takes the 3-sigma
# lower bound rather than reusing the 2-sigma one.
for _target, _source in (
    ('his_mean', 'mean'),
    ('his_std', 'std'),
    ('std_1_lower', 'std_1_lower'),
    ('std_1_upper', 'std_1_upper'),
    ('std_2_lower', 'std_2_lower'),
    ('std_2_upper', 'std_2_upper'),
    ('std_3_lower', 'std_3_lower'),
    ('std_3_upper', 'std_3_upper'),
):
    dau_app.loc[index, _target] = tmp[_source]
# Inspect the row that was just written (uses `index`, not a repeated literal).
dau_app[dau_app.index == index]