本案例主要在于使用pandas的分组聚合函数和日期时间函数做简单分析。
import os #导入必要的库
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
os.chdir("D:\Data\File") #指定工作目录
%matplotlib inline #可视化必要设置
plt.rcParams["font.sans-serif"] = ["KAITI"]
plt.rcParams["axes.unicode_minus"] = False
一、导入excel数据
detail1 = pd.read_excel('meal_order_detail.xlsx',sheet_name='meal_order_detail1')#读取订单详情表文件
detail2 = pd.read_excel('meal_order_detail.xlsx',sheet_name='meal_order_detail2')#读取订单详情表文件
detail3 = pd.read_excel('meal_order_detail.xlsx',sheet_name='meal_order_detail3')#读取订单详情表文件
data = pd.concat([detail1,detail2,detail3],axis=0) #纵向拼接
data.head(3)
detail_id | order_id | dishes_id | logicprn_name | parent_class_name | dishes_name | itemis_add | counts | amounts | cost | place_order_time | discount_amt | discount_reason | kick_back | add_inprice | add_info | bar_code | picture_file | emp_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2956 | 417 | 610062 | NaN | NaN | 蒜蓉生蚝 | 0 | 1 | 49 | NaN | 2016-08-01 11:05:36 | NaN | NaN | NaN | 0 | NaN | NaN | caipu/104001.jpg | 1442 |
1 | 2958 | 417 | 609957 | NaN | NaN | 蒙古烤羊腿 | 0 | 1 | 48 | NaN | 2016-08-01 11:07:07 | NaN | NaN | NaN | 0 | NaN | NaN | caipu/202003.jpg | 1442 |
2 | 2961 | 417 | 609950 | NaN | NaN | 大蒜苋菜 | 0 | 1 | 30 | NaN | 2016-08-01 11:07:40 | NaN | NaN | NaN | 0 | NaN | NaN | caipu/303001.jpg | 1442 |
df1 = data.copy()
1.1 提出分析问题
# 1.订单表的长度,shape,values,columns
# 2.统计菜品的价格平均值(amount)
# 3.频数统计,什么菜最受欢迎
# 4.哪个id点的菜最多
# 5.哪个id吃的钱数多
# 5.哪个id吃的平均菜价贵
# 6.一天什么时候吃饭最多
# 7.哪一天人吃饭最多
# 8.星期几人吃饭最多
# 9.每日菜品总价格,均价,中位数
1.2 查看数据集基本信息
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
二、分析用餐数据
2.1 订单表的长度,shape,values,columns
df1.columns
df1.size
df1.shape
Index(['detail_id', 'order_id', 'dishes_id', 'logicprn_name',
'parent_class_name', 'dishes_name', 'itemis_add', 'counts', 'amounts',
'cost', 'place_order_time', 'discount_amt', 'discount_reason',
'kick_back