import pandas as pd
order = pd.read_table('meal_order_info.csv',sep = ',',encoding = 'gbk')
order['lock_time'] = pd.to_datetime(order['lock_time'])
print('最小时间为:', pd.Timestamp.min)
print('最大时间为:', pd.Timestamp.max)
dateIndex = pd.DatetimeIndex(order['lock_time'])
print('转换为DatetimeIndex后数据的类型为:\n',type(dateIndex))
periodIndex = pd.PeriodIndex(order['lock_time'],freq = 'S')
print('转换为DatetimeIndex后数据的类型为:\n',type(periodIndex))
print('lock_time中的年份数据前5个为:',[i.year for i in order['lock_time']][:5])
print('lock_time中的月份数据前5个为:',[i.month for i in order['lock_time']][:5])
print('lock_time中的日期数据前5个为:',[i.day for i in order['lock_time']][:5])
print('lock_time中的星期名称数据前5个为:',[i.weekday_name for i in order['lock_time']][:5])
print('dateIndex中的星期名称数据前5个为:\n',dateIndex.weekday_name[:5])
print('periodIndex中的星期标号数据前5个为:',periodIndex.weekday[:5])
print('lock_time在加上一天前前5行数据为:\n',(order['lock_time']+pd.Timedelta(days = 1))[:5])## 将lock_time数据向后平移一天
timeDelta = order['lock_time'] - pd.to_datetime('2017-1-1')
print('lock_time减去2017年1月1日0点0时0分后的数据:\n',timeDelta[:5])
print('lock_time减去time1后的数据类型为:',timeDelta.dtypes)
4 分组 groupby,聚合 agg
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
engine = create_engine('mysql+pymysql://root:123456@127.0.0.1:\
3306/chat?charset=utf8')
detail = pd.read_sql_table('meal_order_detail1',con = engine)
detailGroup = detail[['order_id','counts','amounts']].groupby(by = 'order_id')
print('分组后的订单详情表为:',detailGroup)
print('订单详情表分组后前5组每组的均值为:\n',detailGroup.mean().head())
print('订单详情表分组后前5组每组的标准差为:\n',detailGroup.std().head())
print('订单详情表分组后前5组每组的大小为:','\n',detailGroup.size().head())
print('订单详情表的菜品销量与售价的和与均值为:\n',detail[['counts','amounts']].agg([np.sum,np.mean]))
print('订单详情表的菜品销量总和与售价的均值为:\n',detail.agg({'counts':np.sum,'amounts':np.mean}))
print('菜品订单详情表的菜品销量总和与售价的总和与均值为:\n',detail.agg({'counts':np.sum,'amounts':[np.mean,np.sum]}))
##自定义函数求两倍的和
def DoubleSum(data):
s = data.sum()*2
return s
print('菜品订单详情表的菜品销量两倍总和为:','\n',detail.agg({'counts':DoubleSum},axis = 0))
##自定义函数求两倍的和
def DoubleSum1(data):
s = np.sum(data)*2
return s
print('订单详情表的菜品销量两倍总和为:\n',detail.agg({'counts':DoubleSum1},axis = 0).head())
print('订单详情表的菜品销量与售价的和的两倍为:\n',detail[['counts','amounts']].agg(DoubleSum1))
print('订单详情表分组后前3组每组的均值为:\n',detailGroup.agg(np.mean).head(3))
print('订单详情表分组后前3组每组的标准差为:\n',detailGroup.agg(np.std).head(3))
print('订单详情分组前3组每组菜品总数和售价均值为:\n',detailGroup.agg({'counts':np.sum,'amounts':np.mean}).head(3))
print('订单详情表的菜品销量与售价的均值为:\n',detail[['counts','amounts']].apply(np.mean))
print('订单详情表分组后前3组每组的均值为:','\n', detailGroup.apply(np.mean).head(3))
print('订单详情表分组后前3组每组的标准差为:','\n', detailGroup.apply(np.std).head(3))
print('订单详情表的菜品销量与售价的两倍为:\n',detail[['counts','amounts']].transform(lambda x:x*2).head(4))
print('订单详情表分组后实现组内离差标准化后前五行为:\n',detailGroup.transform(lambda x:(x.mean()-x.min())/(x.max()-x.min())).head())