import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# log.txt is tab-separated and has no header row (header=None), so pandas
# labels the nine columns with the integers 0..8 until they are renamed.
df = pd.read_csv('log.txt',sep='\t',header=None)
# Preview the first five rows.
df.head()
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
---|
0 | 2019162542 | /front-api/bill/create | 8 | 1057.31 | 88.75 | 177.72 | 132.0 | 60 | 2018-11-01 00:00:07 |
---|
1 | 162644 | /front-api/bill/create | 5 | 749.12 | 103.79 | 240.38 | 149.0 | 60 | 2018-11-01 00:01:07 |
---|
2 | 162742 | /front-api/bill/create | 5 | 845.84 | 136.31 | 225.73 | 169.0 | 60 | 2018-11-01 00:02:07 |
---|
3 | 162808 | /front-api/bill/create | 9 | 1305.52 | 90.12 | 196.61 | 145.0 | 60 | 2018-11-01 00:03:07 |
---|
4 | 162943 | /front-api/bill/create | 3 | 568.89 | 138.45 | 232.02 | 189.0 | 60 | 2018-11-01 00:04:07 |
---|
df.shape
(179496, 9)
df.dtypes
0 int64
1 object
2 int64
3 float64
4 float64
5 float64
6 float64
7 int64
8 object
dtype: object
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 179496 entries, 0 to 179495
Data columns (total 9 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 0 179496 non-null int64
1 1 179496 non-null object
2 2 179496 non-null int64
3 3 179496 non-null float64
4 4 179496 non-null float64
5 5 179496 non-null float64
6 6 179496 non-null float64
7 7 179496 non-null int64
8 8 179496 non-null object
dtypes: float64(4), int64(3), object(2)
memory usage: 12.3+ MB
df.describe()
| 0 | 2 | 3 | 4 | 5 | 6 | 7 |
---|
count | 1.794960e+05 | 179496.000000 | 179496.000000 | 179496.000000 | 179496.000000 | 179496.000000 | 179496.0 |
---|
mean | 6.877739e+06 | 7.175909 | 1393.177832 | 108.419626 | 359.880374 | 187.812208 | 60.0 |
---|
std | 6.012494e+06 | 4.325160 | 1499.486073 | 79.640693 | 638.919827 | 224.464813 | 0.0 |
---|
min | 1.626440e+05 | 1.000000 | 36.550000 | 3.210000 | 36.550000 | 36.000000 | 60.0 |
---|
25% | 3.825233e+06 | 4.000000 | 607.707500 | 83.410000 | 198.280000 | 144.000000 | 60.0 |
---|
50% | 6.811510e+06 | 7.000000 | 1154.905000 | 97.120000 | 256.090000 | 167.000000 | 60.0 |
---|
75% | 9.981455e+06 | 10.000000 | 1834.117500 | 116.990000 | 374.410000 | 202.000000 | 60.0 |
---|
max | 2.019163e+09 | 31.000000 | 142650.550000 | 18896.640000 | 142468.270000 | 71325.000000 | 60.0 |
---|
加上列名
列名 描述
- id 自增字段
- api api对应的url
- count 单位时间内被访问的次数
- res_time_sum 响应时间总和(毫秒)
- res_time_min 最小响应时间
- res_time_max 最大响应时间
- res_time_avg 平均值
- interval 采样间隔时间(秒)
- created_at 创建日志时间
# Give the nine positional columns their names from the log schema.
column_names = [
    'id',            # auto-increment record id
    'api',           # URL of the API endpoint
    'count',         # number of calls within one sampling interval
    'res_time_sum',  # total response time (milliseconds)
    'res_time_min',  # minimum response time
    'res_time_max',  # maximum response time
    'res_time_avg',  # average response time
    'interval',      # sampling interval (seconds)
    'created_at',    # time the log record was created
]
df.columns = column_names
df.head()
| id | api | count | res_time_sum | res_time_min | res_time_max | res_time_avg | interval | created_at |
---|
0 | 2019162542 | /front-api/bill/create | 8 | 1057.31 | 88.75 | 177.72 | 132.0 | 60 | 2018-11-01 00:00:07 |
---|
1 | 162644 | /front-api/bill/create | 5 | 749.12 | 103.79 | 240.38 | 149.0 | 60 | 2018-11-01 00:01:07 |
---|
2 | 162742 | /front-api/bill/create | 5 | 845.84 | 136.31 | 225.73 | 169.0 | 60 | 2018-11-01 00:02:07 |
---|
3 | 162808 | /front-api/bill/create | 9 | 1305.52 | 90.12 | 196.61 | 145.0 | 60 | 2018-11-01 00:03:07 |
---|
4 | 162943 | /front-api/bill/create | 3 | 568.89 | 138.45 | 232.02 | 189.0 | 60 | 2018-11-01 00:04:07 |
---|
查看各列的描述性统计(注:df.describe() 不能检测重复值,检测重复值应使用 df.duplicated().any())
df.describe()
| id | count | res_time_sum | res_time_min | res_time_max | res_time_avg | interval |
---|
count | 1.794960e+05 | 179496.000000 | 179496.000000 | 179496.000000 | 179496.000000 | 179496.000000 | 179496.0 |
---|
mean | 6.877739e+06 | 7.175909 | 1393.177832 | 108.419626 | 359.880374 | 187.812208 | 60.0 |
---|
std | 6.012494e+06 | 4.325160 | 1499.486073 | 79.640693 | 638.919827 | 224.464813 | 0.0 |
---|
min | 1.626440e+05 | 1.000000 | 36.550000 | 3.210000 | 36.550000 | 36.000000 | 60.0 |
---|
25% | 3.825233e+06 | 4.000000 | 607.707500 | 83.410000 | 198.280000 | 144.000000 | 60.0 |
---|
50% | 6.811510e+06 | 7.000000 | 1154.905000 | 97.120000 | 256.090000 | 167.000000 | 60.0 |
---|
75% | 9.981455e+06 | 10.000000 | 1834.117500 | 116.990000 | 374.410000 | 202.000000 | 60.0 |
---|
max | 2.019163e+09 | 31.000000 | 142650.550000 | 18896.640000 | 142468.270000 | 71325.000000 | 60.0 |
---|
df.sample(3)
| id | api | count | res_time_sum | res_time_min | res_time_max | res_time_avg | interval | created_at |
---|
23273 | 2234204 | /front-api/bill/create | 2 | 357.94 | 140.89 | 217.05 | 178.0 | 60 | 2018-11-28 00:31:01 |
---|
83811 | 6426819 | /front-api/bill/create | 11 | 2628.88 | 91.26 | 657.51 | 238.0 | 60 | 2019-02-06 19:56:59 |
---|
85211 | 6519448 | /front-api/bill/create | 7 | 1102.81 | 89.82 | 239.91 | 157.0 | 60 | 2019-02-08 15:16:02 |
---|
检测是否有缺失值
df.isnull().any()
id False
api False
count False
res_time_sum False
res_time_min False
res_time_max False
res_time_avg False
interval False
created_at False
dtype: bool
df.sample(3)
| id | api | count | res_time_sum | res_time_min | res_time_max | res_time_avg | interval | created_at |
---|
29671 | 2808559 | /front-api/bill/create | 6 | 777.74 | 103.89 | 162.75 | 129.0 | 60 | 2018-12-05 16:25:16 |
---|
19326 | 1881203 | /front-api/bill/create | 13 | 2141.63 | 112.94 | 249.65 | 164.0 | 60 | 2018-11-23 15:45:52 |
---|
145463 | 10813842 | /front-api/bill/create | 8 | 1225.72 | 90.89 | 301.23 | 153.0 | 60 | 2019-04-22 14:03:39 |
---|
分析api和interval这两列的数据是否对分析有用,如果无用,说明为什么后将这两列丢弃
df['api'].describe()
count 179496
unique 1
top /front-api/bill/create
freq 179496
Name: api, dtype: object
df.interval.unique()
array([60], dtype=int64)
df = df.drop(['api','interval'],axis=1)
api 和 interval 两列都只有一个取值,对数据分析没有区分作用,因此删除
df.head()
| id | count | res_time_sum | res_time_min | res_time_max | res_time_avg | created_at |
---|
0 | 2019162542 | 8 | 1057.31 | 88.75 | 177.72 | 132.0 | 2018-11-01 00:00:07 |
---|
1 | 162644 | 5 | 749.12 | 103.79 | 240.38 | 149.0 | 2018-11-01 00:01:07 |
---|
2 | 162742 | 5 | 845.84 | 136.31 | 225.73 | 169.0 | 2018-11-01 00:02:07 |
---|
3 | 162808 | 9 | 1305.52 | 90.12 | 196.61 | 145.0 | 2018-11-01 00:03:07 |
---|
4 | 162943 | 3 | 568.89 | 138.45 | 232.02 | 189.0 | 2018-11-01 00:04:07 |
---|
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 179496 entries, 0 to 179495
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 id 179496 non-null int64
1 count 179496 non-null int64
2 res_time_sum 179496 non-null float64
3 res_time_min 179496 non-null float64
4 res_time_max 179496 non-null float64
5 res_time_avg 179496 non-null float64
6 created_at 179496 non-null object
dtypes: float64(4), int64(2), object(1)
memory usage: 9.6+ MB
使用created_at这一列的数据作为时间索引
df.index = df['created_at']
df.head()
| id | count | res_time_sum | res_time_min | res_time_max | res_time_avg | created_at |
---|
created_at | | | | | | | |
---|
2018-11-01 00:00:07 | 2019162542 | 8 | 1057.31 | 88.75 | 177.72 | 132.0 | 2018-11-01 00:00:07 |
---|
2018-11-01 00:01:07 | 162644 | 5 | 749.12 | 103.79 | 240.38 | 149.0 | 2018-11-01 00:01:07 |
---|
2018-11-01 00:02:07 | 162742 | 5 | 845.84 | 136.31 | 225.73 | 169.0 | 2018-11-01 00:02:07 |
---|
2018-11-01 00:03:07 | 162808 | 9 | 1305.52 | 90.12 | 196.61 | 145.0 | 2018-11-01 00:03:07 |
---|
2018-11-01 00:04:07 | 162943 | 3 | 568.89 | 138.45 | 232.02 | 189.0 | 2018-11-01 00:04:07 |
---|
df.index
Index(['2018-11-01 00:00:07', '2018-11-01 00:01:07', '2018-11-01 00:02:07',
'2018-11-01 00:03:07', '2018-11-01 00:04:07', '2018-11-01 00:05:07',
'2018-11-01 00:06:07', '2018-11-01 00:07:07', '2018-11-01 00:08:07',
'2018-11-01 00:09:07',
...
'2019-05-30 23:01:21', '2019-05-30 23:02:21', '2019-05-30 23:03:21',
'2019-05-30 23:04:21', '2019-05-30 23:05:21', '2019-05-30 23:06:21',
'2019-05-30 23:07:21', '2019-05-30 23:08:21', '2019-05-30 23:09:21',
'2019-05-30 23:10:21'],
dtype='object', name='created_at', length=179496)
# Parse the created_at strings into timestamps and use them as the row index,
# which turns the plain object Index into a DatetimeIndex.
timestamps = pd.to_datetime(df['created_at'])
df.index = timestamps
df.index
DatetimeIndex(['2018-11-01 00:00:07', '2018-11-01 00:01:07',
'2018-11-01 00:02:07', '2018-11-01 00:03:07',
'2018-11-01 00:04:07', '2018-11-01 00:05:07',
'2018-11-01 00:06:07', '2018-11-01 00:07:07',
'2018-11-01 00:08:07', '2018-11-01 00:09:07',
...
'2019-05-30 23:01:21', '2019-05-30 23:02:21',
'2019-05-30 23:03:21', '2019-05-30 23:04:21',
'2019-05-30 23:05:21', '2019-05-30 23:06:21',
'2019-05-30 23:07:21', '2019-05-30 23:08:21',
'2019-05-30 23:09:21', '2019-05-30 23:10:21'],
dtype='datetime64[ns]', name='created_at', length=179496, freq=None)
df.head(2)
| id | count | res_time_sum | res_time_min | res_time_max | res_time_avg | created_at |
---|
created_at | | | | | | | |
---|
2018-11-01 00:00:07 | 2019162542 | 8 | 1057.31 | 88.75 | 177.72 | 132.0 | 2018-11-01 00:00:07 |
---|
2018-11-01 00:01:07 | 162644 | 5 | 749.12 | 103.79 | 240.38 | 149.0 | 2018-11-01 00:01:07 |
---|
分析api调用次数情况
# Use the SimHei font for the Chinese labels and keep the minus sign renderable.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Histogram of the call counts, split into 30 bins.
fig, ax = plt.subplots(figsize=(16, 8))
ax.hist(df['count'], bins=30, color='g')
ax.set_xlabel('调用次数')
ax.set_ylabel('调用次数计数')
ax.set_title('调用次数分布')
plt.show()
![在这里插入图片描述](https://img-blog.csdnimg.cn/9b2e012faf7d455f80f046b66d586145.png?x-oss-process=image/watermark,type_d3F5LXplbmhlaQ,shadow_50,text_Q1NETiBAU3Rhbm5pcw==,size_20,color_FFFFFF,t_70,g_se,x_16#pic_center)
-
调用次数基本在30次以内,集中在10次以内
-
选取一天,查看各个时间段的调用情况
df2 = df.loc['2018-11-01','count']
df2.head()
created_at
2018-11-01 00:00:07 8
2018-11-01 00:01:07 5
2018-11-01 00:02:07 5
2018-11-01 00:03:07 9
2018-11-01 00:04:07 3
Name: count, dtype: int64
# Line chart of the call counts over the selected day (2018-11-01).
# df2 is indexed by timestamp, so the x-axis is time and the y-axis is the
# call count; the labels and title now say so instead of reusing the
# histogram's "count / frequency / distribution" wording.
plt.figure(figsize=(16,8))
plt.plot(df2,color='g')
plt.xlabel('时间')
plt.ylabel('调用次数')
plt.title('2018-11-01 各时间段调用次数')
plt.show()
![在这里插入图片描述](https://img-blog.csdnimg.cn/5280ec35310d44beaf57c4259c1f4f61.png?x-oss-process=image/watermark,type_d3F5LXplbmhlaQ,shadow_50,text_Q1NETiBAU3Rhbm5pcw==,size_20,color_FFFFFF,t_70,g_se,x_16#pic_center)
- 凌晨时段基本无人访问;下午2、3点出现第一个访问高峰,晚上8、9点出现第二个访问高峰
# Hourly mean of the call counts for 2018-11-01.
# The column is selected inside .loc (no chained indexing), and the lowercase
# 'h' frequency alias is used because the uppercase 'H' alias is deprecated in
# recent pandas releases. Hours without any rows come out as NaN.
df2 = df.loc['2018-11-1', ['count']]
df2 = df2.resample('1h').mean()
df2.head()
| count |
---|
created_at | |
---|
2018-11-01 00:00:00 | 3.090909 |
---|
2018-11-01 01:00:00 | 1.793103 |
---|
2018-11-01 02:00:00 | 1.000000 |
---|
2018-11-01 03:00:00 | NaN |
---|
2018-11-01 04:00:00 | NaN |
---|
df2.fillna(0,inplace=True)
df2.isnull().any()
count False
dtype: bool
df2.head()
| count |
---|
created_at | |
---|
2018-11-01 00:00:00 | 3.090909 |
---|
2018-11-01 01:00:00 | 1.793103 |
---|
2018-11-01 02:00:00 | 1.000000 |
---|
2018-11-01 03:00:00 | 0.000000 |
---|
2018-11-01 04:00:00 | 0.000000 |
---|
# Line chart of the hourly values held in df2.
fig, ax = plt.subplots(figsize=(16, 8))
ax.plot(df2, color='g')
ax.set_xlabel('24H')
ax.set_ylabel('调用次数计数')
ax.set_title('某天24H调用次数分布')
plt.show()
![在这里插入图片描述](https://img-blog.csdnimg.cn/121453c9bb304b5c91f199f2f5d44805.png?x-oss-process=image/watermark,type_d3F5LXplbmhlaQ,shadow_50,text_Q1NETiBAU3Rhbm5pcw==,size_20,color_FFFFFF,t_70,g_se,x_16#pic_center)
# Bar chart of the same hourly values, with the tick labels rotated by
# 60 degrees.
fig, ax = plt.subplots(figsize=(16, 8))
df2['count'].plot(kind='bar', color='g', ax=ax)
plt.setp(ax.get_xticklabels(), rotation=60)
ax.set_xlabel('24H')
ax.set_ylabel('调用次数计数')
ax.set_title('某天24H调用次数')
plt.show()
![在这里插入图片描述](https://img-blog.csdnimg.cn/7bb3509deb12412d987f3bed65fceb30.png?x-oss-process=image/watermark,type_d3F5LXplbmhlaQ,shadow_50,text_Q1NETiBAU3Rhbm5pcw==,size_20,color_FFFFFF,t_70,g_se,x_16#pic_center)
# Box plot of the call counts on 2018-11-01; the mean is drawn as a line.
fig, ax = plt.subplots(figsize=(16, 8))
day_counts = df.loc['2018-11-1', ['count']]
day_counts.boxplot(ax=ax, showmeans=True, meanline=True)
plt.show()
![在这里插入图片描述](https://img-blog.csdnimg.cn/598b30a06dfb45b197c3c537bdb54761.png?x-oss-process=image/watermark,type_d3F5LXplbmhlaQ,shadow_50,text_Q1NETiBAU3Rhbm5pcw==,size_20,color_FFFFFF,t_70,g_se,x_16#pic_center)
df[df['count'] > 20]
| id | count | res_time_sum | res_time_min | res_time_max | res_time_avg | created_at |
---|
created_at | | | | | | | |
---|
2018-11-01 20:47:09 | 227295 | 21 | 3117.20 | 84.90 | 260.82 | 148.0 | 2018-11-01 20:47:09 |
---|
2018-11-01 21:03:09 | 228772 | 21 | 3706.20 | 78.12 | 321.47 | 176.0 | 2018-11-01 21:03:09 |
---|
2018-11-01 21:13:09 | 229667 | 24 | 4602.03 | 76.31 | 391.12 | 191.0 | 2018-11-01 21:13:09 |
---|
2018-11-02 21:34:11 | 311202 | 30 | 4610.15 | 72.49 | 463.41 | 153.0 | 2018-11-02 21:34:11 |
---|
2018-11-03 14:20:13 | 353337 | 21 | 3113.93 | 74.29 | 266.20 | 148.0 | 2018-11-03 14:20:13 |
---|
... | ... | ... | ... | ... | ... | ... | ... |
---|
2019-05-30 21:33:21 | 13431497 | 27 | 6456.64 | 99.65 | 978.91 | 239.0 | 2019-05-30 21:33:21 |
---|
2019-05-30 21:43:21 | 13432325 | 21 | 6371.84 | 65.98 | 1175.37 | 303.0 | 2019-05-30 21:43:21 |
---|
2019-05-30 21:47:21 | 13432632 | 21 | 3992.83 | 87.83 | 440.88 | 190.0 | 2019-05-30 21:47:21 |
---|
2019-05-30 21:53:21 | 13433108 | 24 | 8467.02 | 120.22 | 1511.17 | 352.0 | 2019-05-30 21:53:21 |
---|
2019-05-30 22:17:21 | 13435027 | 21 | 4926.35 | 85.01 | 826.90 | 234.0 | 2019-05-30 22:17:21 |
---|
746 rows × 7 columns
分析一天中api响应时间
df_avgtime = df.loc['2018-11-1']['res_time_avg']
df_avgtime.head(2)
created_at
2018-11-01 00:00:07 132.0
2018-11-01 00:01:07 149.0
Name: res_time_avg, dtype: float64
# Line chart of the average response time over the selected day.
fig, ax = plt.subplots(figsize=(16, 8))
ax.plot(df_avgtime)
plt.show()
![在这里插入图片描述](https://img-blog.csdnimg.cn/274065615fba4020914baa36069eb6e9.png?x-oss-process=image/watermark,type_d3F5LXplbmhlaQ,shadow_50,text_Q1NETiBAU3Rhbm5pcw==,size_20,color_FFFFFF,t_70,g_se,x_16#pic_center)
# Box plot of the average response time on 2018-11-01; the mean is drawn as a line.
fig, ax = plt.subplots(figsize=(16, 8))
day_avg_time = df.loc['2018-11-1', ['res_time_avg']]
day_avg_time.boxplot(ax=ax, showmeans=True, meanline=True)
plt.show()
![在这里插入图片描述](https://img-blog.csdnimg.cn/cea787319b7c4e4086d058e5d6c37bcd.png?x-oss-process=image/watermark,type_d3F5LXplbmhlaQ,shadow_50,text_Q1NETiBAU3Rhbm5pcw==,size_20,color_FFFFFF,t_70,g_se,x_16#pic_center)
# Rows of 2018-11-01 whose average response time exceeds 300.
# The boolean mask is built from df2 itself: a mask taken from the full frame
# `df` has a different (longer) index and only selects the right rows through
# implicit index alignment.
df2 = df.loc['2018-11-1']
df2.loc[df2['res_time_avg'] > 300]
| id | count | res_time_sum | res_time_min | res_time_max | res_time_avg | created_at |
---|
created_at | | | | | | | |
---|
2018-11-01 02:02:07 | 171043 | 1 | 311.54 | 311.54 | 311.54 | 311.0 | 2018-11-01 02:02:07 |
---|
2018-11-01 02:11:07 | 171464 | 1 | 424.30 | 424.30 | 424.30 | 424.0 | 2018-11-01 02:11:07 |
---|
2018-11-01 15:10:09 | 196912 | 10 | 4391.19 | 117.96 | 1211.31 | 439.0 | 2018-11-01 15:10:09 |
---|
2018-11-01 15:24:09 | 198117 | 11 | 5689.44 | 150.16 | 3693.68 | 517.0 | 2018-11-01 15:24:09 |
---|
2018-11-01 18:34:09 | 215229 | 8 | 3175.42 | 68.81 | 2097.52 | 396.0 | 2018-11-01 18:34:09 |
---|
2018-11-01 18:35:09 | 215331 | 6 | 14384.97 | 93.03 | 7628.88 | 2397.0 | 2018-11-01 18:35:09 |
---|
2018-11-01 21:16:09 | 229982 | 19 | 5746.55 | 154.06 | 510.67 | 302.0 | 2018-11-01 21:16:09 |
---|
df3 = df.loc['2018-11-1'][['res_time_sum','res_time_min','res_time_max','res_time_avg']]
df3
| res_time_sum | res_time_min | res_time_max | res_time_avg |
---|
created_at | | | | |
---|
2018-11-01 00:00:07 | 1057.31 | 88.75 | 177.72 | 132.0 |
---|
2018-11-01 00:01:07 | 749.12 | 103.79 | 240.38 | 149.0 |
---|
2018-11-01 00:02:07 | 845.84 | 136.31 | 225.73 | 169.0 |
---|
2018-11-01 00:03:07 | 1305.52 | 90.12 | 196.61 | 145.0 |
---|
2018-11-01 00:04:07 | 568.89 | 138.45 | 232.02 | 189.0 |
---|
... | ... | ... | ... | ... |
---|
2018-11-01 23:55:09 | 594.19 | 117.33 | 229.29 | 148.0 |
---|
2018-11-01 23:56:09 | 1061.85 | 132.71 | 221.12 | 176.0 |
---|
2018-11-01 23:57:09 | 1191.49 | 104.12 | 215.48 | 170.0 |
---|
2018-11-01 23:58:09 | 606.30 | 148.78 | 250.93 | 202.0 |
---|
2018-11-01 23:59:09 | 225.11 | 100.76 | 124.35 | 112.0 |
---|
871 rows × 4 columns
# Plot the four response-time columns for 2018-11-01.
# DataFrame.plot() opens its own figure, so calling plt.figure() first only
# left an extra empty figure behind; plot(figsize=...) alone creates the
# single 16x8 figure that is shown.
day_times = df.loc['2018-11-1', ['res_time_sum','res_time_min','res_time_max','res_time_avg']]
day_times.plot(figsize=(16,8))
plt.show()
![在这里插入图片描述](https://img-blog.csdnimg.cn/7578e7f06bc349de8f043a5aac6102cb.png?x-oss-process=image/watermark,type_d3F5LXplbmhlaQ,shadow_50,text_Q1NETiBAU3Rhbm5pcw==,size_20,color_FFFFFF,t_70,g_se,x_16#pic_center)
# 20-minute means for 2018-11-01, then a plot of the response-time columns.
# Only numeric columns are resampled: the frame still carries the string
# column created_at, and mean() over a non-numeric column raises a TypeError
# on pandas 2.x. '20min' replaces the deprecated '20T' frequency alias.
data = df.loc['2018-11-1'].select_dtypes('number').resample('20min').mean()
data[['res_time_sum','res_time_min','res_time_max','res_time_avg']].plot(figsize=(16,8))
plt.show()
![在这里插入图片描述](https://img-blog.csdnimg.cn/8d398ba59a9b46a6919f51059b27fb67.png?x-oss-process=image/watermark,type_d3F5LXplbmhlaQ,shadow_50,text_Q1NETiBAU3Rhbm5pcw==,size_20,color_FFFFFF,t_70,g_se,x_16#pic_center)
- 业务高峰时段 下午2-3点,晚上7-8点,响应时间都是上升的
分析连续的几天数据,可以发现,每天的业务高峰时段都比较相似
# Call counts from 2018-11-01 through 2018-11-13, drawn on one 16x8 axes.
fig, ax = plt.subplots(figsize=(16, 8))
df.loc['2018-11-1':'2018-11-13', 'count'].plot(ax=ax)
plt.show()
![在这里插入图片描述](https://img-blog.csdnimg.cn/7eb82bf4ef3f4902a0236964a1d31eb0.png?x-oss-process=image/watermark,type_d3F5LXplbmhlaQ,shadow_50,text_Q1NETiBAU3Rhbm5pcw==,size_20,color_FFFFFF,t_70,g_se,x_16#pic_center)
分析周末访问量是否有增加
df.loc['2019-5-2'].index.weekday
Int64Index([3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
...
3, 3, 3, 3, 3, 3, 3, 3, 3, 3],
dtype='int64', name='created_at', length=865)
df['weekday'] = df.index.weekday
df.head()
| id | count | res_time_sum | res_time_min | res_time_max | res_time_avg | created_at | weekday |
---|
created_at | | | | | | | | |
---|
2018-11-01 00:00:07 | 2019162542 | 8 | 1057.31 | 88.75 | 177.72 | 132.0 | 2018-11-01 00:00:07 | 3 |
---|
2018-11-01 00:01:07 | 162644 | 5 | 749.12 | 103.79 | 240.38 | 149.0 | 2018-11-01 00:01:07 | 3 |
---|
2018-11-01 00:02:07 | 162742 | 5 | 845.84 | 136.31 | 225.73 | 169.0 | 2018-11-01 00:02:07 | 3 |
---|
2018-11-01 00:03:07 | 162808 | 9 | 1305.52 | 90.12 | 196.61 | 145.0 | 2018-11-01 00:03:07 | 3 |
---|
2018-11-01 00:04:07 | 162943 | 3 | 568.89 | 138.45 | 232.02 | 189.0 | 2018-11-01 00:04:07 | 3 |
---|
# Replace the day-of-week number with a weekend flag: values 5 and 6 are
# Saturday and Sunday (pandas numbers Monday as 0).
# NOTE: from here on the 'weekday' column holds True for weekend rows and
# False for working days, despite its name.
df['weekday'] = df['weekday'].isin({5,6})
df.head()
| id | count | res_time_sum | res_time_min | res_time_max | res_time_avg | created_at | weekday |
---|
created_at | | | | | | | | |
---|
2018-11-01 00:00:07 | 2019162542 | 8 | 1057.31 | 88.75 | 177.72 | 132.0 | 2018-11-01 00:00:07 | False |
---|
2018-11-01 00:01:07 | 162644 | 5 | 749.12 | 103.79 | 240.38 | 149.0 | 2018-11-01 00:01:07 | False |
---|
2018-11-01 00:02:07 | 162742 | 5 | 845.84 | 136.31 | 225.73 | 169.0 | 2018-11-01 00:02:07 | False |
---|
2018-11-01 00:03:07 | 162808 | 9 | 1305.52 | 90.12 | 196.61 | 145.0 | 2018-11-01 00:03:07 | False |
---|
2018-11-01 00:04:07 | 162943 | 3 | 568.89 | 138.45 | 232.02 | 189.0 | 2018-11-01 00:04:07 | False |
---|
df_weekday = df.groupby('weekday')['count'].mean()
df_weekday
weekday
False 7.016846
True 7.574989
Name: count, dtype: float64
# Mean call count for every (weekend flag, hour of day) pair, drawn as one line.
hour_of_day = df.index.hour
df_weekday_hour = df.groupby(['weekday', hour_of_day])['count'].mean()
fig, ax = plt.subplots(figsize=(16, 8))
df_weekday_hour.plot(ax=ax)
plt.show()
![在这里插入图片描述](https://img-blog.csdnimg.cn/479a3d258583463b8a4e980f43bff2b5.png?x-oss-process=image/watermark,type_d3F5LXplbmhlaQ,shadow_50,text_Q1NETiBAU3Rhbm5pcw==,size_20,color_FFFFFF,t_70,g_se,x_16#pic_center)
df.groupby(['weekday', df.index.hour])['count'].mean().unstack(level = 0)
weekday | False | True |
---|
created_at | | |
---|
0 | 3.239120 | 3.467782 |
---|
1 | 1.668388 | 1.741849 |
---|
2 | 1.162551 | 1.161826 |
---|
3 | 1.086705 | 1.050000 |
---|
4 | 1.155556 | 1.076923 |
---|
5 | 1.136364 | 1.333333 |
---|
6 | 1.000000 | 1.000000 |
---|
7 | 1.000000 | 1.000000 |
---|
8 | 1.000000 | 1.071429 |
---|
9 | 1.080000 | 1.144928 |
---|
10 | 1.239011 | 1.254111 |
---|
11 | 2.031690 | 1.992958 |
---|
12 | 4.195845 | 4.031889 |
---|
13 | 6.668042 | 6.905772 |
---|
14 | 8.260503 | 8.851321 |
---|
15 | 8.934448 | 9.858422 |
---|
16 | 8.466504 | 9.420550 |
---|
17 | 6.784996 | 7.334743 |
---|
18 | 6.717731 | 7.342150 |
---|
19 | 8.655913 | 9.270430 |
---|
20 | 10.536496 | 11.173609 |
---|
21 | 10.846906 | 11.695043 |
---|
22 | 9.034164 | 10.419916 |
---|
23 | 5.946834 | 7.025452 |
---|
# Same grouped means, with the weekend flag moved into columns so that the two
# day types are drawn as two separate lines over the hour of day.
hourly_means = df.groupby(['weekday', df.index.hour])['count'].mean()
hourly_by_day_type = hourly_means.unstack(level=0)
hourly_by_day_type.plot(figsize=(16,8))
plt.show()
![在这里插入图片描述](https://img-blog.csdnimg.cn/59a32fe908194ce1a20e08cc9e86141b.png?x-oss-process=image/watermark,type_d3F5LXplbmhlaQ,shadow_50,text_Q1NETiBAU3Rhbm5pcw==,size_20,color_FFFFFF,t_70,g_se,x_16#pic_center)
源文件下载