import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the tab-separated log file. header=None means the first line is
# treated as data, so the columns get default integer labels.
df = pd.read_csv(
    'log.txt',
    sep='\t',
    header=None,
)
# Preview the first rows to sanity-check the parse.
df.head()
|
0 |
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
0 |
2019162542 |
/front-api/bill/create |
8 |
1057.31 |
88.75 |
177.72 |
132.0 |
60 |
2018-11-01 00:00:07 |
1 |
162644 |
/front-api/bill/create |
5 |
749.12 |
103.79 |
240.38 |
149.0 |
60 |
2018-11-01 00:01:07 |
2 |
162742 |
/front-api/bill/create |
5 |
845.84 |
136.31 |
225.73 |
169.0 |
60 |
2018-11-01 00:02:07 |
3 |
162808 |
/front-api/bill/create |
9 |
1305.52 |
90.12 |
196.61 |
145.0 |
60 |
2018-11-01 00:03:07 |
4 |
162943 |
/front-api/bill/create |
3 |
568.89 |
138.45 |
232.02 |
189.0 |
60 |
2018-11-01 00:04:07 |
# (number of rows, number of columns) of the loaded frame.
df.shape
(179496, 9)
# Per-column dtypes as inferred by read_csv (no explicit dtype was given).
df.dtypes
0 int64
1 object
2 int64
3 float64
4 float64
5 float64
6 float64
7 int64
8 object
dtype: object
# Column dtypes, non-null counts and memory usage in one report.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 179496 entries, 0 to 179495
Data columns (total 9 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 0 179496 non-null int64
1 1 179496 non-null object
2 2 179496 non-null int64
3 3 179496 non-null float64
4 4 179496 non-null float64
5 5 179496 non-null float64
6 6 179496 non-null float64
7 7 179496 non-null int64
8 8 179496 non-null object
dtypes: float64(4), int64(3), object(2)
memory usage: 12.3+ MB
# Summary statistics (count, mean, std, min, quartiles, max) for the
# numeric columns; object-dtype columns are omitted by default.
df.describe()
|
0 |
2 |
3 |
4 |
5 |
6 |
7 |
count |
1.794960e+05 |
179496.000000 |
179496.000000 |
179496.000000 |
179496.000000 |
179496.000000 |
179496.0 |
mean |
6.877739e+06 |
7.175909 |
1393.177832 |
108.419626 |
359.880374 |
187.812208 |
60.0 |
std |
6.012494e+06 |
4.325160 |
1499.486073 |
79.640693 |
638.919827 |
224.464813 |
0.0 |
min |
1.626440e+05 |
1.000000 |
36.550000 |
3.210000 |
36.550000 |
36.000000 |
60.0 |
25% |
3.825233e+06 |
4.000000 |
607.707500 |
83.410000 |
198.280000 |
144.000000 |
60.0 |
50% |
6.811510e+06 |
7.000000 |
1154.905000 |
97.120000 |
256.090000 |
167.000000 |
60.0 |
75% |
9.981455e+06 |
10.000000 |
1834.117500 |
116.990000 |
374.410000 |
202.000000 |
60.0 |
max |
2.019163e+09 |
31.000000 |
142650.550000 |
18896.640000 |
142468.270000 |
71325.000000 |
60.0 |
加上列名
列名 描述
- id 自增字段
- api api对应的url
- count 单位时间内被访问的次数
- res_time_sum 响应时间总和(毫秒)
- res_time_min 最小响应时间
- res_time_max 最大响应时间
- res_time_avg 平均值
- interval 采样间隔时间(秒)
- created_at 创建日志时间
# Replace the default integer labels with descriptive column names.
df.columns = [
    'id',            # auto-increment field
    'api',           # URL of the API endpoint
    'count',         # number of calls within the sampling interval
    'res_time_sum',  # total response time (milliseconds)
    'res_time_min',  # minimum response time
    'res_time_max',  # maximum response time
    'res_time_avg',  # average response time
    'interval',      # sampling interval (seconds)
    'created_at',    # time the log record was created
]
# Preview again to confirm the new labels.
df.head()
|
id |
api |
count |
res_time_sum |
res_time_min |
res_time_max |
res_time_avg |
interval |
created_at |
0 |
2019162542 |
/front-api/bill/create |
8 |
1057.31 |
88.75 |
177.72 |
132.0 |
60 |
2018-11-01 00:00:07 |
1 |
162644 |
/front-api/bill/create |
5 |
749.12 |
103.79 |
240.38 |
149.0 |
60 |
2018-11-01 00:01:07 |
2 |
162742 |
/front-api/bill/create |
5 |
845.84 |
136.31 |
225.73 |
169.0 |
60 |
2018-11-01 00:02:07 |
3 |
162808 |
/front-api/bill/create |
9 |
1305.52 |
90.12 |
196.61 |
145.0 |
60 |
2018-11-01 00:03:07 |
4 |
162943 |
/front-api/bill/create |
3 |
568.89 |
138.45 |
232.02 |
189.0 |
60 |
2018-11-01 00:04:07 |
查看数值列的描述性统计(注意:describe() 无法检测重复值;检测重复行请使用 df.duplicated().sum())
# Summary statistics for the numeric columns.
# NOTE(review): describe() does not reveal duplicate rows; use
# df.duplicated().sum() if a duplicate check is intended here.
df.describe()
|
id |
count |
res_time_sum |
res_time_min |
res_time_max |
res_time_avg |
interval |
count |
1.794960e+05 |
179496.000000 |
179496.000000 |
179496.000000 |
179496.000000 |
179496.000000 |
179496.0 |
mean |
6.877739e+06 |
7.175909 |
1393.177832 |
108.419626 |
359.880374 |
187.812208 |
60.0 |
std |
6.012494e+06 |
4.325160 |
1499.486073 |
79.640693 |
638.919827 |
224.464813 |
0.0 |
min |
1.626440e+05 |
1.000000 |
36.550000 |
3.210000 |
36.550000 |
36.000000 |
60.0 |
25% |
3.825233e+06 |
4.000000 |
607.707500 |
83.410000 |
198.280000 |
144.000000 |
60.0 |
50% |
6.811510e+06 |
7.000000 |
1154.905000 |
97.120000 |
256.090000 |
167.000000 |
60.0 |
75% |
9.981455e+06 |
10.000000 |
1834.117500 |
116.990000 |
374.410000 |
202.000000 |
60.0 |
max |
2.019163e+09 |
31.000000 |
142650.550000 |
18896.640000 |
142468.270000 |
71325.000000 |
60.0 |
# Spot-check three randomly chosen rows.
df.sample(3)
|
id |
api |
count |
res_time_sum |
res_time_min |
res_time_max |
res_time_avg |
interval |
created_at |
23273 |
2234204 |
/front-api/bill/create |
2 |
357.94 |
140.89 |
217.05 |
178.0 |
60 |
2018-11-28 00:31:01 |
83811 |
6426819 |
/front-api/bill/create |
11 |
2628.88 |
91.26 |
657.51 |
238.0 |
60 |
2019-02-06 19:56:59 |
85211 |
6519448 |
/front-api/bill/create |
7 |
1102.81 |
89.82 |
239.91 |
157.0 |
60 |
2019-02-08 15:16:02 |
检测是否有缺失值
# Per column: True if at least one value is missing (NaN/None).
# NOTE(review): this is a missing-value check, not an outlier check.
df.isnull().any()
id False
api False
count False
res_time_sum False
res_time_min False
res_time_max False
res_time_avg False
interval False
created_at False
dtype: bool
# Spot-check three randomly chosen rows.
df.sample(3)
|
id |
api |
count |
res_time_sum |
res_time_min |
res_time_max |
res_time_avg |
interval |
created_at |
29671 |
2808559 |
/front-api/bill/create |
6 |
777.74 |
103.89 |
162.75 |
129.0 |
60 |
2018-12-05 16:25:16 |
19326 |
1881203 |
/front-api/bill/create |
13 |
2141.63 |
112.94 |
24 |