import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Read the 2016 order export; the file's `id` column becomes the row index.
df = pd.read_csv(
    "./order_info_2016.csv",
    index_col="id",
)
# Print each column's dtype and non-null count plus the memory footprint.
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 104557 entries, 1 to 104557
Data columns (total 10 columns):
orderId 104557 non-null int64
userId 104557 non-null int64
productId 104557 non-null int64
cityId 104557 non-null int64
price 104557 non-null int64
payMoney 104557 non-null int64
channelId 104549 non-null object
deviceType 104557 non-null int64
createTime 104557 non-null object
payTime 104557 non-null object
dtypes: int64(7), object(3)
memory usage: 8.8+ MB
# Preview the first rows to sanity-check how the columns were parsed.
df.head()
|
orderId |
userId |
productId |
cityId |
price |
payMoney |
channelId |
deviceType |
createTime |
payTime |
id |
|
|
|
|
|
|
|
|
|
|
1 |
232148841 |
2794924 |
268 |
110001 |
35300 |
35300 |
9058255c90 |
3 |
2016-01-01 12:53:02 |
2016-01-01 12:53:24 |
2 |
222298971 |
1664684 |
801 |
330001 |
51200 |
49900 |
e0e6019897 |
2 |
2016-01-01 21:42:51 |
2016-01-01 21:43:30 |
3 |
211494392 |
2669372 |
934 |
220002 |
62100 |
62100 |
9058255c90 |
3 |
2016-01-01 14:10:13 |
2016-01-01 14:11:18 |
4 |
334575272 |
1924727 |
422 |
230001 |
50600 |
42000 |
46d5cea30d |
2 |
2016-01-01 17:43:35 |
2016-01-01 17:43:53 |
5 |
144825651 |
4148671 |
473 |
130006 |
149100 |
142000 |
6ff1752b69 |
2 |
2016-01-01 18:52:04 |
2016-01-01 18:52:47 |
# Summary statistics for the numeric columns. In the recorded output below,
# payMoney has a negative minimum and deviceType reaches 6 -- both worth
# checking during data cleaning.
df.describe()
|
orderId |
userId |
productId |
cityId |
price |
payMoney |
deviceType |
count |
1.045570e+05 |
1.045570e+05 |
104557.000000 |
104557.000000 |
1.045570e+05 |
1.045570e+05 |
104557.000000 |
mean |
2.993341e+08 |
3.270527e+06 |
504.566275 |
154410.947225 |
9.167350e+04 |
8.686689e+04 |
2.385292 |
std |
5.149818e+07 |
4.138208e+07 |
288.130647 |
72197.163762 |
9.158836e+04 |
9.072028e+04 |
0.648472 |
min |
1.035627e+08 |
2.930600e+04 |
0.000000 |
30000.000000 |
6.000000e+02 |
-1.000000e+03 |
1.000000 |
25% |
2.633627e+08 |
2.179538e+06 |
254.000000 |
100011.000000 |
3.790000e+04 |
3.360000e+04 |
2.000000 |
50% |
2.989828e+08 |
2.705995e+06 |
507.000000 |
150001.000000 |
5.920000e+04 |
5.500000e+04 |
2.000000 |
75% |
3.349972e+08 |
3.271237e+06 |
758.000000 |
220002.000000 |
1.080000e+05 |
1.040000e+05 |
3.000000 |
max |
4.871430e+08 |
3.072939e+09 |
1000.000000 |
380001.000000 |
2.295600e+06 |
2.294200e+06 |
6.000000 |
# Load the lookup table that pairs each numeric device-type id with its name.
device_type = pd.read_csv("./device_type.txt")
# Bare expression so the table is displayed as the cell's output.
device_type
|
id |
deviceType |
0 |
1 |
PC |
1 |
2 |
Android |
2 |
3 |
iPhone |
3 |
4 |
Wap |
4 |
5 |
other |
# Count the distinct orderId values present in the order table.
df.orderId.unique().size
df.orderId.unique().size < df