import pandas as pd
import numpy as np
# Example: discretizing stock data
# Load the daily stock quotes from CSV into a DataFrame.
stock = pd.read_csv("/home/python/练习/stock_day/stock_day.csv")
# Preview the first five rows to check the columns that were read.
stock.head(n=5)
|
open |
high |
close |
low |
volume |
price_change |
p_change |
ma5 |
ma10 |
ma20 |
v_ma5 |
v_ma10 |
v_ma20 |
turnover |
2018-02-27 |
23.53 |
25.88 |
24.16 |
23.53 |
95578.03 |
0.63 |
2.68 |
22.942 |
22.142 |
22.875 |
53782.64 |
46738.65 |
55576.11 |
2.39 |
2018-02-26 |
22.80 |
23.78 |
23.53 |
22.80 |
60985.11 |
0.69 |
3.02 |
22.406 |
21.955 |
22.942 |
40827.52 |
42736.34 |
56007.50 |
1.53 |
2018-02-23 |
22.88 |
23.37 |
22.82 |
22.71 |
52914.01 |
0.54 |
2.42 |
21.938 |
21.929 |
23.022 |
35119.58 |
41871.97 |
56372.85 |
1.32 |
2018-02-22 |
22.25 |
22.76 |
22.28 |
22.02 |
36105.01 |
0.36 |
1.64 |
21.446 |
21.909 |
23.137 |
35397.58 |
39904.78 |
60149.60 |
0.90 |
2018-02-14 |
21.49 |
21.99 |
21.92 |
21.48 |
23331.04 |
0.44 |
2.05 |
21.366 |
21.923 |
23.253 |
33590.21 |
42935.74 |
61716.11 |
0.58 |
# Discretize the `p_change` column into 10 quantile-based bins, so each bin
# holds roughly the same number of rows.
qcut = pd.qcut(stock["p_change"], 10)
# Count how many rows fall into each bin. `value_counts` must be *called*;
# without the parentheses the cell only shows the bound-method repr.
qcut.value_counts()
<bound method IndexOpsMixin.value_counts of 2018-02-27 (1.738, 2.938]
2018-02-26 (2.938, 5.27]
2018-02-23 (1.738, 2.938]
2018-02-22 (0.94, 1.738]
2018-02-14 (1.738, 2.938]
2018-02-13 (0.94, 1.738]
2018-02-12 (2.938, 5.27]
2018-02-09 (-10.030999999999999, -4.836]
2018-02-08 (0.26, 0.94]
2018-02-07 (-2.444, -1.352]
2018-02-06 (-4.836, -2.444]
2018-02-05 (1.738, 2.938]
2018-02-02 (0.26, 0.94]
2018-02-01 (-10.030999999999999, -4.836]
2018-01-31 (-0.462, 0.26]
2018-01-30 (-0.462, 0.26]
2018-01-29 (-4.836, -2.444]
2018-01-26 (0.26, 0.94]
2018-01-25 (-4.836, -2.444]
2018-01-24 (-1.352, -0.462]
2018-01-23 (0.94, 1.738]
2018-01-22 (-0.462, 0.26]
2018-01-19 (1.738, 2.938]
2018-01-18 (-0.462, 0.26]
2018-01-17 (0.26, 0.94]
2018-01-16 (2.938, 5.27]
2018-01-15 (-4.836, -2.444]
2018-01-12 (1.738, 2.938]
2018-01-11 (-1.352, -0.462]
2018-01-10 (-1.352, -0.462]
...
2015-04-13 (5.27, 10.03]
2015-04-10 (-1.352, -0.462]
2015-04-09 (5.27, 10.03]
2015-04-08 (2.938, 5.27]
2015-04-07 (5.27, 10.03]
2015-04-03 (0.94, 1.738]
2015-04-02 (0.26, 0.94]
2015-04-01 (0.26, 0.94]
2015-03-31 (-2.444, -1.352]
2015-03-30 (2.938, 5.27]
2015-03-27 (5.27, 10.03]
2015-03-26 (-2.444, -1.352]
2015-03-25 (-2.444, -1.352]
2015-03-24 (1.738, 2.938]
2015-03-23 (-0.462, 0.26]
2015-03-20 (-0.462, 0.26]
2015-03-19 (0.26, 0.94]
2015-03-18 (0.26, 0.94]
2015-03-17 (1.738, 2.938]
2015-03-16 (1.738, 2.938]
2015-03-13 (1.738, 2.938]
2015-03-12 (-1.352, -0.462]
2015-03-11 (-2.444, -1.352]
2015-03-10 (1.738, 2.938]
2015-03-09 (-0.462, 0.26]
2015-03-06 (5.27, 10.03]
2015-03-05 (1.738, 2.938]
2015-03-04 (0.94, 1.738]
2015-03-03 (0.94, 1.738]
2015-03-02 (1.738, 2.938]
Name: p_change, Length: 643, dtype: category
Categories (10, interval[float64]): [(-10.030999999999999, -4.836] < (-4.836, -2.444] < (-2.444, -1.352] < (-1.352, -0.462] ... (0.94, 1.738] < (1.738, 2.938] < (2.938, 5.27] < (5.27, 10.03]]>
# One-hot encode the quantile bins: one indicator column per interval,
# each column name prefixed with "rise_".
pd.get_dummies(data=qcut, prefix="rise")
|
rise_(-10.030999999999999, -4.836] |
rise_(-4.836, -2.444] |
rise_(-2.444, -1.352] |
rise_(-1.352, -0.462] |
rise_(-0.462, 0.26] |
rise_(0.26, 0.94] |
rise_(0.94, 1.738] |
rise_(1.738, 2.938] |
rise_(2.938, 5.27] |
rise_(5.27, 10.03] |
2018-02-27 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
2018-02-26 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
2018-02-23 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
2018-02-22 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
2018-02-14 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
2018-02-13 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
2018-02-12 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
2018-02-09 |
1 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
2018-02-08 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
2018-02-07 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
2018-02-06 |
0 |
1 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
2018-02-05 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
2018-02-02 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
2018-02-01 |
1 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
2018-01-31 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
0 |
2018-01-30 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
0 |
2018-01-29 |
0 |
1 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
2018-01-26 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
2018-01-25 |
0 |
1 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
2018-01-24 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
0 |
0 |
2018-01-23 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
2018-01-22 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
0 |
2018-01-19 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
2018-01-18 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
0 |
2018-01-17 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
2018-01-16 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
2018-01-15 |
0 |
1 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
2018-01-12 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
2018-01-11 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
0 |
0 |
2018-01-10 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
0 |
0 |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
2015-04-13 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
2015-04-10 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
0 |
0 |
2015-04-09 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
2015-04-08 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
2015-04-07 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
2015-04-03 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
2015-04-02 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
2015-04-01 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
2015-03-31 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
2015-03-30 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
2015-03-27 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
2015-03-26 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
2015-03-25 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
2015-03-24 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
2015-03-23 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
0 |
2015-03-20 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
0 |
2015-03-19 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
2015-03-18 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
2015-03-17 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
2015-03-16 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
2015-03-13 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
2015-03-12 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
0 |
0 |
2015-03-11 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
2015-03-10 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
2015-03-09 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
0 |
2015-03-06 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
2015-03-05 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
2015-03-04 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
2015-03-03 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
2015-03-02 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
643 rows × 10 columns
# Hand-picked bin edges for `p_change`; pd.cut turns consecutive edges into
# right-closed intervals, i.e. (-100, -7], (-7, -5], ..., (7, 100].
bins = [-100, -7, -5, -3, 0, 3, 5, 7, 100]
cut = pd.cut(stock["p_change"], bins)
# Count how many rows fall into each custom bin. `value_counts` must be
# *called*; without the parentheses the cell only shows the bound-method repr.
cut.value_counts()
<bound method IndexOpsMixin.value_counts of 2018-02-27 (0, 3]
2018-02-26 (3, 5]
2018-02-23 (0, 3]
2018-02-22 (0, 3]
2018-02-14 (0, 3]
2018-02-13 (0, 3]
2018-02-12 (3, 5]
2018-02-09 (-7, -5]
2018-02-08 (0, 3]
2018-02-07 (-3, 0]
2018-02-06 (-5, -3]
2018-02-05 (0, 3]
2018-02-02 (0, 3]
2018-02-01 (-7, -5]
2018-01-31 (-3, 0]
2018-01-30 (0, 3]
2018-01-29 (-3, 0]
2018-01-26 (0, 3]
2018-01-25 (-5, -3]
2018-01-24 (-3, 0]
2018-01-23 (0, 3]
2018-01-22 (-3, 0]
2018-01-19 (0, 3]
2018-01-18 (0, 3]
2018-01-17 (0, 3]
2018-01-16 (3, 5]
2018-01-15 (-5, -3]
2018-01-12 (0, 3]
2018-01-11 (-3, 0]
2018-01-10 (-3, 0]
...
2015-04-13 (7, 100]
2015-04-10 (-3, 0]
2015-04-09 (5, 7]
2015-04-08 (5, 7]
2015-04-07 (5, 7]
2015-04-03 (0, 3]
2015-04-02 (0, 3]
2015-04-01 (0, 3]
2015-03-31 (-3, 0]
2015-03-30 (3, 5]
2015-03-27 (5, 7]
2015-03-26 (-3, 0]
2015-03-25 (-3, 0]
2015-03-24 (0, 3]
2015-03-23 (0, 3]
2015-03-20 (-3, 0]
2015-03-19 (0, 3]
2015-03-18 (0, 3]
2015-03-17 (0, 3]
2015-03-16 (0, 3]
2015-03-13 (0, 3]
2015-03-12 (-3, 0]
2015-03-11 (-3, 0]
2015-03-10 (0, 3]
2015-03-09 (0, 3]
2015-03-06 (7, 100]
2015-03-05 (0, 3]
2015-03-04 (0, 3]
2015-03-03 (0, 3]
2015-03-02 (0, 3]
Name: p_change, Length: 643, dtype: category
Categories (8, interval[int64]): [(-100, -7] < (-7, -5] < (-5, -3] < (-3, 0] < (0, 3] < (3, 5] < (5, 7] < (7, 100]]>
# One-hot encode the quantile bins for every row. Keep the full frame in
# `one_hot`: truncating it with .head() before assignment would leave only
# five rows, and any later index-aligned join against `stock` would then be
# NaN for all other dates.
one_hot = pd.get_dummies(qcut, prefix="涨跌幅")
# Preview only; this does not modify `one_hot`.
one_hot.head()
|
涨跌幅_(-10.030999999999999, -4.836] |
涨跌幅_(-4.836, -2.444] |
涨跌幅_(-2.444, -1.352] |
涨跌幅_(-1.352, -0.462] |
涨跌幅_(-0.462, 0.26] |
涨跌幅_(0.26, 0.94] |
涨跌幅_(0.94, 1.738] |
涨跌幅_(1.738, 2.938] |
涨跌幅_(2.938, 5.27] |
涨跌幅_(5.27, 10.03] |
2018-02-27 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
2018-02-26 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
2018-02-23 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
2018-02-22 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
2018-02-14 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
# Merge the data: place the one-hot columns next to the original columns.
# Column-wise concat aligns the two frames on their index; index labels
# missing from either frame are filled with NaN.
pd.concat([stock, one_hot], axis="columns").head()
|
open |
high |
close |
low |
volume |
price_change |
p_change |
ma5 |
ma10 |
ma20 |
... |
涨跌幅_(-10.030999999999999, -4.836] |
涨跌幅_(-4.836, -2.444] |
涨跌幅_(-2.444, -1.352] |
涨跌幅_(-1.352, -0.462] |
涨跌幅_(-0.462, 0.26] |
涨跌幅_(0.26, 0.94] |
涨跌幅_(0.94, 1.738] |
涨跌幅_(1.738, 2.938] |
涨跌幅_(2.938, 5.27] |
涨跌幅_(5.27, 10.03] |
2015-03-02 |
12.25 |
12.67 |
12.52 |
12.20 |
96291.73 |
0.32 |
2.62 |
12.520 |
12.520 |
12.520 |
... |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
2015-03-03 |
12.52 |
13.06 |
12.70 |
12.52 |
139071.61 |
0.18 |
1.44 |
12.610 |
12.610 |
12.610 |
... |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
2015-03-04 |
12.80 |
12.92 |
12.90 |
12.61 |
67075.44 |
0.20 |
1.57 |
12.707 |
12.707 |
12.707 |
... |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
2015-03-05 |
12.88 |
13.45 |
13.16 |
12.87 |
93180.39 |
0.26 |
2.02 |
12.820 |
12.820 |
12.820 |
... |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
2015-03-06 |
13.17 |
14.48 |
14.28 |
13.13 |
179831.72 |
1.12 |
8.51 |
13.112 |
13.112 |
13.112 |
... |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
5 rows × 24 columns
# Display the stock DataFrame as the cell output.
stock
|
open |
high |
close |
low |
volume |
price_change |
p_change |
ma5 |
ma10 |
ma20 |
v_ma5 |
v_ma10 |
v_ma20 |
turnover |
2018-02-27 |
23.53 |
25.88 |
24.16 |
23.53 |
95578.03 |
0.63 |
2.68 |
22.942 |
22.142 |
22.875 |
53782.64 |
46738.65 |
55576.11 |
2.39 |
2018-02-26 |
22.80 |
23.78 |
23.53 |
22.80 |
60985.11 |
0.69 |
3.02 |
22.406 |
21.955 |
22.942 |
40827.52 |
42736.34 |
56007.50 |
1.53 |
2018-02-23 |
22.88 |
23.37 |
22.82 |
22.71 |
52914.01 |
0.54 |
2.42 |
21.938 |
21.929 |
23.022 |
35119.5 |