import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Data discretization: bin the p_change column and one-hot encode the bins.
data = pd.read_csv("./BRNN/data/stock_day.csv")

# Do not elide columns when a DataFrame is printed.
pd.set_option('display.max_columns', None)

p_change = data["p_change"]

# NOTE: pd.qcut(p_change, 10) is the alternative that lets pandas choose the
# edges so that every bin holds roughly the same number of rows; here the
# edges are given explicitly instead.
bins = [-100, -7, -5, -3, 0, 3, 5, 7, 100]
p_count = pd.cut(p_change, bins)

# Number of rows that fall into each interval.
x2 = p_count.value_counts()
print(x2)

# One-hot encoding. Encode the *binned* series: encoding the raw float column
# would create one indicator column per distinct value, which defeats the
# discretization above. Rows outside every bin (NaN category) get all zeros.
dummies = pd.get_dummies(p_count, prefix="rise")
print(dummies.head())

# Append the indicator columns to the original table, side by side.
x3 = pd.concat([data, dummies], axis=1)
print(x3)
# Keep a handle on the raw index and a preview of the first rows.
x2 = data.index
x1 = data.head()
# print(x1)
# print(x2)

# Convert the index from object dtype to datetimes so calendar fields such as
# day-of-month and weekday can be read from it.
time = pd.to_datetime(x2)
x3, x4 = time.day, time.weekday
data["week"] = x4
print(data.head())

# Flag column: 1 where p_change is greater than 0, otherwise 0; stored on the
# table as "p_n".
is_positive = data["p_change"] > 0
data["p_n"] = np.where(is_positive, 1, 0)
print(data.head())
# Statistics: cross-tabulate weekday against the p_n flag. Each row is a
# weekday, each column a p_n value, and each cell the number of matching rows.
count = pd.crosstab(data["week"], data["p_n"])
print(count)

# Row totals (rows per weekday). Named row_totals rather than `sum` so the
# builtin sum() is not shadowed for the rest of the script.
row_totals = count.sum(axis=1).astype(np.float32)

# Divide every row by its total to turn counts into per-weekday proportions.
ret = count.div(row_totals, axis=0)
x5 = ret.plot(kind="bar", stacked=True)
plt.show()

# The same proportion obtained directly: pivot_table aggregates with the mean
# by default, and the mean of a 0/1 flag is the share of ones. The result is
# printed because a bare expression displays nothing when run as a script.
print(data.pivot_table(["p_n"], index="week"))

# Group-by aggregation