这一节学习机器学习算法的对比,第一章学习如何将数据多维展示。
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
from sklearn.neighbors import KNeighborsClassifier
plt.style.use('seaborn')

# Index whose direction is to be predicted
indexcode = '000016.SH'

# Start and end of the historical window (YYYYMMDD strings)
startdate = '20140101'
enddate = '20190123'

# Trading days in the historical window, formatted as 'YYYYMMDD' strings
_trade_days = get_trade_days(startdate, enddate, count=None)
tradelist = list(_trade_days.strftime('%Y%m%d'))

# Feature column names: money flow, advance/decline ratio, average change.
# NOTE: the trailing space in the second name is part of the column label.
label = [
    'money rate %',
    'net up rate % ',
    'mean of updown %',
]
In [142]:
# Worked example: build the feature row and labels for a single trading day.
date = '20190123'
nextdate = '20190124'

# Constituents of the index on that day
stock = get_index_stocks(indexcode, date)

# 'quote_rate' of every constituent for the day; transposed so that `date`
# can be addressed as a column below, with missing values treated as 0.
df = get_price(stock, date, date, '1d', ['quote_rate'], skip_paused=False,
               fq='pre', bar_count=0, is_panel=1)['quote_rate'].T.fillna(0)

# Feature 3: mean quote_rate across constituents.
# .iloc[0] selects by position explicitly; a bare [0] on a Series whose index
# is not integer-typed relies on a positional fallback that newer pandas deprecates.
label3 = round(df.mean().iloc[0], 3)

# Feature 2: (number of risers - number of fallers) / number of constituents.
# Boolean-mask sums replace building filtered frames and lists just to count them.
day_changes = df[date]
risers = int((day_changes > 0).sum())
fallers = int((day_changes < 0).sum())
label2 = (risers - fallers) / len(day_changes)

# Feature 1: mean 'net_flow_rate' across constituents.
moneydf = get_money_flow_step(stock, date, date, '1d', ['net_flow_rate'], None,
                              is_panel=1)['net_flow_rate'].T.fillna(0)
label1 = round(moneydf.mean().iloc[0], 3)

# quote_rate of the index itself for `date` and `nextdate`
# (assumes both are trading days so that two values come back - TODO confirm).
value = get_price(indexcode, date, nextdate, '1d', ['quote_rate'], skip_paused=False,
                  fq='pre', bar_count=0, is_panel=1)['quote_rate']
index_changes = list(value)

# One-row frame: the three features plus same-day and next-day index moves.
dt = pd.DataFrame([label1, label2, label3], index=label, columns=[date]).T
dt['now up'] = index_changes[0]
dt['now label'] = dt['now up'].apply(lambda x: 1 if x > 0 else -1)
dt['next up'] = index_changes[1]
# Target label: +1 if the index rises on the next day, otherwise -1.
dt['label'] = dt['next up'].apply(lambda x: 1 if x > 0 else -1)
dt
Out[142]:
money rate % | net up rate % | mean of updown % | now up | now label | next up | label | |
---|---|---|---|---|---|---|---|
20190123 | -4.998 | -0.02 | -0.088 | -0.1742 | -1 | 0.6054 | 1 |
In [143]:
# Build the full feature table: one row per trading day in `tradelist`.
dt = pd.DataFrame(columns=label)
for date in tradelist:
    # Constituents of the index on this day
    stock = get_index_stocks(indexcode, date)

    # 'quote_rate' of every constituent for the day; transposed so that `date`
    # can be addressed as a column below, with missing values treated as 0.
    df = get_price(stock, date, date, '1d', ['quote_rate'], skip_paused=False,
                   fq='pre', bar_count=0, is_panel=1)['quote_rate'].T.fillna(0)

    # Feature 3: mean quote_rate across constituents.
    # .iloc[0] selects by position explicitly instead of relying on the
    # positional fallback of [0] on a non-integer-indexed Series.
    label3 = round(df.mean().iloc[0], 3)

    # Feature 2: (number of risers - number of fallers) / number of constituents.
    day_changes = df[date]
    risers = int((day_changes > 0).sum())
    fallers = int((day_changes < 0).sum())
    label2 = (risers - fallers) / len(day_changes)

    # Feature 1: mean 'net_flow_rate' across constituents.
    moneydf = get_money_flow_step(stock, date, date, '1d', ['net_flow_rate'], None,
                                  is_panel=1)['net_flow_rate'].T.fillna(0)
    label1 = round(moneydf.mean().iloc[0], 3)

    dt.loc[date] = [label1, label2, label3]

# quote_rate of the index itself over the whole window; assigning the list
# requires one value per row of `dt` (i.e. per entry of `tradelist`).
value = list(get_price(indexcode, startdate, enddate, '1d', ['quote_rate'], skip_paused=False,
                       fq='pre', bar_count=0, is_panel=1)['quote_rate'])
dt['now up'] = value
dt['now label'] = dt['now up'].apply(lambda x: 1 if x > 0 else -1)

# Next-day move: the 'now up' column shifted up by one row.
# NOTE(review): the last row has no next day and is padded with 0, so its
# 'label' is always -1; that row is not a real observation.
dt['next up'] = list(dt['now up'])[1:] + [0]
# Target label: +1 if the index rises on the next day, otherwise -1.
dt['label'] = dt['next up'].apply(lambda x: 1 if x > 0 else -1)
dt
Out[143]:
money rate % | net up rate % | mean of updown % | now up |
---|