具体程序:
Ind.py
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 27 13:46:50 2020
@author: nuoyanli
"""
import pandas as pd
import numpy as np
# Moving-average indicators
def MA(data, N1, N2, N3):
    """Return three simple moving averages of the closing price.

    Args:
        data: DataFrame with a ``Clsprc`` column.
        N1: Window length, in rows, of the first average.
        N2: Window length, in rows, of the second average.
        N3: Window length, in rows, of the third average.

    Returns:
        Tuple ``(MAN1, MAN2, MAN3)`` of pandas Series on a fresh 0..n-1
        index (the index of ``data`` is discarded). The first ``N - 1``
        entries of each series are NaN because the window is not yet full.
    """
    # Build the positional series once instead of once per window.
    close = pd.Series(data['Clsprc'].values)
    MAN1 = close.rolling(N1).mean()
    MAN2 = close.rolling(N2).mean()
    MAN3 = close.rolling(N3).mean()
    return (MAN1, MAN2, MAN3)
# Moving Average Convergence/Divergence (MACD)
def MACD(data):
    """Return the MACD histogram of the closing price.

    The computation is::

        DIF  = EMA12(close) - EMA26(close)
        DEA  = EMA9(DIF)
        MACD = 2 * (DIF - DEA)

    where ``EMAn`` is the recursive exponential average with smoothing
    factor ``2 / (n + 1)``, seeded with the first observation.

    Args:
        data: DataFrame with a ``Clsprc`` column.

    Returns:
        1-D ``numpy.ndarray`` with one value per row of ``data``.
    """
    close = pd.Series(data['Clsprc'].values)
    # ``span=n`` gives alpha = 2 / (n + 1); ``halflife=n`` would give a much
    # slower decay and not the 12/26-period averages the names refer to.
    # ``adjust=False`` selects the plain recursive form, matching the DEA step.
    ema12 = close.ewm(span=12, adjust=False).mean()
    ema26 = close.ewm(span=26, adjust=False).mean()
    dif = ema12 - ema26
    # Same recursion as DEA[t] = (2 * DIF[t] + 8 * DEA[t - 1]) / 10 with
    # DEA[0] = DIF[0], i.e. alpha = 2 / (9 + 1).
    dea = dif.ewm(span=9, adjust=False).mean()
    return (2 * (dif - dea)).to_numpy()
# Stochastic indicator (KDJ)
def KDJ(data, N):
    """Return the K, D and J lines of the stochastic indicator.

    ``RSV = (close - lowest low) / (highest high - lowest low)`` over a
    rolling window of ``N`` rows (a ratio, not a percentage). K and D are
    recursive 2/3-1/3 smoothings of RSV and K respectively, both seeded
    with 0, and ``J = 3 * D - 2 * K``.

    Args:
        data: DataFrame with ``Loprc``, ``Hiprc`` and ``Clsprc`` columns.
        N: Window length in rows; must be at least 1.

    Returns:
        Tuple ``(K, D, J)`` of 1-D ``numpy.ndarray``; entries before index
        ``N`` are 0.

    Raises:
        ValueError: If ``N`` is smaller than 1.
    """
    if N < 1:
        raise ValueError("N must be a positive integer")

    lowest = pd.Series(data['Loprc'].values).rolling(N).min().to_numpy()
    highest = pd.Series(data['Hiprc'].values).rolling(N).max().to_numpy()
    close = np.asarray(data['Clsprc'].values, dtype=float)

    # A window whose high equals its low has no defined RSV (0/0 or x/0).
    with np.errstate(divide='ignore', invalid='ignore'):
        RSV = (close - lowest) / (highest - lowest)

    size = len(RSV)
    K = np.zeros(size)
    D = np.zeros(size)
    J = np.zeros(size)
    for t in range(N, size):
        if np.isfinite(RSV[t]):
            K[t] = 2 / 3 * K[t - 1] + 1 / 3 * RSV[t]
            D[t] = 2 / 3 * D[t - 1] + 1 / 3 * K[t]
        else:
            # Carry the previous values forward; feeding NaN/inf into the
            # recursion would make every later K, D and J NaN as well.
            K[t] = K[t - 1]
            D[t] = D[t - 1]
        # NOTE(review): many charting packages define J = 3K - 2D; the
        # original 3D - 2K is kept here - confirm which one is intended.
        J[t] = 3 * D[t] - 2 * K[t]
    return (K, D, J)
# Relative strength indicator (count-based)
def RSI(data, N):
    """Return the share of non-falling moves among the last ``N`` moves.

    A move at row ``t`` is ``price[t] - price[t - 1]``. It counts as "up"
    when it is >= 0 and as "down" when it is < 0, and
    ``rsi[t] = ups / (ups + downs)`` over the ``N`` moves ending at row
    ``t``. Only the direction of each move is used, not its size.

    Each value uses prices up to and including its own row. Before this
    alignment ``rsi[t]`` also used ``price[t + 1]``, which leaks the next
    row's direction into the current row.

    Args:
        data: DataFrame whose column at position 2 holds the price series
            (presumably the closing price - verify against the data file).
        N: Number of moves in the window; must be at least 1.

    Returns:
        1-D ``numpy.ndarray`` with one value per row of ``data``; rows
        without a full window of moves (indices below ``N``) are 0.

    Raises:
        ValueError: If ``N`` is smaller than 1.
    """
    if N < 1:
        raise ValueError("N must be a positive integer")

    price = np.asarray(data.iloc[:, 2].values, dtype=float)
    rsi = np.zeros(len(price))
    if len(price) < 2:
        return rsi

    move = np.diff(price)  # move[i] = price[i + 1] - price[i]
    ups = pd.Series((move >= 0).astype(float)).rolling(N).sum().to_numpy()
    downs = pd.Series((move < 0).astype(float)).rolling(N).sum().to_numpy()
    counted = ups + downs
    # NaN (window not full) and 0 (only NaN moves) both compare False here,
    # which also rules out a division by zero.
    usable = counted > 0
    # The window ending at move[i] belongs to row i + 1, hence the shift.
    rsi[1:][usable] = ups[usable] / counted[usable]
    return rsi
# Bias ratio (deviation from the moving average)
def BIAS(data, N):
    """Return the relative deviation of the price from its N-row average.

    ``bias[t] = (price[t] - MA_N[t]) / MA_N[t]`` for every row with a full
    window.

    Args:
        data: DataFrame whose column at position 2 holds the price series
            (presumably the closing price - verify against the data file).
        N: Moving-average window length in rows; must be at least 1.

    Returns:
        1-D ``numpy.ndarray`` with one value per row of ``data``; the first
        ``N - 1`` entries are 0.

    Raises:
        ValueError: If ``N`` is smaller than 1.
    """
    if N < 1:
        raise ValueError("N must be a positive integer")

    price = pd.Series(data.iloc[:, 2].values)
    man = price.rolling(N).mean()
    bias = np.zeros(len(price))
    first = N - 1  # first row whose window is full
    # One vectorised expression instead of a per-row ``iloc`` lookup; both
    # slices are empty when there are fewer than N rows.
    bias[first:] = ((price - man) / man).to_numpy()[first:]
    return bias
# On-balance volume (OBV)
def OBV(data):
    """Return the on-balance volume series.

    The first value is the first row's volume. After that the row's volume
    is added when the close is greater than or equal to the previous close
    and subtracted when it is lower.

    Args:
        data: DataFrame with ``Clsprc`` (price) and ``Dnshrtrd`` (volume)
            columns.

    Returns:
        1-D ``numpy.ndarray`` with one value per row of ``data``.
    """
    close = np.asarray(data['Clsprc'].values, dtype=float)
    # Copy so that the in-place sign flip below never touches ``data``.
    signed = np.array(data['Dnshrtrd'].values, dtype=float)
    if len(close) > 1:
        prev, curr = close[:-1], close[1:]
        direction = np.zeros(len(curr))
        direction[curr >= prev] = 1
        direction[curr < prev] = -1
        # A NaN close matches neither comparison and leaves direction at 0,
        # so OBV is carried forward instead of silently resetting to 0.
        signed[1:] *= direction
    return np.cumsum(signed)
# Up/down trend label (classification / prediction target)
def cla(data):
    """Return the direction of the move from each row to the next one.

    ``y[t]`` is 1 when ``price[t + 1] > price[t]``, -1 when it is lower and
    0 when it is unchanged. The last row has no following row and is 0.

    Args:
        data: DataFrame whose column at position 2 holds the price series
            (presumably the closing price - verify against the data file).

    Returns:
        1-D ``numpy.ndarray`` with one value per row of ``data``.
    """
    price = np.asarray(data.iloc[:, 2].values, dtype=float)
    y = np.zeros(len(price))
    if len(price) < 2:
        return y

    # Compare each price with the one directly before it. The "up" test
    # used to compare against the first price (``0:1`` instead of ``0:-1``).
    move = np.diff(price)
    trend = y[:-1]  # view: writes land in y
    trend[move > 0] = 1
    trend[move < 0] = -1
    return y
理解:
Series:
一种类似于一维数组的对象,由一组数据(各种NumPy数据类型)以及一组与之相关的数据标签(即索引)组成。例如:
pd.Series([4, 7, -5, 3])
从结果可以看出,Series的字符串表现形式为:索引在左边,值在右边。在上面的例子中没有为数据指定具体索引,因此它会自动创建一个0到N-1(N为数据的长度)的整数型索引。
Series rolling:
移动窗口。为了提升数据的准确性,将某个点的取值扩大到包含这个点的一段区间,用区间来进行判断。例如,对于
pd.Series(s).rolling(window=3).mean()
窗口每次向后移动一位,用当前位置及其之前共3个值的平均值代替当前位置的值;前两个位置因窗口未满,结果为NaN。
benefit.py
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 27 13:43:56 2020
@author: nuoyanli
"""
from predict import result1
import pandas as pd
data = pd.read_excel('./数据/data.xlsx')
# Sort once so that "first quote after the trade date" is the next available
# quote, instead of re-sorting the remaining rows for every trade.
quotes = data[['Trddt', 'Clsprc']].sort_values('Trddt')
quote_dates = quotes['Trddt'].values
quote_close = quotes['Clsprc'].values

r_list = []  # return of each executed trade
r_trd = []  # trade date of each executed trade
# The loop bound leaves out the last prediction, as before.
for t in range(len(result1) - 1):
    # Only a predicted rise (1) triggers the strategy.
    if result1['预测结果'].values[t] != 1:
        continue
    trade_date = result1['交易日期'].values[t]
    buy = quote_close[quote_dates == trade_date]
    sell = quote_close[quote_dates > trade_date]
    # Skip dates with no quote or no later quote rather than crashing or
    # recording an empty return.
    if len(buy) == 0 or len(sell) == 0:
        continue
    # Take scalars: keeping the 1-element array made every return, the
    # total and the DataFrame column arrays instead of numbers.
    # Assumes one quote per date - TODO confirm for multi-stock files.
    p1 = buy[0]
    p2 = sell[0]
    r = (p2 - p1) / p1
    r_list.append(r)
    r_trd.append(trade_date)

# Simple (non-compounded) sum of the per-trade returns.
r_total = sum(r_list)
print(r_total)
trd_r = {'交易日期': r_trd, '收益率': r_list}
trd_r = pd.DataFrame(trd_r)
print(trd_r)
predict.py
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 27 14:23:52 2020
@author: nuoyanli
"""
from main import Data
import pandas as pd
from sklearn.preprocessing import StandardScaler
# Split the samples: rows dated 2017-01-01 .. 2017-11-30 are the training
# set, every other row is the test set.
# NOTE(review): "every other row" also includes any rows dated before 2017 -
# confirm the data file only covers 2017.
x1 = Data['交易日期'] >= '2017-01-01'
x2 = Data['交易日期'] <= '2017-11-30'
xx = x1 & x2
index = xx.values
index1 = ~index
# Columns 1..14 are used as features and column 15 as the label.
x_train = Data.iloc[index, 1:15]
y_train = Data.iloc[index, [15]]
x_test = Data.iloc[index1, 1:15]
y_test = Data.iloc[index1, [15]]

# Standardise the features; the scaler is fitted on the training set only.
scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

# Support vector machine classifier
from sklearn import svm
clf = svm.SVC()
# Pass the labels as a 1-D array; a single-column frame is an (n, 1) column
# vector, which is not the shape the estimator expects for ``y``.
y_train_1d = y_train.values.ravel()
clf.fit(x_train, y_train_1d)
result = clf.predict(x_test)
# Accuracy on the training set
sc = clf.score(x_train, y_train_1d)
print(sc)
result = pd.DataFrame(result)

# Trade dates of the test samples
ff = Data.iloc[index1, 0]
# Put predictions and actual labels side by side for comparison.
pm1 = {'交易日期': ff.values, '预测结果': result.iloc[:, 0].values, '实际结果': y_test.iloc[:, 0].values}
result1 = pd.DataFrame(pm1)
z = result1['预测结果'].values - result1['实际结果'].values
# Accuracy on the test set: share of rows where prediction equals label
R = len(z[z == 0]) / len(z)
print(R)
print(result1)

# Export the comparison table under English column names.
list1 = result1['交易日期']
list2 = result1['预测结果']
list3 = result1['实际结果']
D = {'time': list1, 'predict': list2, 'true': list3}
D = pd.DataFrame(D)
D.to_excel('DD.xlsx')
main.py
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 27 14:03:16 2020
@author: nuoyanli
"""
import Ind
import pandas as pd
data = pd.read_excel('./数据/data.xlsx')

# Indicators that return several series are unpacked up front.
ma5, ma10, ma20 = Ind.MA(data, 5, 10, 20)
k_line, d_line, j_line = Ind.KDJ(data, 9)

# One column per technical indicator, in the order used by the model.
indicator_columns = {
    'MA5': ma5,
    'MA10': ma10,
    'MA20': ma20,
    'MACD': Ind.MACD(data),
    'K': k_line,
    'D': d_line,
    'J': j_line,
    'RSI6': Ind.RSI(data, 6),
    'RSI12': Ind.RSI(data, 12),
    'RSI24': Ind.RSI(data, 24),
    'BIAS5': Ind.BIAS(data, 5),
    'BIAS10': Ind.BIAS(data, 10),
    'BIAS20': Ind.BIAS(data, 20),
    'OBV': Ind.OBV(data),
}

# Trade date, then the indicators, then the up/down label, joined on the
# shared 0..n-1 row index.
dates = pd.DataFrame({'交易日期': data['Trddt'].values})
features = pd.DataFrame(indicator_columns)
labels = pd.DataFrame({'涨跌趋势': Ind.cla(data)})
merged = dates.join(features).join(labels)

# Drop rows that contain missing values.
complete = merged.dropna()
# Keep the rows whose column at position 6 is non-zero; with the layout
# above that column is 'D'.
Data = complete[complete['D'].values != 0]
print(Data)
Data.to_excel('res.xlsx')