Stock Price Prediction with an LSTM

Required libraries (an install command is sketched after the list):

  • tushare
  • pytorch
  • pandas
  • numpy
  • matplotlib
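
All of these packages are available from PyPI; the command below is only a sketch, since the post does not pin exact versions and any reasonably recent release should work:

# In a shell / Anaconda Prompt, something like:
#   pip install tushare torch pandas numpy matplotlib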

1. Data Import

import tushare as ts 

ts.set_token('98edbe9c3e444002decb09646838a02f0307e41bd5ce51bb5b2a99c8')
ts_pro = ts.pro_api()
help(ts)
Help on package tushare:

NAME
    tushare - # -*- coding:utf-8 -*-

PACKAGE CONTENTS
    bond (package)
    coins (package)
    data (package)
    fund (package)
    futures (package)
    internet (package)
    pro (package)
    stock (package)
    subs (package)
    trader (package)
    util (package)

VERSION
    1.2.62

AUTHOR
    Jimmy Liu

FILE
    e:\anaconda3\lib\site-packages\tushare\__init__.py
# Stock ticker code
ts_code = '002069.SZ'

# Start date
start_date = '2006-01-01'

# End date
end_date = '2020-01-01'

df = ts_pro.daily(
        ts_code=ts_code,
        start_date=start_date,
        end_date=end_date)

# Save the downloaded data locally
df.to_csv('002069.csv', index=False)

import pandas as pd 

df = pd.read_csv('002069.csv')

# Sort by trade date in ascending order so the series runs chronologically for forecasting
df = df.sort_values(['trade_date'], ascending=True)

# Take a quick look at the data
df.head()
        ts_code  trade_date   open   high    low  close  pre_close  change  pct_chg        vol        amount
3034  002069.SZ    20060928  60.89  64.48  58.00  62.11      25.00   37.11   148.44  169301.73  1.028809e+06
3033  002069.SZ    20060929  64.10  67.00  62.48  62.99      62.11    0.88     1.42   49290.44  3.182028e+05
3032  002069.SZ    20061009  63.00  66.15  62.16  65.00      62.99    2.01     3.19   28449.04  1.827514e+05
3031  002069.SZ    20061010  64.89  71.50  64.00  71.49      65.00    6.49     9.98   34935.76  2.381439e+05
3030  002069.SZ    20061011  70.20  71.80  69.22  69.99      71.49   -1.50    -2.10   15807.24  1.109196e+05

2. Data Preprocessing

# Select these columns as input features
use_cols = ['open', 'high', 'low', 'close', 'pre_close', 'vol', 'amount']
df = df[use_cols]
df.head()
       open   high    low  close  pre_close        vol        amount
3034  60.89  64.48  58.00  62.11      25.00  169301.73  1.028809e+06
3033  64.10  67.00  62.48  62.99      62.11   49290.44  3.182028e+05
3032  63.00  66.15  62.16  65.00      62.99   28449.04  1.827514e+05
3031  64.89  71.50  64.00  71.49      65.00   34935.76  2.381439e+05
3030  70.20  71.80  69.22  69.99      71.49   15807.24  1.109196e+05
# Keep the min/max of 'close' so normalized predictions can be mapped back to prices
close_min = df['close'].min()
close_max = df['close'].max()
# Min-max normalization to the (0, 1) range
df = df.apply(lambda x: (x - x.min()) / (x.max() - x.min()))
df.head()
          open      high       low     close  pre_close       vol    amount
3034  0.401512  0.416768  0.404243  0.401345   0.151699  0.166571  0.800115
3033  0.423566  0.433710  0.436792  0.407265   0.401345  0.047720  0.247007
3032  0.416008  0.427995  0.434467  0.420787   0.407265  0.027080  0.141577
3031  0.428993  0.463964  0.447835  0.464447   0.420787  0.033504  0.184693
3030  0.465476  0.465981  0.485760  0.454356   0.464447  0.014561  0.085666
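
Since only the minimum and maximum of 'close' are kept, a small helper can map normalized predictions back to price units later on (a minimal sketch; the name denormalize is mine, not part of the original code):

def denormalize(x, vmin=close_min, vmax=close_max):
    # Invert the (0, 1) min-max scaling applied to the close price
    return x * (vmax - vmin) + vmin

This is the same inverse transform that is applied inline when plotting in section 5.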
import numpy as np 
# Sequence length of 30: use the previous 30 trading days to predict the next day
sequence = 30

X = []
Y = []

for i in range(df.shape[0] - sequence):
    # All use_cols over the 30-day window serve as input features
    X.append(np.array(df.iloc[i:(i + sequence), :].values, dtype=np.float64))
    # The next day's 'close' (column index 3) is the label
    Y.append(np.array(df.iloc[(i + sequence), 3], dtype=np.float64))

# Split into training and test sets
trainx, trainy = X[:int(0.8*df.shape[0])], Y[:int(0.8*df.shape[0])]
testx, testy = X[int(0.8*df.shape[0]):], Y[int(0.8*df.shape[0]):]
print(len(trainx))
print(len(testx))
2428
577
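
Note that X holds only df.shape[0] - sequence windows, so the 0.8 * df.shape[0] cut-off above leans on the raw row count rather than the number of samples; a split keyed to len(X) is slightly cleaner (a sketch of the alternative, not the code that produced the counts above):

split = int(0.8 * len(X))             # 80/20 split over the actual windows
trainx, trainy = X[:split], Y[:split]
testx, testy = X[split:], Y[split:]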
import torch
import torch.utils.data as Data 
torch.manual_seed(1)
<torch._C.Generator at 0x271cfb1a110>
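
torch.manual_seed only fixes PyTorch's CPU random generator; for fuller reproducibility NumPy (and CUDA, if a GPU were used) would be seeded as well (a sketch, not part of the original run):

np.random.seed(1)                      # NumPy-side randomness
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(1)      # seed GPU generators as well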
# list -> numpy
trainx = np.array(trainx)
trainy = np.array(trainy)
testx = np.array(testx)
testy = np.array(testy)

# numpy -> torch
trainx = torch.from_numpy(trainx)
trainy = torch.from_numpy(trainy)
testx = torch.from_numpy(testx)
testy = torch.from_numpy(testy)

print('trainx size: ', trainx.size())
print('trainy size: ', trainy.size())
print('testx size: ', testx.size())
print('testy size: ', testy.size())
trainx size:  torch.Size([2428, 30, 7])
trainy size:  torch.Size([2428])
testx size:  torch.Size([577, 30, 7])
testy size:  torch.Size([577])
# Wrap the tensors in DataLoaders with a batch size of 32
train_dataset = Data.TensorDataset(trainx, trainy)
test_dataset = Data.TensorDataset(testx, testy)

train_loader = Data.DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=2
)

test_loader = Data.DataLoader(
    dataset=test_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=2
)
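
As a quick sanity check, one batch can be drawn from the training loader and its shapes inspected (a sketch; with batch_size=32, sequence=30 and 7 features the expected shapes are [32, 30, 7] and [32]):

batch_x, batch_y = next(iter(train_loader))
print(batch_x.size())   # torch.Size([32, 30, 7])
print(batch_y.size())   # torch.Size([32])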

3. Define the Network Model

input_size = 7
seq_len = 30
hidden_size = 32
output_size = 1

import torch.nn as nn
import torch.nn.functional as F 
from torch.autograd import Variable

class MyNet(nn.Module):

    def __init__(self, input_size=input_size, hidden_size=hidden_size, output_size=output_size):
        super(MyNet, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        self.fc = nn.Linear(self.hidden_size*seq_len, self.output_size)
    
    def forward(self, input):
        out,_ = self.lstm(input)
        b, s, h = out.size()
        out = self.fc(out.reshape(b, s*h))
        return out 

net = MyNet()
print(net)

MyNet(
  (lstm): LSTM(7, 32, batch_first=True)
  (fc): Linear(in_features=960, out_features=1, bias=True)
)
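
A forward pass on a dummy batch confirms that the network maps a (batch, 30, 7) input to one value per sample (a quick sketch):

dummy = torch.randn(4, seq_len, input_size)   # (batch, seq_len, features)
print(net(dummy).size())                      # torch.Size([4, 1])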

4. Choose the Loss Function and Optimizer

import torch.optim as optim 
from tqdm import tqdm

loss_function = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

for epoch in tqdm(range(100)):
    total_loss = 0
    for _,(data, label) in enumerate(train_loader):
        data = Variable(data).float()
        pred = net(data)
        label = Variable(label).float()
        label = label.unsqueeze(1)
        loss = loss_function(pred, label)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        total_loss += loss.item()
        
    if (epoch+1) % 10 == 0:
        print('Epoch: ', epoch+1, ' loss: ', total_loss)
 10%|█         | 10/100 [00:17<02:32,  1.70s/it]Epoch:  10  loss:  0.03518655754669453
 20%|██        | 20/100 [00:34<02:14,  1.68s/it]Epoch:  20  loss:  0.023873500191257335
 30%|███       | 30/100 [00:53<02:11,  1.88s/it]Epoch:  30  loss:  0.02149457185441861
 40%|████      | 40/100 [01:14<02:07,  2.13s/it]Epoch:  40  loss:  0.017912164659719565
 50%|█████     | 50/100 [01:31<01:25,  1.72s/it]Epoch:  50  loss:  0.013031252356086043
 60%|██████    | 60/100 [01:51<01:14,  1.87s/it]Epoch:  60  loss:  0.014165752041662927
 70%|███████   | 70/100 [02:10<01:02,  2.09s/it]Epoch:  70  loss:  0.011516284157551127
 80%|████████  | 80/100 [02:27<00:35,  1.79s/it]Epoch:  80  loss:  0.01033103184090578
 90%|█████████ | 90/100 [02:47<00:17,  1.74s/it]Epoch:  90  loss:  0.010540407126427453
100%|██████████| 100/100 [03:07<00:00,  1.88s/it]Epoch:  100  loss:  0.01101792572899285
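
Once training finishes, the learned weights can be saved for later reuse (a minimal sketch; the filename lstm_stock.pth is an assumption):

torch.save(net.state_dict(), 'lstm_stock.pth')   # hypothetical filename
# To reload later: net.load_state_dict(torch.load('lstm_stock.pth'))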

5. Test the Model

pred_list = []
label_list = []

for _, (data, label) in enumerate(test_loader):
    data = Variable(data).float()
    pred = net(data)
    pred_list.extend(pred.data.squeeze(1).tolist())
    label_list.extend(label.tolist())

pred_list[:5]
[0.003661651164293289,
 0.004493666812777519,
 0.017206232994794846,
 0.004013188183307648,
 -0.003268543630838394]
label_list[:5]
[0.005449041372351158,
 0.007938109653548601,
 0.015136226034308779,
 0.00538176925664312,
 0.0014127144298688192]
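
Beyond comparing a few values by eye, an error metric on the original price scale is more telling; RMSE is used here purely as an illustration (a sketch, not part of the original post):

pred_price = np.array(pred_list) * (close_max - close_min) + close_min
real_price = np.array(label_list) * (close_max - close_min) + close_min
rmse = np.sqrt(np.mean((pred_price - real_price) ** 2))
print('Test RMSE (price units):', rmse)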

A quick look at the predictions against the true values shows that the difference is not particularly large.

import matplotlib.pyplot as plt 


plt.plot([i*(close_max-close_min)+close_min for i in pred_list[:100]] , label='pred')
plt.plot([i*(close_max-close_min)+close_min for i in label_list[:100]], label='real')
plt.title('Stock Forecast')
plt.legend()
plt.show()

[Figure: 'Stock Forecast', predicted vs. real close prices for the first 100 test samples]
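
Because test_loader was built with shuffle=True, the 100 points above are correctly paired but not in chronological order; rebuilding the loader without shuffling gives a true time-series comparison (a sketch):

ordered_loader = Data.DataLoader(test_dataset, batch_size=32, shuffle=False)

ordered_pred, ordered_real = [], []
with torch.no_grad():                     # no gradients needed at test time
    for data, label in ordered_loader:
        out = net(data.float())
        ordered_pred.extend(out.squeeze(1).tolist())
        ordered_real.extend(label.tolist())

plt.plot([i*(close_max-close_min)+close_min for i in ordered_pred[:100]], label='pred')
plt.plot([i*(close_max-close_min)+close_min for i in ordered_real[:100]], label='real')
plt.title('Stock Forecast (chronological)')
plt.legend()
plt.show()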
