需要使用的库:
tushare pytorch pandas numpy matplotlib tqdm
1. 数据导入
import os

import tushare as ts

# Read the Tushare Pro API token from the environment instead of committing
# the secret to source control. Any token that was previously published in
# this file should be considered leaked and be regenerated.
_tushare_token = os.environ.get("TUSHARE_TOKEN")
if not _tushare_token:
    raise RuntimeError(
        "Set the TUSHARE_TOKEN environment variable to your Tushare Pro API token."
    )
ts.set_token(_tushare_token)
ts_pro = ts.pro_api()

# Interactive exploration: print the package overview.
help(ts)
Help on package tushare:
NAME
tushare - # -*- coding:utf-8 -*-
PACKAGE CONTENTS
bond (package)
coins (package)
data (package)
fund (package)
futures (package)
internet (package)
pro (package)
stock (package)
subs (package)
trader (package)
util (package)
VERSION
1.2.62
AUTHOR
Jimmy Liu
FILE
e:\anaconda3\lib\site-packages\tushare\__init__.py
# Query parameters passed to the daily-bar endpoint.
ts_code = '002069.SZ'
start_date = '2006-01-01'
end_date = '2020-01-01'

# Fetch the daily data and cache it as CSV without the index column.
daily_query = dict(ts_code=ts_code, start_date=start_date, end_date=end_date)
df = ts_pro.daily(**daily_query)
df.to_csv('002069.csv', index=False)
import pandas as pd
# Reload the cached CSV and order the rows by ascending trade date.
df = pd.read_csv('002069.csv').sort_values(by='trade_date', ascending=True)
df.head()
ts_code trade_date open high low close pre_close change pct_chg vol amount 3034 002069.SZ 20060928 60.89 64.48 58.00 62.11 25.00 37.11 148.44 169301.73 1.028809e+06 3033 002069.SZ 20060929 64.10 67.00 62.48 62.99 62.11 0.88 1.42 49290.44 3.182028e+05 3032 002069.SZ 20061009 63.00 66.15 62.16 65.00 62.99 2.01 3.19 28449.04 1.827514e+05 3031 002069.SZ 20061010 64.89 71.50 64.00 71.49 65.00 6.49 9.98 34935.76 2.381439e+05 3030 002069.SZ 20061011 70.20 71.80 69.22 69.99 71.49 -1.50 -2.10 15807.24 1.109196e+05
2. 数据预处理
# Keep only the columns that are fed to the model; every other column is dropped.
use_cols = ['open', 'high', 'low', 'close', 'pre_close', 'vol', 'amount']
df = df.loc[:, use_cols]
df.head()
open high low close pre_close vol amount 3034 60.89 64.48 58.00 62.11 25.00 169301.73 1.028809e+06 3033 64.10 67.00 62.48 62.99 62.11 49290.44 3.182028e+05 3032 63.00 66.15 62.16 65.00 62.99 28449.04 1.827514e+05 3031 64.89 71.50 64.00 71.49 65.00 34935.76 2.381439e+05 3030 70.20 71.80 69.22 69.99 71.49 15807.24 1.109196e+05
# Remember the raw close-price range so that normalised values can be mapped
# back to prices when plotting.
close_min = df['close'].min()
close_max = df['close'].max()

# Min-max scale every column using vectorised, NaN-aware column statistics
# rather than calling the Python builtins min()/max() on each Series.
# A constant column still yields NaN (0 / 0), exactly as before.
col_min = df.min()
col_max = df.max()
df = (df - col_min) / (col_max - col_min)
df.head()
open high low close pre_close vol amount 3034 0.401512 0.416768 0.404243 0.401345 0.151699 0.166571 0.800115 3033 0.423566 0.433710 0.436792 0.407265 0.401345 0.047720 0.247007 3032 0.416008 0.427995 0.434467 0.420787 0.407265 0.027080 0.141577 3031 0.428993 0.463964 0.447835 0.464447 0.420787 0.033504 0.184693 3030 0.465476 0.465981 0.485760 0.454356 0.464447 0.014561 0.085666
import numpy as np

sequence = 30   # number of consecutive rows in each input window
target_col = 3  # position of 'close' given the use_cols column order

# Convert the frame to a float64 array once. The ``np.float`` alias was removed
# in NumPy 1.24, and slicing an ndarray avoids a ``df.iloc`` call per window.
values = np.asarray(df.values, dtype=np.float64)
n_rows = values.shape[0]
n_windows = n_rows - sequence

# Sample i is rows [i, i + sequence); its label is the close value of the row
# immediately after the window.
X = [values[i:i + sequence] for i in range(n_windows)]
Y = [values[i + sequence, target_col] for i in range(n_windows)]

# Chronological split. The cut index is derived from the total row count
# (not the number of windows), which keeps the original train/test sizes.
split = int(0.8 * n_rows)
trainx, trainy = X[:split], Y[:split]
testx, testy = X[split:], Y[split:]
print(len(trainx))
print(len(testx))
2428
577
import torch
import torch. utils. data as Data
# Seed PyTorch's default random number generator.
seed = 1
torch.manual_seed(seed)
<torch._C.Generator at 0x271cfb1a110>
# Stack each list of samples into a single ndarray and wrap it as a tensor.
trainx, trainy, testx, testy = (
    torch.from_numpy(np.array(samples))
    for samples in (trainx, trainy, testx, testy)
)

# Report the resulting tensor shapes.
print('trainx size: ', trainx.size())
print('trainy size: ', trainy.size())
print('testx size: ', testx.size())
print('testy size: ', testy.size())
trainx size: torch.Size([2428, 30, 7])
trainy size: torch.Size([2428])
testx size: torch.Size([577, 30, 7])
testy size: torch.Size([577])
# Pair inputs with their labels.
train_dataset = Data.TensorDataset(trainx, trainy)
test_dataset = Data.TensorDataset(testx, testy)

# Training batches are reshuffled every epoch.
train_loader = Data.DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=2,
)

# Evaluation must keep the samples in their original (chronological) order:
# the predictions are later plotted as a curve, and shuffling would turn the
# plotted points into a random sample instead of a contiguous time series.
test_loader = Data.DataLoader(
    dataset=test_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=2,
)
3. 定义网络模型
input_size = 7    # features per time step
seq_len = 30      # time steps per input window
hidden_size = 32  # LSTM hidden units
output_size = 1   # values predicted per sample

import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable


class MyNet(nn.Module):
    """LSTM followed by a linear layer over the flattened sequence output.

    Args:
        input_size: Number of features per time step.
        hidden_size: Number of LSTM hidden units.
        output_size: Number of values produced per sample.
        seq_len: Number of time steps in every input window. The linear
            layer is sized as ``hidden_size * seq_len``, so inputs must have
            exactly this many steps.
    """

    def __init__(self, input_size=input_size, hidden_size=hidden_size,
                 output_size=output_size, seq_len=seq_len):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.seq_len = seq_len
        self.lstm = nn.LSTM(
            input_size=input_size, hidden_size=hidden_size, batch_first=True
        )
        # The hidden states of all time steps are concatenated before the
        # final projection.
        self.fc = nn.Linear(self.hidden_size * self.seq_len, self.output_size)

    def forward(self, input):
        """Map a ``(batch, seq_len, input_size)`` tensor to ``(batch, output_size)``."""
        out, _ = self.lstm(input)
        batch, steps, hidden = out.size()
        # Flatten (steps, hidden) into one feature vector per sample.
        return self.fc(out.reshape(batch, steps * hidden))


net = MyNet()
print(net)
MyNet(
(lstm): LSTM(7, 32, batch_first=True)
(fc): Linear(in_features=960, out_features=1, bias=True)
)
4. 选择损失函数及优化器,并训练模型
import torch. optim as optim
from tqdm import tqdm
loss_function = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

net.train()
for epoch in tqdm(range(100)):
    total_loss = 0  # sum of the per-batch losses in this epoch
    for data, label in train_loader:
        # Cast to float32 to match the model's parameters. Plain tensors are
        # used directly; the ``Variable`` wrapper is deprecated and a no-op.
        data = data.float()
        # Add a trailing dimension so the labels line up with the
        # (batch, 1) predictions.
        label = label.float().unsqueeze(1)

        optimizer.zero_grad()
        pred = net(data)
        loss = loss_function(pred, label)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    if (epoch + 1) % 10 == 0:
        print('Epoch: ', epoch + 1, ' loss: ', total_loss)
10%|█ | 10/100 [00:17<02:32, 1.70s/it]Epoch: 10 loss: 0.03518655754669453
20%|██ | 20/100 [00:34<02:14, 1.68s/it]Epoch: 20 loss: 0.023873500191257335
30%|███ | 30/100 [00:53<02:11, 1.88s/it]Epoch: 30 loss: 0.02149457185441861
40%|████ | 40/100 [01:14<02:07, 2.13s/it]Epoch: 40 loss: 0.017912164659719565
50%|█████ | 50/100 [01:31<01:25, 1.72s/it]Epoch: 50 loss: 0.013031252356086043
60%|██████ | 60/100 [01:51<01:14, 1.87s/it]Epoch: 60 loss: 0.014165752041662927
70%|███████ | 70/100 [02:10<01:02, 2.09s/it]Epoch: 70 loss: 0.011516284157551127
80%|████████ | 80/100 [02:27<00:35, 1.79s/it]Epoch: 80 loss: 0.01033103184090578
90%|█████████ | 90/100 [02:47<00:17, 1.74s/it]Epoch: 90 loss: 0.010540407126427453
100%|██████████| 100/100 [03:07<00:00, 1.88s/it]Epoch: 100 loss: 0.01101792572899285
5. 测试模型
pred_list = []
label_list = []

# Inference only: switch to evaluation mode and disable gradient tracking so
# no autograd graph is built for the test batches.
net.eval()
with torch.no_grad():
    for data, label in test_loader:
        pred = net(data.float())
        # (batch, 1) -> (batch,) before collecting plain Python floats.
        pred_list.extend(pred.squeeze(1).tolist())
        label_list.extend(label.tolist())
# Notebook display: the first five collected predictions.
pred_list[ : 5 ]
[0.003661651164293289,
0.004493666812777519,
0.017206232994794846,
0.004013188183307648,
-0.003268543630838394]
# Notebook display: the first five collected labels, for comparison.
label_list[ : 5 ]
[0.005449041372351158,
0.007938109653548601,
0.015136226034308779,
0.00538176925664312,
0.0014127144298688192]
简单查看预测结果与真实值,发现相差不是特别明显
import matplotlib. pyplot as plt
# Map the first 100 normalised values back to the original close-price range.
pred_prices = [v * (close_max - close_min) + close_min for v in pred_list[:100]]
real_prices = [v * (close_max - close_min) + close_min for v in label_list[:100]]

# Draw both curves on the same axes.
plt.plot(pred_prices, label='pred')
plt.plot(real_prices, label='real')
plt.title('Stock Forecast')
plt.legend()
plt.show()