1、概述
关于Lenet-5的介绍以及应用,参考CNN-LeNet网络图像分类_月融花下的博客-CSDN博客。
2、特征分析
数据集:tt.csv
date | 时间 |
open | 开盘价 |
high | 最高价 |
low | 最低价 |
close | 收盘价 |
label | 1或者-1 |
Lenet-5是基于对图片的二维卷积,图片本质上是二维数据(黑白)和三维数据(彩色),需要构建二维数据。其中以窗口滑动的方式,将open、high、low构成特征集,以窗口大小里出现label次数最多的数字作为标签集。
3、实验部分
numpy中文文档:NumPy
sklearn中文文档:sklearn中文文档
tensorflow文档:Module: tf | TensorFlow v2.13.0
3.1 模块加载
import pandas as pd
from sklearn.preprocessing import minmax_scale
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from scipy.stats import mode
import tensorflow.compat.v1 as tf
3.2 数据处理
3.2.1 读取数据
# Load the raw stock data, then squash each price column into [0, 1]
# so the three features share a comparable scale.
df = pd.read_csv("../data/stock_forecast/tt.csv")
for _col in ("open", "high", "low"):
    df[_col] = minmax_scale(df[_col])
df.head()
3.2.2 构建数据
# Accumulators for the sliding-window feature matrices and their labels.
data = []
label = []
以90为窗口大小进行滑动。
# Sliding-window index generator.
def windows(data, size):
    """Yield half-overlapping (start, end) index pairs over *data*.

    For size=90 this produces (0, 90), (45, 135), (90, 180), ... until the
    window start reaches the number of non-NA entries in *data*.
    """
    pos = 0
    while pos < data.count():
        yield int(pos), int(pos + size)
        pos += size / 2  # advance by half a window (float, truncated on yield)
# Slice the price series into half-overlapping windows and stack them into a
# (num_windows, window_size, 3) feature tensor with one label per window.
def segment_signal(data, window_size=90):
    """Build (segments, labels) from *data* with a sliding window.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns "date", "open", "high", "low", "label".
    window_size : int, default 90
        Window length; consecutive windows overlap by half their size.

    Returns
    -------
    segments : np.ndarray, shape (num_windows, window_size, 3)
        Stacked (open, high, low) values for each full-length window.
    labels : np.ndarray, shape (num_windows,)
        Most frequent "label" value inside each window.
    """
    segments = np.empty((0, window_size, 3))
    print("segments的结构:\t", segments.shape)
    labels = np.empty(0)
    print("labels的结构 :\t", labels.shape)
    for (start, end) in windows(data["date"], window_size):
        x = data["open"][start:end]
        y = data["high"][start:end]
        z = data["low"][start:end]
        # BUG FIX: the original measured len(df["date"][start:end]) against the
        # module-level global `df` instead of the `data` argument, so the
        # function silently broke for any DataFrame other than the global one.
        # Only keep windows that are exactly window_size long (drops the tail).
        if len(data["date"][start:end]) == window_size:
            segments = np.vstack([segments, np.dstack([x, y, z])])
            labels = np.append(labels, mode(data["label"][start:end])[0])
    return segments, labels
# Build the windowed dataset: one (90, 3) feature block per window, with the
# window's most frequent label as its target.
data, label = segment_signal(df)
print("data:", data.shape)
print("label:", label.shape)
# Remap the -1 class to 0 so the label set is {0, 1} for one-hot encoding.
label[label == -1] = 0
# Hold out 20% of the windows for testing.
x_train, x_test, y_train, y_test = train_test_split(data, label, test_size=0.2)
x_train = np.array(x_train).reshape(len(x_train), 90, 3)
x_test = np.array(x_test).reshape(len(x_test), 90, 3)
y_train = np.array(y_train).reshape(-1, 1)  # column vector, as OneHotEncoder expects
y_test = np.array(y_test).reshape(-1, 1)
y_train.shape
3.2.3 标签独热编码
# One-hot encode the {0, 1} labels into two-column indicator vectors.
# The encoder is fitted on the training labels only, then the same mapping
# is applied to both splits.
enc = OneHotEncoder()
y_train = enc.fit_transform(y_train).toarray()
y_test = enc.transform(y_test).toarray()
print(y_train)
3.3 网络结构
3.3.1 设置参数
# Hyper-parameters for the 1-D CNN.
in_channels = 3   # features per time step: open, high, low
units = 256       # convolution filter count
epoch = 10000     # training iterations
batch_size = 5
# BUG FIX: the original used true division, making `batch` a float;
# a batch count must be an integer, so use floor division.
batch = x_train.shape[0] // batch_size
3.3.2 占位符
# Create TF1-style graph-mode input placeholders; eager execution must be
# disabled first for tf.placeholder to work under tf.compat.v1.
tf.disable_eager_execution()
x = tf.placeholder(tf.float32,shape=(None,90,3)) # input windows: (batch, 90, 3)
y = tf.placeholder(tf.float32,shape=(None,2)) # one-hot labels: (batch, 2) — the original comment said (None,3), which was wrong
3.3.3 网络结构
卷积层+池化层
# Layer 1: 1-D conv (256 filters, kernel 4, stride 2, SAME) + max-pool (2, 2).
h1 = tf.layers.conv1d(x,256,4,2,'SAME',use_bias=True,activation=tf.nn.relu) # (None,45,256) — the original comment's 259 was a typo
p1 = tf.layers.max_pooling1d(h1,2,2,padding='VALID') # (None,22,256)
print(h1.get_shape())
print(p1.get_shape())
卷积层+池化层
# Layer 2: same conv + pool pattern as layer 1.
# NOTE(review): p2 is never consumed — layer 3 below reads p1, so h2/p2 are
# dead graph nodes. Confirm whether layer 3 was meant to take p2 instead.
h2 = tf.layers.conv1d(p1,256,4,2,'SAME',use_bias=True,activation=tf.nn.relu)
p2 = tf.layers.max_pooling1d(h2,2,2,padding='VALID')
卷积层+池化层+平铺层
# Layer 3: project p1 (note: p1, not p2) down to 2 channels, pool the whole
# remaining length to a single step, and flatten to per-sample logits.
h3 = tf.layers.conv1d(p1,2,4,2,'SAME',use_bias=True,activation=tf.nn.relu) # (None,11,2)
p3 = tf.layers.max_pooling1d(h3,11,1,padding='VALID') # (None,1,2)
res = tf.reshape(p3,shape=(-1,2)) # logits, (None,2)
print(h3.get_shape())
print(p3.get_shape())
print(res.shape)
3.3.4 随机梯度下降
# Mean softmax cross-entropy between the logits and the one-hot targets.
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=res,labels=y))
# Accuracy: fraction of samples whose argmax prediction matches the label.
ac = tf.cast(tf.equal(tf.argmax(res,1),tf.argmax(y,1)),tf.float32)
acc = tf.reduce_mean(ac)
# Adam optimizer, learning rate 1e-4.
optim = tf.train.AdamOptimizer(0.0001).minimize(loss)
3.3.5 训练
# Train with full-batch gradient steps and report metrics every 100 iterations.
# NOTE(review): batch_size/batch defined earlier are unused here — every step
# feeds the entire training set.
pred_op = tf.argmax(res, 1)  # build the prediction op once, not inside sess.run
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(10000):
        sess.run(optim, feed_dict={x: x_train, y: y_train})
        if i % 100 == 0:
            los, accuracy = sess.run([loss, acc], feed_dict={x: x_train, y: y_train})
            # BUG FIX: the original printed the `loss` Tensor object and never
            # showed the fetched accuracy; print the two fetched values instead.
            print(los, accuracy)
    # Predicted class indices for the held-out windows.
    ccc = sess.run(pred_op, feed_dict={x: x_test, y: y_test})
    print(ccc)
notebook下载地址: