创新实训日记五：视线追踪模型结构的调整和数据集的改进

最新推荐文章于 2024-02-10 21:35:30 发布

AFXBR

最新推荐文章于 2024-02-10 21:35:30 发布

阅读量622

点赞数 2

分类专栏：创新实训

本文链接：https://blog.csdn.net/AFXBR/article/details/89431779

版权

创新实训专栏收录该内容

11 篇文章 1 订阅

订阅专栏

本周的工作依然是调整模型结构，以期优化结果。上周我们完成了新数据集的设计，本周完成数据采集之后，我们在初步数据集上对两个新的模型进行了尝试。

一个是减少特征的简单模型，只包含一层卷积、一层池化和一层全连接；另一个是增加特征的复杂模型，包含脸部的卷积、池化、全连接，眼睛部位的卷积、池化、全连接，以及合并之后的总全连接等结构。

相关代码如下
简单模型

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: vali
"""
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 27 19:54:34 2019

@author: vali
"""

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Mar 31 21:02:14 2019

@author: vali
"""

# coding:utf8
 
import tensorflow as tf
import numpy as np
import loadData2 as ld
import os 
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
 
 
def weight_variable(shape):
    """Create a trainable weight tensor initialised with small random noise.

    The initial values come from ``tf.random_normal`` (a plain, non-truncated
    normal distribution) with a standard deviation of 0.01, so the weights do
    not start out perfectly symmetric.

    :param shape: shape of the weight tensor to create
    :return: a ``tf.Variable`` holding the weights
    """
    return tf.Variable(tf.random_normal(shape, stddev=0.01))
 
 
def bias_variable(shape):
    """Create a trainable bias tensor with every entry set to 0.1.

    The small positive start value is meant to keep ReLU units from starting
    out inactive ("dead neurons").

    :param shape: shape of the bias tensor to create
    :return: a ``tf.Variable`` holding the biases
    """
    return tf.Variable(tf.constant(0.1, shape=shape))
 
 
def conv2d(x, W):
    """Apply a 2-D convolution with unit strides and "SAME" padding.

    :param x: input tensor handed to ``tf.nn.conv2d``
    :param W: filter tensor, e.g. shape [5, 5, 1, 32] for 32 kernels of size
        5x5 over 1 input channel
    :return: the convolved tensor; with stride 1 and "SAME" padding its
        spatial size equals that of ``x``
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding="SAME")
 
 
def max_pool_2x2(x):
    """Down-sample ``x`` with 2x2 max pooling.

    The pooling window and the stride are both 2 along the two spatial axes,
    which shrinks the feature map; "SAME" padding is used at the borders.

    :param x: input tensor handed to ``tf.nn.max_pool``
    :return: the pooled tensor
    """
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding="SAME")

def generator():
    """Yield training samples from 'dataset_300.json' in an endless cycle.

    Each item is a tuple ``(eye image, image info, label)`` taken at the same
    index from the three sequences returned by ``ld.load_data``.  After the
    last sample the index wraps around to the first one, so the generator
    never finishes on its own.
    """
    eye_img, img_info, img_label = ld.load_data('dataset_300.json')
    sample_count = len(eye_img)
    cursor = 0
    while True:
        yield (eye_img[cursor], img_info[cursor], img_label[cursor])
        # Step forward, wrapping back to 0 after the last sample.
        cursor = (cursor + 1) % sample_count
        
        
 
def train(val_eye, val_data_info, val_label):
    """Build, train and spot-check the single-convolution gaze model.

    Network: eye image (25x50x3) -> 5x5 convolution with 20 filters -> ReLU
    -> 2x2 max pooling -> dropout -> concatenation with the 4 ``x_info``
    features -> one fully connected tanh layer producing the 2 outputs.

    The model is trained for 600 steps with Adam (learning rate 0.0005) on a
    mean-squared-error loss, using batches of 25 drawn from ``generator``.
    The loss of the current batch is printed on every even step.  Afterwards
    up to 10 validation samples are evaluated one at a time and their loss,
    label and prediction are printed.

    :param val_eye: validation eye images, each reshapeable to (1, 25, 50, 3)
    :param val_data_info: validation info vectors, each reshapeable to (1, 4)
    :param val_label: validation labels, each reshapeable to (1, 2)
    """
    # Input pipeline: samples from generator(), shuffled and batched.
    dataset = tf.data.Dataset.from_generator(
        generator,
        (tf.float32, tf.float32, tf.float32),
        (tf.TensorShape([25, 50, 3]), tf.TensorShape([4]), tf.TensorShape([2])))
    # A shuffle buffer as large as the data set gives a complete shuffle;
    # 25 is the batch size.
    dataset = dataset.shuffle(158).batch(25)
    dataset = dataset.repeat()
    next_batch = dataset.make_one_shot_iterator().get_next()

    # Placeholders for the inputs and the label.
    x0 = tf.placeholder(tf.float32, [None, 25, 50, 3], 'x0')  # eye image
    x_info = tf.placeholder(tf.float32, [None, 4], 'x_info')  # extra features
    y_ = tf.placeholder(tf.float32, [None, 2], 'y0')  # label

    # Eye branch: 5x5 kernels, 3 input channels, 20 filters.
    W_conv_eye = weight_variable([5, 5, 3, 20])
    b_conv_eye = bias_variable([20])
    h_conv_eye = tf.nn.relu(conv2d(x0, W_conv_eye) + b_conv_eye)  # 25x50x3 -> 25x50x20
    h_pool_eye = max_pool_2x2(h_conv_eye)  # 25x50x20 -> 13x25x20

    h_pool_eye_flat = tf.reshape(h_pool_eye, [-1, 13 * 25 * 20])

    keep_prob_eye = tf.placeholder(tf.float32)
    h_eye_drop = tf.nn.dropout(h_pool_eye_flat, keep_prob_eye)

    # Append the 4 info features to the flattened image features.
    h_test = tf.concat([h_eye_drop, x_info], 1)

    W_fc_eye = weight_variable([13 * 25 * 20 + 4, 2])
    b_fc_eye = bias_variable([2])
    y_conv = tf.nn.tanh(tf.matmul(h_test, W_fc_eye) + b_fc_eye)

    # Mean-squared-error loss, optimised with Adam at learning rate 0.0005.
    # Arguments are passed by keyword to match the (labels, predictions)
    # signature; the value is the same either way because MSE is symmetric.
    mse_loss = tf.losses.mean_squared_error(labels=y_, predictions=y_conv)
    train_step = tf.train.AdamOptimizer(0.0005).minimize(mse_loss)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        STEPS = 600
        for i in range(STEPS):
            batch_eye, batch_img_info, batch_y = sess.run(next_batch)

            if i % 2 == 0:
                # Report the loss with dropout disabled (keep probability 1.0).
                train_loss = sess.run(mse_loss, feed_dict={
                    x0: batch_eye, y_: batch_y,
                    x_info: batch_img_info, keep_prob_eye: 1.0})
                print(i, train_loss)

            sess.run(train_step, feed_dict={
                x0: batch_eye, y_: batch_y,
                x_info: batch_img_info, keep_prob_eye: 0.2})

        # Validation: evaluate at most 10 samples, one per run.  The bound is
        # capped by the data actually supplied so that a validation set with
        # fewer than 10 samples no longer raises IndexError.
        val_count = min(10, len(val_eye), len(val_data_info), len(val_label))
        for i in range(val_count):
            batch_eye = val_eye[i].reshape((1, 25, 50, 3))
            batch_img_info = val_data_info[i].reshape((1, 4))
            batch_y = val_label[i].reshape((1, 2))
            res = sess.run([mse_loss, y_conv], feed_dict={
                x0: batch_eye, y_: batch_y,
                x_info: batch_img_info, keep_prob_eye: 1.0})
            print('--------------validation---------------')
            print(i, res[0])
            print(batch_y, res[1])
            

if __name__ == "__main__":
    # NOTE(review): the second argument 1 presumably selects the validation
    # split of the data set - confirm against loadData2.load_data.
    validation_split = ld.load_data('dataset_300.json', 1)
    val_eye, val_data_info, val_label = validation_split
    train(val_eye, val_data_info, val_label)

复杂模型

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: vali
"""

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Mar 31 21:02:14 2019

@author: vali
"""

# coding:utf8
 
import tensorflow as tf
import numpy as np
import loadData as ld
import os 
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
 
 
def weight_variable(shape):
    """Create a trainable weight tensor initialised with small random noise.

    The initial values come from ``tf.random_normal`` (a plain, non-truncated
    normal distribution) with a standard deviation of 0.01, so the weights do
    not start out perfectly symmetric.

    :param shape: shape of the weight tensor to create
    :return: a ``tf.Variable`` holding the weights
    """
    return tf.Variable(tf.random_normal(shape, stddev=0.01))
 
 
def bias_variable(shape):
    """Create a trainable bias tensor with every entry set to 0.1.

    The small positive start value is meant to keep ReLU units from starting
    out inactive ("dead neurons").

    :param shape: shape of the bias tensor to create
    :return: a ``tf.Variable`` holding the biases
    """
    return tf.Variable(tf.constant(0.1, shape=shape))
 
 
def conv2d(x, W):
    """Apply a 2-D convolution with unit strides and "SAME" padding.

    :param x: input tensor handed to ``tf.nn.conv2d``
    :param W: filter tensor, e.g. shape [5, 5, 1, 32] for 32 kernels of size
        5x5 over 1 input channel
    :return: the convolved tensor; with stride 1 and "SAME" padding its
        spatial size equals that of ``x``
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding="SAME")
 
 
def max_pool_2x2(x):
    """Down-sample ``x`` with 2x2 max pooling.

    The pooling window and the stride are both 2 along the two spatial axes,
    which shrinks the feature map; "SAME" padding is used at the borders.

    :param x: input tensor handed to ``tf.nn.max_pool``
    :return: the pooled tensor
    """
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding="SAME")

def generator():
    """Yield training samples from 'dataset_300.json' in an endless cycle.

    Each item is a tuple ``(eye image, face image, image info, label)`` taken
    at the same index from the four sequences returned by ``ld.load_data``.
    After the last sample the index wraps around to the first one, so the
    generator never finishes on its own.
    """
    eye_img, face_img, img_info, img_label = ld.load_data('dataset_300.json')
    sample_count = len(eye_img)
    cursor = 0
    while True:
        yield (eye_img[cursor], face_img[cursor],
               img_info[cursor], img_label[cursor])
        # Step forward, wrapping back to 0 after the last sample.
        cursor = (cursor + 1) % sample_count
        
        
 
def train(val_eye, val_face, val_data_info, val_label):
    """Build, train and spot-check the two-branch (eye + face) gaze model.

    Eye branch: eye image (25x50x3) -> 5x5 convolution with 32 filters ->
    ReLU -> 2x2 max pooling -> dropout -> fully connected tanh layer (128).

    Face branch: face image (50x50x3) -> 5x5 convolution with 32 filters ->
    ReLU -> pooling -> 5x5 convolution with 64 filters -> ReLU -> pooling ->
    fully connected tanh layer (128) -> dropout -> fully connected tanh
    layer (64).

    Both branch outputs and the 4 ``x_info`` features are concatenated
    (128 + 64 + 4 = 196 values) and passed through a fully connected tanh
    layer (128), dropout, and a final tanh layer producing the 2 outputs.

    The model is trained for 1000 steps with Adam (learning rate 0.0005) on a
    mean-squared-error loss, using batches of 25 drawn from ``generator``.
    The loss of the current batch is printed on every even step.  Afterwards
    up to 10 validation samples are evaluated one at a time and their loss,
    label and prediction are printed.

    :param val_eye: validation eye images, each reshapeable to (1, 25, 50, 3)
    :param val_face: validation face images, each reshapeable to (1, 50, 50, 3)
    :param val_data_info: validation info vectors, each reshapeable to (1, 4)
    :param val_label: validation labels, each reshapeable to (1, 2)
    """
    # Input pipeline: samples from generator(), shuffled and batched.
    dataset = tf.data.Dataset.from_generator(
        generator,
        (tf.float32, tf.float32, tf.float32, tf.float32),
        (tf.TensorShape([25, 50, 3]), tf.TensorShape([50, 50, 3]),
         tf.TensorShape([4]), tf.TensorShape([2])))
    # A shuffle buffer as large as the data set gives a complete shuffle;
    # 25 is the batch size.
    dataset = dataset.shuffle(241).batch(25)
    dataset = dataset.repeat()
    next_batch = dataset.make_one_shot_iterator().get_next()

    # Placeholders for the inputs and the label.
    x0 = tf.placeholder(tf.float32, [None, 25, 50, 3], 'x0')  # eye image
    x_face = tf.placeholder(tf.float32, [None, 50, 50, 3], 'x_face')
    x_info = tf.placeholder(tf.float32, [None, 4], 'x_info')  # extra features
    y_ = tf.placeholder(tf.float32, [None, 2], 'y0')  # label

    # ---- Eye branch: 5x5 kernels, 3 input channels, 32 filters ----
    W_conv_eye = weight_variable([5, 5, 3, 32])
    b_conv_eye = bias_variable([32])
    h_conv_eye = tf.nn.relu(conv2d(x0, W_conv_eye) + b_conv_eye)  # 25x50x3 -> 25x50x32
    h_pool_eye = max_pool_2x2(h_conv_eye)  # 25x50x32 -> 13x25x32

    h_pool_eye_flat = tf.reshape(h_pool_eye, [-1, 13 * 25 * 32])

    keep_prob_eye = tf.placeholder(tf.float32)
    h_eye_drop = tf.nn.dropout(h_pool_eye_flat, keep_prob_eye)

    W_fc_eye = weight_variable([13 * 25 * 32, 128])
    b_fc_eye = bias_variable([128])
    h_fc_eye = tf.nn.tanh(tf.matmul(h_eye_drop, W_fc_eye) + b_fc_eye)

    # ---- Face branch: two convolution + pooling stages ----
    W_conv_face1 = weight_variable([5, 5, 3, 32])
    b_conv_face1 = bias_variable([32])
    h_conv_face1 = tf.nn.relu(conv2d(x_face, W_conv_face1) + b_conv_face1)
    h_pool_face1 = max_pool_2x2(h_conv_face1)  # 50x50 -> 25x25

    W_conv_face2 = weight_variable([5, 5, 32, 64])
    b_conv_face2 = bias_variable([64])
    h_conv_face2 = tf.nn.relu(conv2d(h_pool_face1, W_conv_face2) + b_conv_face2)
    h_pool_face2 = max_pool_2x2(h_conv_face2)  # 25x25 -> 13x13

    h_pool_face_flat = tf.reshape(h_pool_face2, [-1, 13 * 13 * 64])

    W_fc_face1 = weight_variable([13 * 13 * 64, 128])
    b_fc_face1 = bias_variable([128])
    h_fc_face1 = tf.nn.tanh(tf.matmul(h_pool_face_flat, W_fc_face1) + b_fc_face1)
    keep_prob_face = tf.placeholder(tf.float32)
    h_fc_face1_drop = tf.nn.dropout(h_fc_face1, keep_prob_face)

    W_fc_face2 = weight_variable([128, 64])
    b_fc_face2 = bias_variable([64])
    h_fc_face2 = tf.nn.tanh(tf.matmul(h_fc_face1_drop, W_fc_face2) + b_fc_face2)

    # ---- Merge: eye features (128) + face features (64) + info (4) ----
    h_pool_merge1 = tf.concat([h_fc_eye, h_fc_face2], 1)
    h_pool_merge2 = tf.concat([h_pool_merge1, x_info], 1)

    W_fc1 = weight_variable([196, 128])  # 196 = 128 + 64 + 4
    b_fc1 = bias_variable([128])
    h_fc1 = tf.nn.tanh(tf.matmul(h_pool_merge2, W_fc1) + b_fc1)  # 196 -> 128

    # Dropout against over-fitting; the keep probability is fed through a
    # placeholder so it can be set to 1.0 when evaluating.
    keep_prob = tf.placeholder(tf.float32)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Output layer: 2 values squashed by tanh.
    W_fc2 = weight_variable([128, 2])
    b_fc2 = bias_variable([2])
    y_conv = tf.nn.tanh(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

    # Mean-squared-error loss, optimised with Adam at learning rate 0.0005.
    # Arguments are passed by keyword to match the (labels, predictions)
    # signature; the value is the same either way because MSE is symmetric.
    mse_loss = tf.losses.mean_squared_error(labels=y_, predictions=y_conv)
    train_step = tf.train.AdamOptimizer(0.0005).minimize(mse_loss)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        STEPS = 1000
        for i in range(STEPS):
            batch_eye, batch_face, batch_img_info, batch_y = sess.run(next_batch)

            if i % 2 == 0:
                # Report the loss with all dropout layers disabled.
                train_loss = sess.run(mse_loss, feed_dict={
                    x0: batch_eye, x_face: batch_face, y_: batch_y,
                    x_info: batch_img_info,
                    keep_prob_eye: 1.0, keep_prob_face: 1.0, keep_prob: 1.0})
                print(i, train_loss)

            sess.run(train_step, feed_dict={
                x0: batch_eye, x_face: batch_face, y_: batch_y,
                x_info: batch_img_info,
                keep_prob_eye: 0.2, keep_prob_face: 0.8, keep_prob: 0.5})

        # Validation: evaluate at most 10 samples, one per run.  The bound is
        # capped by the data actually supplied so that a validation set with
        # fewer than 10 samples no longer raises IndexError.
        val_count = min(10, len(val_eye), len(val_face),
                        len(val_data_info), len(val_label))
        for i in range(val_count):
            batch_eye = val_eye[i].reshape((1, 25, 50, 3))
            batch_face = val_face[i].reshape((1, 50, 50, 3))
            batch_img_info = val_data_info[i].reshape((1, 4))
            batch_y = val_label[i].reshape((1, 2))
            res = sess.run([mse_loss, y_conv], feed_dict={
                x0: batch_eye, x_face: batch_face, y_: batch_y,
                x_info: batch_img_info,
                keep_prob_eye: 1.0, keep_prob_face: 1.0, keep_prob: 1.0})
            print('--------------validation---------------')
            print(i, res[0])
            print(batch_y, res[1])
            
        

if __name__ == "__main__":
    # NOTE(review): validation data is read from 'dataset.json' while
    # generator() reads 'dataset_300.json' - confirm this is intended.
    # The second argument 1 presumably selects the validation split;
    # verify against loadData.load_data.
    validation_split = ld.load_data('dataset.json', 1)
    val_eye, val_face, val_data_info, val_label = validation_split
    train(val_eye, val_face, val_data_info, val_label)