Optimizing a TensorFlow-Based Deep Learning Framework

All three models below perform image classification over multiple categories.

Part 1:

Building a simple TensorFlow framework.

Step 1: Preprocess the images (grayscale conversion, resizing, etc.) and set up batch reading of the image files together with their class labels. The images are stored under the training_images directory, which holds batches of images for 3 classes; the corresponding 3 labels were shown in a figure in the original post (figure not included here). A sketch of the assumed directory layout follows.
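A minimal sketch of the assumed layout of training_images, based on the three classes mentioned at the end of this post (cars, planes, motorcycles); the folder names themselves are illustrative, since labels are assigned by folder order:

training_images/
    car/           # class 0: car001.jpg, car002.jpg, ...
    plane/         # class 1
    motorbike/     # class 2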

# -*- coding:utf-8 -*-

import tensorflow as tf
import cv2
import numpy as np
import os
import random
import sys
from sklearn.model_selection import train_test_split

size = h = w = 28 # target image size (28x28)

imgs = []
labs = []

# data directory
file_dir = './training_images'

# Read the images in one class folder and tag each with its class index (0, 1, 2, ...)
def readData(path, dirNum, h=size, w=size):
    for filename in os.listdir(path):  # list the files in the class folder
        if filename.endswith('.jpg'): # keep only .jpg files
            filename = path + '/' + filename
            # print(filename)

            img = cv2.imread(filename) # read the image file
            # cv2.waitKey(0) # keep a preview window open
            # convert to grayscale
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # cv2.imshow('image', img) # optional preview (needs cv2.waitKey to render)
            img = cv2.resize(img, (h, w)) # resize the image
            # save a resized copy (one file per class, overwritten on each iteration)
            cv2.imwrite('./other' + '/' + str(dirNum + 1) + '.jpg', img)

            imgs.append(img)
            labs.append(dirNum)

# Walk the class sub-folders, read their images and record which class each image came from
def file_name(file_dir):
    dirNum = 0
    fileNum = 0  # file counter
    for lists in os.listdir(file_dir):
        # sub_path = os.path.join(file_dir, lists)
        sub_path = file_dir + '/' + lists
        readData(sub_path, dirNum)
        print(sub_path)
        if os.path.isfile(sub_path):
            fileNum = fileNum + 1  # count plain files
        elif os.path.isdir(sub_path):
            dirNum = dirNum + 1  # count sub-directories (classes)
    return dirNum

dirNum = file_name(file_dir) # read all images and labels; returns the number of classes

# Convert the image data and labels into arrays
imgs = np.array(imgs) # image data
one_hot = []
for lab in labs:
    for i in range(dirNum):
        if lab == i:
            arr = [0 for x in range(0, dirNum)]
            arr[i] = 1
            one_hot.append(arr)
            break
labs = np.array(one_hot) # one-hot class labels

Step 2: Use sklearn's train_test_split to randomly split the data into training and test sets (5% test), then reshape the data and normalize it.



# randomly split into training and test sets
train_x,test_x,train_y,test_y = train_test_split(imgs, labs, test_size=0.05, random_state=random.randint(0,100))
# flatten each 28x28 image into a 784-dimensional vector
train_x = train_x.reshape(train_x.shape[0], 784)
test_x = test_x.reshape(test_x.shape[0], 784)
# scale pixel values into [0, 1]
train_x = train_x.astype('float32')/255.0
test_x = test_x.astype('float32')/255.0

# placeholders for feeding data into the computation graph
x = tf.placeholder(tf.float32, [None, 784])
y_ = tf.placeholder(tf.float32, [None, dirNum])

Design the simple TensorFlow framework: register a session, initialize W and b, predict the class probabilities with the softmax function, compute the cross-entropy loss, optimize it with gradient descent at a fixed learning rate of 0.5 using mini-batch gradient descent (MBGD) with batch_size samples per step, and finally compute the accuracy.
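For reference, the prediction and loss that the code below implements are, in standard notation,

$$ y = \mathrm{softmax}(Wx + b), \qquad \mathrm{softmax}(z)_i = \frac{e^{z_i}}{\sum_j e^{z_j}} $$

$$ \mathrm{cross\_entropy} = -\frac{1}{B} \sum_{n=1}^{B} \sum_{i=1}^{C} y'_{n,i} \log y_{n,i} $$

where y' is the one-hot label, C = dirNum is the number of classes, and B is the batch size.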


sess = tf.InteractiveSession() # register the default session

# W and b shapes, initialized to all zeros
W = tf.Variable(tf.zeros([784, dirNum]))
b = tf.Variable(tf.zeros([dirNum]))

y = tf.nn.softmax(tf.matmul(x, W) + b)  # softmax(Wx+b)

cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1])) # loss: mean cross-entropy over the batch

train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy) # gradient descent with a fixed learning rate of 0.5

tf.global_variables_initializer().run() # initialize all global variables

# mini-batches of 20 images
batch_size = 20
num_batch = len(train_x) // batch_size # number of batches per epoch
for i in range(500):
    for n in range(num_batch):
        batch_x = train_x[n * batch_size: (n + 1) * batch_size]
        batch_y = train_y[n * batch_size: (n + 1) * batch_size]
        train_step.run({x: batch_x, y_: batch_y})

correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1)) # whether each prediction matches the label

accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) # accuracy

print(accuracy.eval({x: test_x, y_: test_y}))
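Not part of the original script, but a minimal sketch of how the trained model could be used to classify one new image, assuming the same preprocessing as above (the file path here is hypothetical):

# classify a single new image with the trained W and b (hypothetical path)
new_img = cv2.imread('./new_image.jpg')              # hypothetical test image
new_img = cv2.cvtColor(new_img, cv2.COLOR_BGR2GRAY)  # same preprocessing as training
new_img = cv2.resize(new_img, (size, size))
new_img = new_img.reshape(1, 784).astype('float32') / 255.0
pred = sess.run(tf.argmax(y, 1), feed_dict={x: new_img})
print('predicted class index:', pred[0])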

Part 2:

Compared with Part 1, the optimizations are:

1) The weights are no longer initialized to all zeros but drawn from a truncated normal distribution with standard deviation 0.1. Available initializers include tf.random_normal (normal distribution), tf.random_uniform (uniform distribution), tf.truncated_normal (truncated normal, values never more than 2 standard deviations from the mean), tf.random_shuffle, and a few others; see the sketch after this list.

2) A hidden layer hidden1 is added and activated with the ReLU function. Other activation functions include sigmoid, tanh, etc.; CNNs generally use ReLU to mitigate vanishing gradients.

3) A dropout layer is used to prevent overfitting: during training some nodes are set to 0 and excluded from the computation, controlled by keep_prob in (0, 1), but for final prediction keep_prob is usually set to 1, since prediction should use all features. Regularization or an LRN layer can also be used for the same purpose.

4) The adaptive learning-rate algorithm Adagrad is used; Adam, Adadelta and similar algorithms also reduce the burden of tuning the learning rate.
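A minimal sketch of the alternative initializers and optimizers mentioned above (the shapes and learning rates are illustrative, not the values used in the models below):

# weight initialization options from 1), with an illustrative [784, 300] shape
w_normal    = tf.Variable(tf.random_normal([784, 300], stddev=0.1))     # normal distribution
w_uniform   = tf.Variable(tf.random_uniform([784, 300], -0.1, 0.1))     # uniform distribution
w_truncated = tf.Variable(tf.truncated_normal([784, 300], stddev=0.1))  # truncated normal (used below)
# tf.random_shuffle shuffles an existing tensor along its first dimension rather than sampling a distribution

# adaptive optimizers from 4), with illustrative learning rates
# train_step = tf.train.AdagradOptimizer(0.3).minimize(cross_entropy)    # used in Part 2
# train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)      # used in Part 3
# train_step = tf.train.AdadeltaOptimizer(1.0).minimize(cross_entropy)   # another option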

# -*- coding:utf-8 -*-

import tensorflow as tf
import cv2
import numpy as np
import os
import random
import sys
from sklearn.model_selection import train_test_split

size = h = w = 28 # 28*28 = 784 pixels

imgs = []
labs = []

# data directory
file_dir = './training_images'

# Read the images in one class folder and tag each with its class index (0, 1, 2, ...)
def readData(path, dirNum, h=size, w=size):
    for filename in os.listdir(path):  # list the files in the class folder
        if filename.endswith('.jpg'): # keep only .jpg files
            filename = path + '/' + filename
            # print(filename)

            img = cv2.imread(filename) # read the image file
            # cv2.waitKey(0) # keep a preview window open
            # convert to grayscale
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # cv2.imshow('image', img) # optional preview (needs cv2.waitKey to render)
            img = cv2.resize(img, (h, w)) # resize the image
            # save a resized copy (one file per class, overwritten on each iteration)
            cv2.imwrite('./other' + '/' + str(dirNum + 1) + '.jpg', img)

            imgs.append(img)
            labs.append(dirNum)

# Walk the class sub-folders, read their images and record which class each image came from
def file_name(file_dir):
    dirNum = 0
    fileNum = 0  # file counter
    for lists in os.listdir(file_dir):
        # sub_path = os.path.join(file_dir, lists)
        sub_path = file_dir + '/' + lists
        readData(sub_path, dirNum)
        print(sub_path)
        if os.path.isfile(sub_path):
            fileNum = fileNum + 1  # count plain files
        elif os.path.isdir(sub_path):
            dirNum = dirNum + 1  # count sub-directories (classes)
            # print(dirNum)
    return dirNum

dirNum = file_name(file_dir) # read all images and labels; returns the number of classes

# Convert the image data and labels into arrays
imgs = np.array(imgs)
one_hot = []
for lab in labs:
    for i in range(dirNum):
        if lab == i:
            arr = [0 for x in range(0, dirNum)]
            arr[i] = 1
            one_hot.append(arr)
            break
labs = np.array(one_hot)

# randomly split into training and test sets
train_x,test_x,train_y,test_y = train_test_split(imgs, labs, test_size=0.05, random_state=random.randint(0,100))
# flatten each 28x28 image into a 784-dimensional vector
train_x = train_x.reshape(train_x.shape[0], 784)
test_x = test_x.reshape(test_x.shape[0], 784)
# scale pixel values into [0, 1]
train_x = train_x.astype('float32')/255.0
test_x = test_x.astype('float32')/255.0

# placeholders for feeding data into the computation graph
x = tf.placeholder(tf.float32, [None, 784])
y_ = tf.placeholder(tf.float32, [None, dirNum])

sess = tf.InteractiveSession()  # register the default session

in_units = 784
h1_units = 300
W1 = tf.Variable(tf.truncated_normal([in_units, h1_units], stddev=0.1)) # weights drawn from a truncated normal distribution with stddev 0.1
b1 = tf.Variable(tf.zeros([h1_units]))
W2 = tf.Variable(tf.zeros([h1_units, dirNum]))
b2 = tf.Variable(tf.zeros([dirNum]))

keep_prob = tf.placeholder(tf.float32)

hidden1 = tf.nn.relu(tf.matmul(x, W1) + b1) # ReLU activation
hidden1_drop = tf.nn.dropout(hidden1, keep_prob) # dropout: keep_prob is the fraction of nodes kept; < 1 during training, 1 for prediction
y = tf.nn.softmax(tf.matmul(hidden1_drop, W2) + b2) # softmax classification

# Define loss and optimizer
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
train_step = tf.train.AdagradOptimizer(0.3).minimize(cross_entropy) # Adagrad adapts the learning rate automatically

# Train
tf.global_variables_initializer().run()

batch_size = 20
num_batch = len(train_x) // batch_size # number of batches per epoch
for i in range(500):
    for n in range(num_batch):
        batch_x = train_x[n * batch_size: (n + 1) * batch_size]
        batch_y = train_y[n * batch_size: (n + 1) * batch_size]
        train_step.run({x: batch_x, y_: batch_y, keep_prob: 0.75})

# Test trained model
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1)) # whether each prediction matches the label
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) # accuracy
print(accuracy.eval({x: test_x, y_: test_y, keep_prob: 1.0}))

Part 3:

This part builds a complete CNN (convolutional neural network) framework.

The network consists of two convolution + activation + pooling stages, one fully connected layer, one dropout layer, and one output layer; the implementation of each layer is annotated in the code. All of the optimizations 1)-4) above are used in this model, except that the Adam adaptive learning-rate optimizer is used instead of Adagrad. The training loop is also slightly improved: the training accuracy is printed every 100 steps, and once it reaches a very high value the model is considered good enough and training stops. The code below can serve as a standard TensorFlow-based CNN deep learning model; a layer-by-layer shape walkthrough follows.
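For clarity, these are the tensor shapes as data flows through the network defined below (N is the batch size; SAME padding keeps the spatial size, and each 2x2 max pool halves it):

# input             : [N, 28, 28, 1]
# conv1 5x5, 32 maps : [N, 28, 28, 32]
# max pool 2x2       : [N, 14, 14, 32]
# conv2 5x5, 64 maps : [N, 14, 14, 64]
# max pool 2x2       : [N, 7, 7, 64]
# flatten            : [N, 7*7*64] = [N, 3136]
# fc1 + ReLU         : [N, 1024]
# dropout            : [N, 1024]
# output (softmax)   : [N, dirNum]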

# -*- coding:utf-8 -*-

import tensorflow as tf
import cv2
import numpy as np
import os
import random
import sys
from sklearn.model_selection import train_test_split

size = h = w = 28  # 28*28 = 784 pixels

imgs = []
labs = []

# data directory
file_dir = './training_images'

# Read the images in one class folder and tag each with its class index (0, 1, 2, ...)
def readData(path, dirNum, h=size, w=size):
    for filename in os.listdir(path):  # list the files in the class folder
        if filename.endswith('.jpg'):  # keep only .jpg files
            filename = path + '/' + filename
            # print(filename)

            img = cv2.imread(filename)  # read the image file
            # cv2.waitKey(0) # keep a preview window open
            # convert to grayscale
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # cv2.imshow('image', img)  # optional preview (needs cv2.waitKey to render)
            img = cv2.resize(img, (h, w))  # resize the image
            # save a resized copy (one file per class, overwritten on each iteration)
            cv2.imwrite('./other' + '/' + str(dirNum + 1) + '.jpg', img)

            imgs.append(img)
            labs.append(dirNum)

# Walk the class sub-folders, read their images and record which class each image came from
def file_name(file_dir):
    dirNum = 0
    fileNum = 0  # file counter
    for lists in os.listdir(file_dir):
        # sub_path = os.path.join(file_dir, lists)
        sub_path = file_dir + '/' + lists
        readData(sub_path, dirNum)
        print(sub_path)
        if os.path.isfile(sub_path):
            fileNum = fileNum + 1  # count plain files
        elif os.path.isdir(sub_path):
            dirNum = dirNum + 1  # count sub-directories (classes)
            # print(dirNum)
    return dirNum

dirNum = file_name(file_dir)  # read all images and labels; returns the number of classes

# Convert the image data and labels into arrays
imgs = np.array(imgs)
one_hot = []
for lab in labs:
    for i in range(dirNum):
        if lab == i:
            arr = [0 for x in range(0, dirNum)]
            arr[i] = 1
            one_hot.append(arr)
            break
labs = np.array(one_hot)

# randomly split into training and test sets
train_x, test_x, train_y, test_y = train_test_split(imgs, labs, test_size=0.05, random_state=random.randint(0, 100))
print(train_x.shape)
# reshape to (number of images, height, width, channels); channels = 1 for grayscale, 3 for color
train_x = train_x.reshape(train_x.shape[0], size, size, 1)
test_x = test_x.reshape(test_x.shape[0], size, size, 1)
# scale pixel values into [0, 1]
train_x = train_x.astype('float32') / 255.0
test_x = test_x.astype('float32') / 255.0

sess = tf.InteractiveSession()

def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')

x = tf.placeholder(tf.float32, [None, size, size, 1])
y_ = tf.placeholder(tf.float32, [None, dirNum])

# first convolution + ReLU, then pooling
W_conv1 = weight_variable([5, 5, 1, 32])  # kernel size (5, 5), 1 input channel, 32 output channels (32 kernels)
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1) # 2x2 max pooling

# second convolution + ReLU, then pooling
W_conv2 = weight_variable([5, 5, 32, 64])  # 5x5 kernels, 32 input channels, 64 output channels
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

# fully connected layer
W_fc1 = weight_variable([7 * 7 * 64, 1024])  # after two 2x2 poolings, 28/2/2 = 7, so the feature map is 7*7*64
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# dropout layer
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# output layer
W_fc2 = weight_variable([1024, dirNum])
b_fc2 = bias_variable([dirNum])
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1]))  # loss
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)  # Adam adapts the learning rate automatically

correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) # accuracy
tf.global_variables_initializer().run()

# mini-batches of 20 images
batch_size = 20
num_batch = len(train_x) // batch_size # number of batches per epoch
for i in range(500):
    for n in range(num_batch):
        batch_x = train_x[n * batch_size: (n + 1) * batch_size]
        batch_y = train_y[n * batch_size: (n + 1) * batch_size]

        if (i * num_batch + n) % 100 == 0:
            train_accuracy = accuracy.eval(feed_dict={
                x: batch_x, y_: batch_y, keep_prob: 1.0})  # evaluate with dropout disabled
            print("step %d, training accuracy %g" % (i, train_accuracy))
            if train_accuracy > 0.99 and i > 2:
                sys.exit(0)  # good enough: stop training early
        train_step.run(feed_dict={x: batch_x, y_: batch_y, keep_prob: 0.5})

print("test accuracy %g" % accuracy.eval(feed_dict={
    x: test_x, y_: test_y, keep_prob: 1.0}))

Finally, using my own images from 3 classes (a few hundred pictures of cars, planes, and motorcycles), I found that the accuracy of the first two methods was unremarkable, while the third reached up to 100%, which matches the step-by-step optimization process.

That said, this article focuses on the possible ways to optimize rather than pushing any single one further. It mainly covers weight initialization methods, hidden layers and activation function choices, ways to prevent overfitting, and adaptive learning-rate algorithms. The models also contain other tunable parameters, such as batch_size (samples per training batch), the number of iterations, the convolution layers (kernel size, number of feature maps, number of layers), and the pooling layers (pooling size and method); these are not explored here for lack of theoretical grounding. From this article you should have learned how to build a CNN model on top of TensorFlow and taken a small step into image recognition.
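As an illustration only (not from the original post), the tunable parameters listed above could be gathered in one place to make experimenting easier; the values shown are the ones used in Part 3:

# hyperparameters of the Part 3 CNN, collected for easy tuning (illustrative)
BATCH_SIZE    = 20     # samples per training batch
EPOCHS        = 500    # passes over the training set
CONV1_KERNEL  = 5      # first conv layer: 5x5 kernels
CONV1_MAPS    = 32     # first conv layer: feature maps
CONV2_KERNEL  = 5      # second conv layer: 5x5 kernels
CONV2_MAPS    = 64     # second conv layer: feature maps
POOL_SIZE     = 2      # 2x2 max pooling
KEEP_PROB     = 0.5    # dropout keep probability during training
LEARNING_RATE = 1e-4   # Adam learning rate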
 

 
