Building VGG16 in TensorFlow for Seven-Class Classification

Experiment Overview

1. Transfer learning based on the VGG16 model. Because the overall dataset is small, only the parameters of the last layer are trained and all other layers are frozen; as the dataset grows, more layers toward the input can be unfrozen and trained.

2. The overall workflow:

(1) Preprocess the data

(2) Build the VGG16 model class

(3) Use the model class to reuse the pretrained model, load the data, train, and save the weights

(4) Classify with the trained model

3. The dataset consists of photos taken by the author, in seven classes of roughly 30 images each; the images of each class sit in their own folder, and each folder is named after its class.

4. The implemented VGG16 structure:

input->conv->conv->pool->conv->conv->pool->conv->conv->conv->pool->conv->conv->conv->pool->conv->conv->conv->pool->fc->fc->fc->softmax->output

With 224×224 inputs, each of the five 2×2 max-pool layers halves the height and width (224→112→56→28→14→7), so the last pooling output is 7×7×512 and flattens into 25088 inputs for the first fully connected layer.

Data Preprocessing

The dataset is augmented by flipping, rotating, and adding noise, producing six variants of each source image; the code follows.

Save it as preprocess.py.

import os
import cv2
import random
import numpy as np

# Helper: add salt-and-pepper noise
# prob: fraction of pixels turned to salt or pepper
def sp_noise(image, prob):
  output = np.zeros(image.shape, np.uint8)
  thres = 1 - prob
  for i in range(image.shape[0]):
    for j in range(image.shape[1]):
      rdn = random.random()
      if rdn < prob:
        output[i][j] = 0       # pepper
      elif rdn > thres:
        output[i][j] = 255     # salt
      else:
        output[i][j] = image[i][j]
  return output


# Batch-augment every image in a directory, writing the results back into the same directory
def alter(path):
  s = os.listdir(path)  # list of image file names
  count = 0  # counter used in output file names
  for i in s:
    # print(i)  # print the file name
    document = os.path.join(path, i)  # absolute path of one image
    img = cv2.imread(document)  # load the image

    img = cv2.resize(img, (224, 224))  # resize to the VGG input size
    img_hflip = cv2.flip(img, 1)  # horizontal mirror
    img_vflip = cv2.flip(img, 0)  # vertical mirror
    img_hvflip = cv2.flip(img, -1)  # horizontal + vertical mirror
    img_sp = sp_noise(img, 0.03)  # salt-and-pepper noise
    rows, cols = img.shape[:2]
    M = cv2.getRotationMatrix2D((cols / 2, rows / 2), 90, 1)  # rotate 90 degrees about the center
    img_nt = cv2.warpAffine(img, M, (cols, rows))
    new_path = os.path.join(path, str(count)) + ".jpg"
    new_path_h = os.path.join(path, str(count)) + "h" + ".jpg"
    new_path_v = os.path.join(path, str(count)) + "v" + ".jpg"
    new_path_hv = os.path.join(path, str(count)) + "hv" + ".jpg"
    new_path_nt = os.path.join(path, str(count)) + "nt" + ".jpg"
    new_path_sp = os.path.join(path, str(count)) + "sp" + ".jpg"
    count += 1

    cv2.imwrite(new_path, img)
    print(new_path)
    cv2.imwrite(new_path_h, img_hflip)
    print(new_path_h)
    cv2.imwrite(new_path_v, img_vflip)
    print(new_path_v)
    cv2.imwrite(new_path_hv, img_hvflip)
    print(new_path_hv)
    cv2.imwrite(new_path_nt, img_nt)
    print(new_path_nt)
    cv2.imwrite(new_path_sp, img_sp)
    print(new_path_sp)
# Call the batch function on the images under one path.
# Note: this is the path of a single class subfolder; edit the path and rerun once
# per class folder (or use the driver loop sketched below).
alter('D:\\M\\twentyfive\\')
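Editing the path and rerunning once per class gets tedious. A minimal driver sketch, assuming the parent folder D:\M holds exactly one subfolder per class; note that rerunning it would re-augment the files the previous run produced:

import os

root = 'D:\\M'  # assumed parent folder containing one subfolder per class
for sub in os.listdir(root):
    folder = os.path.join(root, sub)
    if os.path.isdir(folder):
        alter(folder)  # augment every class folder in a single pass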

Building the VGG16 Model Class

Save it as vgg_model.py (the scripts below import it as vgg_model; a hyphenated name such as vgg-model.py cannot be imported as a Python module).

import tensorflow as tf
import numpy as np
import os
from vgg_preprocessing import preprocess_for_train  # the official TF VGG image-preprocessing module (reproduced below)


class Vgg16:  # the VGG16 model class
    def __init__(self, imgs):
        self.parameters = []  # global list that collects layer parameters, used when loading pretrained weights
        self.imgs = imgs  # input image batch
        self.convlayers()  # the convolutional part of the model
        self.fc_layers()  # the fully connected part of the model
        self.probs = tf.nn.softmax(self.fc8)  # model output: class probabilities

    def saver(self):  # saver object, used to store the model
        return tf.train.Saver()

    # Max pooling
    # args: node name, input tensor
    def maxpool(self, name, input_data):
        # 2x2 window with stride 2: the feature-map area shrinks to 1/4, i.e. height and width are each halved
        out = tf.nn.max_pool(input_data, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME', name=name)
        return out
    # 2-D convolution
    # args: node name, input tensor, output channels, whether the layer is unfrozen (trainable)
    def conv(self, name, input_data, out_channel, trainable=False):
        in_channel = input_data.get_shape()[-1]  # channel count of the input tensor
        with tf.variable_scope(name):  # variable scope, named by the caller
            # convolution kernel: fixed at 3x3, input channels follow the input tensor
            kernel = tf.get_variable("weights", [3, 3, in_channel, out_channel], dtype=tf.float32, trainable=trainable)
            # bias
            biases = tf.get_variable("biases", [out_channel], dtype=tf.float32, trainable=trainable)
            # convolution, stride 1, SAME padding
            conv_res = tf.nn.conv2d(input_data, kernel, [1, 1, 1, 1], padding="SAME")
            # add the bias, then activate
            res = tf.nn.bias_add(conv_res, biases)
            out = tf.nn.relu(res, name=name)  # ReLU activation; a sigmoid-type activation could be tried if results are poor
        self.parameters += [kernel, biases]  # record the parameters in the global list
        return out
    # Fully connected layer
    # args: node name, input tensor, output channels, whether the layer is unfrozen (trainable)
    def fc(self, name, input_data, out_channel, trainable=True):
        # derive the flattened size (the input channel count) from the input shape
        shape = input_data.get_shape().as_list()
        if len(shape) == 4:
            size = shape[-1] * shape[-2] * shape[-3]  # e.g. (-1, 28, 28, 1) -> 28*28*1 = 784
        else:
            size = shape[1]
        # flatten: required when a conv output feeds an fc layer
        input_data_flat = tf.reshape(input_data, [-1, size])

        with tf.variable_scope(name):
            weights = tf.get_variable(name="weights", shape=[size, out_channel], dtype=tf.float32, trainable=trainable)
            biases = tf.get_variable(name="biases", shape=[out_channel], dtype=tf.float32, trainable=trainable)
            res = tf.matmul(input_data_flat, weights)
            out = tf.nn.relu(tf.nn.bias_add(res, biases))
        self.parameters += [weights, biases]
        return out

    # Build the convolutional stack
    def convlayers(self):
        # block 1: input -> conv(64) -> conv(64) -> max pool
        self.conv1_1 = self.conv("conv1_1", self.imgs, 64, trainable=False)
        self.conv1_2 = self.conv("conv1_2", self.conv1_1, 64, trainable=False)
        self.pool1 = self.maxpool("pool1", self.conv1_2)

        # block 2: pool1 -> conv(128) -> conv(128) -> max pool
        self.conv2_1 = self.conv("conv2_1", self.pool1, 128, trainable=False)
        self.conv2_2 = self.conv("conv2_2", self.conv2_1, 128, trainable=False)
        self.pool2 = self.maxpool("pool2", self.conv2_2)

        # block 3: pool2 -> conv(256) -> conv(256) -> conv(256) -> max pool
        self.conv3_1 = self.conv("conv3_1", self.pool2, 256, trainable=False)
        self.conv3_2 = self.conv("conv3_2", self.conv3_1, 256, trainable=False)
        self.conv3_3 = self.conv("conv3_3", self.conv3_2, 256, trainable=False)
        self.pool3 = self.maxpool("pool3", self.conv3_3)

        # block 4: pool3 -> conv(512) -> conv(512) -> conv(512) -> max pool
        self.conv4_1 = self.conv("conv4_1", self.pool3, 512, trainable=False)
        self.conv4_2 = self.conv("conv4_2", self.conv4_1, 512, trainable=False)
        self.conv4_3 = self.conv("conv4_3", self.conv4_2, 512, trainable=False)
        self.pool4 = self.maxpool("pool4", self.conv4_3)

        # block 5: pool4 -> conv(512) -> conv(512) -> conv(512) -> max pool
        self.conv5_1 = self.conv("conv5_1", self.pool4, 512, trainable=False)
        self.conv5_2 = self.conv("conv5_2", self.conv5_1, 512, trainable=False)
        self.conv5_3 = self.conv("conv5_3", self.conv5_2, 512, trainable=False)
        self.pool5 = self.maxpool("pool5", self.conv5_3)
    
    # Build the fully connected stack
    # Change the output size of the last fc layer to match your own class count.
    # All layers except the last use trainable=False (frozen); only the last layer is trained and updated.
    def fc_layers(self):
        self.fc6 = self.fc("fc6", self.pool5, 4096, trainable=False)
        self.fc7 = self.fc("fc7", self.fc6, 4096, trainable=False)
        self.fc8 = self.fc("fc8", self.fc7, 7, trainable=True)  # output size = number of classes
    # Load pretrained weights
    # The .npz file maps parameter names to arrays, dictionary style
    def load_weights(self, weight_file, sess):
        weights = np.load(weight_file)
        keys = sorted(weights.keys())
        for i, k in enumerate(keys):
            if i not in [30, 31]:  # skip fc8's weights and biases: our fc8 is 7-way, not 1000-way
                sess.run(self.parameters[i].assign(weights[k]))
        print("_______model loaded_________")

# Collect file paths and labels
# Not a class method: a helper in this module for gathering classes and data.
# Input: the path of the parent folder. In this experiment all class folders sit inside
# one parent folder, and that parent folder is the argument here.
def get_file(file_dir):
    images = []
    temp = []
    labels = []
    for root, sub_folders, files in os.walk(file_dir):
        for name in files:
            images.append(os.path.join(root, name))
        for name in sub_folders:
            temp.append(os.path.join(root, name))
    for one_folder in temp:  # map each class folder name to a numeric label
        n_img = len(os.listdir(one_folder))
        letter = os.path.basename(one_folder)
        if letter == 'zero':
            labels = np.append(labels, n_img * [0])
        elif letter == 'five':
            labels = np.append(labels, n_img * [1])
        elif letter == 'ten':
            labels = np.append(labels, n_img * [2])
        elif letter == 'fifteen':
            labels = np.append(labels, n_img * [3])
        elif letter == 'twenty':
            labels = np.append(labels, n_img * [4])
        elif letter == 'twentyfive':
            labels = np.append(labels, n_img * [5])
        else:
            labels = np.append(labels, n_img * [6])

    # shuffle the dataset
    temp = np.array([images, labels])
    temp = temp.transpose()
    np.random.shuffle(temp)
    image_list = list(temp[:, 0])
    label_list = list(temp[:, 1])
    label_list = [int(float(i)) for i in label_list]

    return image_list, label_list  # the shuffled image paths and their labels


# Read data in batches with multi-threaded input queues
# Inputs: the image and label lists returned by get_file, the input image size,
# the batch size, and the queue capacity (how much is buffered in memory at once).
# The recommended image size is 224x224, as used to train the official VGG.
# Tune the last two arguments to your hardware so memory is not exhausted.
def get_batch(img_list, label_list, img_width, img_height, batch_size, capacity):
    image = tf.cast(img_list, tf.string)
    label = tf.cast(label_list, tf.int32)
    input_queue = tf.train.slice_input_producer([image, label])
    label = input_queue[1]
    image_contents = tf.read_file(input_queue[0])

    image = tf.image.decode_jpeg(image_contents, channels=3)
    image = preprocess_for_train(image, img_height, img_width)
    image_batch, label_batch = tf.train.batch([image, label], batch_size=batch_size, num_threads=64, capacity=capacity)

    label_batch = tf.reshape(label_batch, [batch_size])

    return image_batch, label_batch  # one batch of images and labels

# One-hot encoding
# Converts integer labels to one-hot vectors; standard practice for CNNs, especially multi-class problems.
# One-hot labels make accuracy and Euclidean-distance-style metrics straightforward to compute.
def onehot(labels):
    n_sample = len(labels)
    n_class = 7  # change to your actual class count
    onehot_labels = np.zeros((n_sample, n_class))
    onehot_labels[np.arange(n_sample), labels] = 1
    return onehot_labels
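A quick shape check (a standalone sketch, not part of the training pipeline) confirms the class is wired as intended:

import tensorflow as tf
import vgg_model as model

x = tf.placeholder(tf.float32, [None, 224, 224, 3])
vgg = model.Vgg16(x)
print(vgg.pool5.get_shape())   # (?, 7, 7, 512): five poolings of a 224x224 input
print(vgg.fc8.get_shape())     # (?, 7): one logit per class
print(len(vgg.parameters))     # 32 tensors: 13 conv + 3 fc layers, a weight and a bias each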

The code above uses TF's VGG preprocessing module. It can be downloaded from the TensorFlow models repository on GitHub, or you can use the code block below; save it as vgg_preprocessing.py so the import above resolves.

# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides utilities to preprocess images.

The preprocessing steps for VGG were introduced in the following technical
report:

  Very Deep Convolutional Networks For Large-Scale Image Recognition
  Karen Simonyan and Andrew Zisserman
  arXiv technical report, 2015
  PDF: http://arxiv.org/pdf/1409.1556.pdf
  ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf
  CC-BY-4.0

More information can be obtained from the VGG website:
www.robots.ox.ac.uk/~vgg/research/very_deep/
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim

slim = contrib_slim

_R_MEAN = 123.68
_G_MEAN = 116.78
_B_MEAN = 103.94

_RESIZE_SIDE_MIN = 256
_RESIZE_SIDE_MAX = 512


def _crop(image, offset_height, offset_width, crop_height, crop_width):
  """Crops the given image using the provided offsets and sizes.

  Note that the method doesn't assume we know the input image size but it does
  assume we know the input image rank.

  Args:
    image: an image of shape [height, width, channels].
    offset_height: a scalar tensor indicating the height offset.
    offset_width: a scalar tensor indicating the width offset.
    crop_height: the height of the cropped image.
    crop_width: the width of the cropped image.

  Returns:
    the cropped (and resized) image.

  Raises:
    InvalidArgumentError: if the rank is not 3 or if the image dimensions are
      less than the crop size.
  """
  original_shape = tf.shape(image)

  rank_assertion = tf.Assert(
      tf.equal(tf.rank(image), 3),
      ['Rank of image must be equal to 3.'])
  with tf.control_dependencies([rank_assertion]):
    cropped_shape = tf.stack([crop_height, crop_width, original_shape[2]])

  size_assertion = tf.Assert(
      tf.logical_and(
          tf.greater_equal(original_shape[0], crop_height),
          tf.greater_equal(original_shape[1], crop_width)),
      ['Crop size greater than the image size.'])

  offsets = tf.to_int32(tf.stack([offset_height, offset_width, 0]))

  # Use tf.slice instead of crop_to_bounding box as it accepts tensors to
  # define the crop size.
  with tf.control_dependencies([size_assertion]):
    image = tf.slice(image, offsets, cropped_shape)
  return tf.reshape(image, cropped_shape)


def _random_crop(image_list, crop_height, crop_width):
  """Crops the given list of images.

  The function applies the same crop to each image in the list. This can be
  effectively applied when there are multiple image inputs of the same
  dimension such as:

    image, depths, normals = _random_crop([image, depths, normals], 120, 150)

  Args:
    image_list: a list of image tensors of the same dimension but possibly
      varying channel.
    crop_height: the new height.
    crop_width: the new width.

  Returns:
    the image_list with cropped images.

  Raises:
    ValueError: if there are multiple image inputs provided with different size
      or the images are smaller than the crop dimensions.
  """
  if not image_list:
    raise ValueError('Empty image_list.')

  # Compute the rank assertions.
  rank_assertions = []
  for i in range(len(image_list)):
    image_rank = tf.rank(image_list[i])
    rank_assert = tf.Assert(
        tf.equal(image_rank, 3),
        ['Wrong rank for tensor  %s [expected] [actual]',
         image_list[i].name, 3, image_rank])
    rank_assertions.append(rank_assert)

  with tf.control_dependencies([rank_assertions[0]]):
    image_shape = tf.shape(image_list[0])
  image_height = image_shape[0]
  image_width = image_shape[1]
  crop_size_assert = tf.Assert(
      tf.logical_and(
          tf.greater_equal(image_height, crop_height),
          tf.greater_equal(image_width, crop_width)),
      ['Crop size greater than the image size.'])

  asserts = [rank_assertions[0], crop_size_assert]

  for i in range(1, len(image_list)):
    image = image_list[i]
    asserts.append(rank_assertions[i])
    with tf.control_dependencies([rank_assertions[i]]):
      shape = tf.shape(image)
    height = shape[0]
    width = shape[1]

    height_assert = tf.Assert(
        tf.equal(height, image_height),
        ['Wrong height for tensor %s [expected][actual]',
         image.name, height, image_height])
    width_assert = tf.Assert(
        tf.equal(width, image_width),
        ['Wrong width for tensor %s [expected][actual]',
         image.name, width, image_width])
    asserts.extend([height_assert, width_assert])

  # Create a random bounding box.
  #
  # Use tf.random_uniform and not numpy.random.rand as doing the former would
  # generate random numbers at graph eval time, unlike the latter which
  # generates random numbers at graph definition time.
  with tf.control_dependencies(asserts):
    max_offset_height = tf.reshape(image_height - crop_height + 1, [])
  with tf.control_dependencies(asserts):
    max_offset_width = tf.reshape(image_width - crop_width + 1, [])
  offset_height = tf.random_uniform(
      [], maxval=max_offset_height, dtype=tf.int32)
  offset_width = tf.random_uniform(
      [], maxval=max_offset_width, dtype=tf.int32)

  return [_crop(image, offset_height, offset_width,
                crop_height, crop_width) for image in image_list]


def _central_crop(image_list, crop_height, crop_width):
  """Performs central crops of the given image list.

  Args:
    image_list: a list of image tensors of the same dimension but possibly
      varying channel.
    crop_height: the height of the image following the crop.
    crop_width: the width of the image following the crop.

  Returns:
    the list of cropped images.
  """
  outputs = []
  for image in image_list:
    image_height = tf.shape(image)[0]
    image_width = tf.shape(image)[1]

    offset_height = (image_height - crop_height) / 2
    offset_width = (image_width - crop_width) / 2

    outputs.append(_crop(image, offset_height, offset_width,
                         crop_height, crop_width))
  return outputs


def _mean_image_subtraction(image, means):
  """Subtracts the given means from each image channel.

  For example:
    means = [123.68, 116.779, 103.939]
    image = _mean_image_subtraction(image, means)

  Note that the rank of `image` must be known.

  Args:
    image: a tensor of size [height, width, C].
    means: a C-vector of values to subtract from each channel.

  Returns:
    the centered image.

  Raises:
    ValueError: If the rank of `image` is unknown, if `image` has a rank other
      than three or if the number of channels in `image` doesn't match the
      number of values in `means`.
  """
  if image.get_shape().ndims != 3:
    raise ValueError('Input must be of size [height, width, C>0]')
  num_channels = image.get_shape().as_list()[-1]
  if len(means) != num_channels:
    raise ValueError('len(means) must match the number of channels')

  channels = tf.split(axis=2, num_or_size_splits=num_channels, value=image)
  for i in range(num_channels):
    channels[i] -= means[i]
  return tf.concat(axis=2, values=channels)


def _smallest_size_at_least(height, width, smallest_side):
  """Computes new shape with the smallest side equal to `smallest_side`.

  Computes new shape with the smallest side equal to `smallest_side` while
  preserving the original aspect ratio.

  Args:
    height: an int32 scalar tensor indicating the current height.
    width: an int32 scalar tensor indicating the current width.
    smallest_side: A python integer or scalar `Tensor` indicating the size of
      the smallest side after resize.

  Returns:
    new_height: an int32 scalar tensor indicating the new height.
    new_width: and int32 scalar tensor indicating the new width.
  """
  smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32)

  height = tf.to_float(height)
  width = tf.to_float(width)
  smallest_side = tf.to_float(smallest_side)

  scale = tf.cond(tf.greater(height, width),
                  lambda: smallest_side / width,
                  lambda: smallest_side / height)
  new_height = tf.to_int32(tf.rint(height * scale))
  new_width = tf.to_int32(tf.rint(width * scale))
  return new_height, new_width


def _aspect_preserving_resize(image, smallest_side):
  """Resize images preserving the original aspect ratio.

  Args:
    image: A 3-D image `Tensor`.
    smallest_side: A python integer or scalar `Tensor` indicating the size of
      the smallest side after resize.

  Returns:
    resized_image: A 3-D tensor containing the resized image.
  """
  smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32)

  shape = tf.shape(image)
  height = shape[0]
  width = shape[1]
  new_height, new_width = _smallest_size_at_least(height, width, smallest_side)
  image = tf.expand_dims(image, 0)
  resized_image = tf.image.resize_bilinear(image, [new_height, new_width],
                                           align_corners=False)
  resized_image = tf.squeeze(resized_image)
  resized_image.set_shape([None, None, 3])
  return resized_image


def preprocess_for_train(image,
                         output_height,
                         output_width,
                         resize_side_min=_RESIZE_SIDE_MIN,
                         resize_side_max=_RESIZE_SIDE_MAX,
                         use_grayscale=False):
  """Preprocesses the given image for training.

  Note that the actual resizing scale is sampled from
    [`resize_size_min`, `resize_size_max`].

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    output_height: The height of the image after preprocessing.
    output_width: The width of the image after preprocessing.
    resize_side_min: The lower bound for the smallest side of the image for
      aspect-preserving resizing.
    resize_side_max: The upper bound for the smallest side of the image for
      aspect-preserving resizing.
    use_grayscale: Whether to convert the image from RGB to grayscale.

  Returns:
    A preprocessed image.
  """
  resize_side = tf.random_uniform(
      [], minval=resize_side_min, maxval=resize_side_max+1, dtype=tf.int32)

  image = _aspect_preserving_resize(image, resize_side)
  image = _random_crop([image], output_height, output_width)[0]
  image.set_shape([output_height, output_width, 3])
  image = tf.to_float(image)
  if use_grayscale:
    image = tf.image.rgb_to_grayscale(image)
  image = tf.image.random_flip_left_right(image)
  return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN])


def preprocess_for_eval(image,
                        output_height,
                        output_width,
                        resize_side,
                        use_grayscale=False):
  """Preprocesses the given image for evaluation.

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    output_height: The height of the image after preprocessing.
    output_width: The width of the image after preprocessing.
    resize_side: The smallest side of the image for aspect-preserving resizing.
    use_grayscale: Whether to convert the image from RGB to grayscale.

  Returns:
    A preprocessed image.
  """
  image = _aspect_preserving_resize(image, resize_side)
  image = _central_crop([image], output_height, output_width)[0]
  image.set_shape([output_height, output_width, 3])
  image = tf.to_float(image)
  if use_grayscale:
    image = tf.image.rgb_to_grayscale(image)
  return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN])


def preprocess_image(image,
                     output_height,
                     output_width,
                     is_training=False,
                     resize_side_min=_RESIZE_SIDE_MIN,
                     resize_side_max=_RESIZE_SIDE_MAX,
                     use_grayscale=False):
  """Preprocesses the given image.

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    output_height: The height of the image after preprocessing.
    output_width: The width of the image after preprocessing.
    is_training: `True` if we're preprocessing the image for training and
      `False` otherwise.
    resize_side_min: The lower bound for the smallest side of the image for
      aspect-preserving resizing. If `is_training` is `False`, then this value
      is used for rescaling.
    resize_side_max: The upper bound for the smallest side of the image for
      aspect-preserving resizing. If `is_training` is `False`, this value is
      ignored. Otherwise, the resize side is sampled from
        [resize_size_min, resize_size_max].
    use_grayscale: Whether to convert the image from RGB to grayscale.

  Returns:
    A preprocessed image.
  """
  if is_training:
    return preprocess_for_train(image, output_height, output_width,
                                resize_side_min, resize_side_max,
                                use_grayscale)
  else:
    return preprocess_for_eval(image, output_height, output_width,
                               resize_side_min, use_grayscale)
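To see what this pipeline produces, a one-image check helps (a sketch; sample.jpg is a placeholder path):

import tensorflow as tf
from vgg_preprocessing import preprocess_for_train

raw = tf.read_file("sample.jpg")  # placeholder path
img = tf.image.decode_jpeg(raw, channels=3)
out = preprocess_for_train(img, 224, 224)  # random resize, random crop, random flip, mean subtraction
with tf.Session() as sess:
    print(sess.run(out).shape)  # (224, 224, 3): float32, channel means subtracted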

Reusing the Model Class: Loading Data, Training, and Saving Weights

Here the Vgg16 class built above is instantiated and applied.

The official pretrained model is used; download it from: https://www.cs.toronto.edu/~frossard/vgg16/vgg16_weights.npz

When instantiating, load the pretrained parameters and use the trainable (freeze/unfreeze) flags to configure which layers are actually trained.
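The .npz file stores one array per parameter, keyed by layer name. Sorting the keys puts the 26 conv entries first, then fc6/fc7/fc8, which is why load_weights skips indices 30 and 31: they are fc8's 1000-way ImageNet weights and biases, which don't fit our 7-way fc8. A quick inspection sketch (key names as published in that file, to the best of my knowledge):

import numpy as np

weights = np.load("vgg16_weights.npz")
for i, k in enumerate(sorted(weights.keys())):
    print(i, k, weights[k].shape)
# 0-25: conv1_1_W ... conv5_3_b    26/27: fc6_W, fc6_b
# 28/29: fc7_W, fc7_b              30/31: fc8_W, fc8_b (skipped by load_weights)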

The implementation code follows.

from time import time
import numpy as np
import tensorflow as tf
import vgg_model as model
import os

start_time = time()  # start the clock
batch_size = 32  # images drawn per batch
capacity = 256  # queue capacity (items buffered in memory at once)
means = [123.68, 116.779, 103.939]  # per-channel (RGB) means; the subtraction itself happens inside preprocess_for_train

# Load the data, build the input batches, and define the input placeholders
xs, ys = model.get_file("./M/")  # the parent folder path; each class must sit in its own subfolder
image_batch, label_batch = model.get_batch(xs, ys, 224, 224, batch_size, capacity)
x = tf.placeholder(tf.float32, [None, 224, 224, 3])
y = tf.placeholder(tf.float32, [None, 7])  # must match the number of classes

# Before use, set the last fc layer in the class to your class count
vgg = model.Vgg16(x)
# forward-pass output (softmax probabilities, used for prediction)
fc8_fintuning = vgg.probs
# loss: softmax cross-entropy on the raw fc8 logits
# (vgg.probs is already softmaxed; feeding it here would apply softmax twice)
loss_function = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=vgg.fc8, labels=y))
# optimizer
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(loss_function)

# Start the session and initialize variables
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# load the pretrained weights (every layer except fc8)
vgg.load_weights("vgg16_weights.npz", sess)
saver = tf.train.Saver()


# start the input-queue threads
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord, sess=sess)
epoch_start_time = time()

# training loop
for i in range(100000):
    images, labels = sess.run([image_batch, label_batch])
    # print("y src shape: ", labels.shape)
    print("y src: ", labels)
    labels = model.onehot(labels)
    # print("y shape: ", labels.shape)
    print("labels: ", labels)
    sess.run(optimizer, feed_dict={x: images, y: labels})
    loss = sess.run(loss_function, feed_dict={x: images, y: labels})
    print("loss: ", loss)
    epoch_end_time = time()
    print("step time:", (epoch_end_time - epoch_start_time))
    epoch_start_time = epoch_end_time
    if (i + 1) % 500 == 0:
        saver.save(sess, os.path.join("./model/", "epoch{:06d}.ckpt".format(i)))
    print("epoch %d is finished!!" % i)
# save the final model
saver.save(sess, os.path.join("./model/", "final.ckpt"))
print("optimizer finish!!")
duration = time() - start_time
print("total time:", "{:.2f}".format(duration))
# stop the input-queue threads
coord.request_stop()
coord.join(threads)
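The loop above only reports loss. For a per-batch accuracy readout, one option (a hypothetical addition, not part of the original script) is to define an accuracy op next to the loss and evaluate it inside the loop:

# accuracy op: fraction of samples whose predicted class matches the one-hot label
correct = tf.equal(tf.argmax(fc8_fintuning, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

# inside the training loop, after the optimizer step:
# acc = sess.run(accuracy, feed_dict={x: images, y: labels})
# print("batch accuracy:", acc)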



Classifying with the Trained Model

The script below restores the trained weights and classifies a single image.

import tensorflow as tf
import vgg_model as model
import cv2
import numpy as np

# define the input placeholder
x = tf.placeholder(tf.float32, [None, 224, 224, 3])
# open a session
sess = tf.Session()
# instantiate the model
vgg = model.Vgg16(x)
# forward-pass output (softmax probabilities)
fc8_fintuning = vgg.probs
# saver object
saver = tf.train.Saver()
# restore the trained model
# Note: this network must have exactly the same structure as the trained one,
# otherwise the restore will fail.
print("Model restoring: ....")
saver.restore(sess, tf.train.latest_checkpoint("./model/"))  # restore the newest checkpoint
# saver.restore(sess, "./model/epoch000499.ckpt")  # or restore a specific checkpoint

file_path = "./M/twentyfive/4.jpg"  # path of the image to classify
img = cv2.imread(file_path)
img = cv2.resize(img, (224, 224))  # enforce the required 224x224 input size
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # cv2 loads BGR; training images were decoded as RGB
img = img.astype(np.float32)
img -= np.array([123.68, 116.78, 103.94], dtype=np.float32)  # subtract the channel means, matching training

# run the forward pass
preb = sess.run(fc8_fintuning, feed_dict={x: [img]})
# index of the highest-probability class
max_index = np.argmax(preb)
print(preb)
# report the prediction (class index i corresponds to a distance of 5*i meters)
print("Predicted: %d meters" % (5 * max_index))
print("Confidence:", preb[:, max_index])

 
