深度学习--VGG网络处理过程分析

本文链接：https://blog.csdn.net/u014106644/article/details/89091949

VGG-Net的结构图，来自论文《VERY DEEP CONVOLUTIONAL NETWORKS FOR LARGE-SCALE IMAGE RECOGNITION》，发表于ICLR 2015。与AlexNet相比，VGG对图片有更精确的估值，也更省空间。其具体网络结构如下所示：

下载imagenet-vgg-verydeep-19.mat模型数据文件，加载模型文件，进行数据架构解析：

import scipy.io
import numpy as np
import os 
import scipy.misc 
import matplotlib.pyplot as plt 
import tensorflow as tf
import cmd
from cmd import Cmd

# Locate the pretrained VGG-19 weights under the current working directory
# and load the MATLAB file; loadmat's result is inspected via its keys below.
cwd = os.getcwd()
print("cwd", cwd)
# Path of the model data file
VGG_PATH = "".join((cwd, "/model/imagenet-vgg-verydeep-19.mat"))
data = scipy.io.loadmat(VGG_PATH)
print("data", data.keys())

输出如下，可知data的数据结构为字典类型，这里主要关注layers以及normalization两个字段。

data dict_keys(['__globals__', 'classes', 'layers', 'normalization', '__header__', '__version__'])

由以上网络结构可知，输入图片为224*224*3的彩色图片，观察一下data['normalization']数据结构

# Pull out the first entry of the normalization record, then show its shape.
mean_image = data['normalization'][0][0][0]
print(mean_image.shape)

其大小刚好与输入图片的尺寸一致，对应的是图片的均值：

(224, 224, 3)

再观察一下data['layers']数据结构，debug模式有：

其中layers具体结构：可知有43个数组元素：

第一个数组元素为卷积层1

# Dump the raw record stored for the first entry of the layers array.
first_layer = data['layers'][0][0]
print("conv1", first_layer)

array([[1., 1., 1., 1.]]), array(['conv'], dtype='<U4'), array(['conv1_1'], dtype='<U7'), array([[1., 1.]])

第二个为relu层：

# Dump the raw record stored for the second entry of the layers array.
second_layer = data['layers'][0][1]
print("relu", second_layer)

relu [[(array(['relu'], dtype='<U4'), array(['relu1_1'], dtype='<U7'))]]

由以上分析可知，data['layers'][0][i]为某一层的具体信息数组，其中每个元素均为array数组，外面又包了两层方括号，用[0][0]去掉后，依次遍历每一层，即可获取VGG网络的网络结构如下：其中倒数第二个array，对于卷积层和全连接层是该层的名称，对于relu层和池化层则是类型信息（如relu、max）

# Walk every layer record directly instead of hard-coding the count (43 for
# this model file), so the listing follows whatever the loaded file contains.
# Each record is wrapped in two singleton dimensions, hence the [0][0]; the
# second-to-last field of the record is the string that gets printed.
for index, layer in enumerate(data['layers'][0], start=1):
    print(index, layer[0][0][-2])

最终输出为：

1 ['conv1_1']
2 ['relu']
3 ['conv1_2']
4 ['relu']
5 ['max']
6 ['conv2_1']
7 ['relu']
8 ['conv2_2']
9 ['relu']
10 ['max']
11 ['conv3_1']
12 ['relu']
13 ['conv3_2']
14 ['relu']
15 ['conv3_3']
16 ['relu']
17 ['conv3_4']
18 ['relu']
19 ['max']
20 ['conv4_1']
21 ['relu']
22 ['conv4_2']
23 ['relu']
24 ['conv4_3']
25 ['relu']
26 ['conv4_4']
27 ['relu']
28 ['max']
29 ['conv5_1']
30 ['relu']
31 ['conv5_2']
32 ['relu']
33 ['conv5_3']
34 ['relu']
35 ['conv5_4']
36 ['relu']
37 ['max']
38 ['fc6']
39 ['relu']
40 ['fc7']
41 ['relu']
42 ['fc8']
43 ['softmax']

可知网络结构主要由卷积层、relu层、max池化层、fc全连接层和softmax分类层组成。

VGG网络参数结构：

获取卷积层1的参数W,b：

# The first field of the conv1_1 record is a 1x2 cell holding (W, b), so one
# more [0] is needed before the pair can be unpacked.  Indexing the record
# with [1] instead would select the record's second field, not the bias.
conv1_W, conv1_b = data['layers'][0][0][0][0][0][0]
print("conv1W", conv1_W)
print("conv1b", conv1_b)

利用VGG来观察卷积神经网络在处理图片时，各层对于图片的直观处理效果，代码如下：

利用VGG训练结果，输入一张待处理图片，观察图片在VGG网络中各层是如何变化的

import scipy.io
import numpy as np
import os 
import scipy.misc 
import matplotlib.pyplot as plt 
import tensorflow as tf
import cmd
from cmd import Cmd
from PIL import Image

# Convolution layer
def _conv_layer(input, weights, bias):
    """Run a stride-1, SAME-padded 2-D convolution and add the bias.

    Args:
        input: Tensor handed to ``tf.nn.conv2d`` as its input.
        weights: Kernel values; wrapped in ``tf.constant`` so they are
            embedded in the graph as fixed values.
        bias: Bias values added to the convolution result.

    Returns:
        The result of ``tf.nn.bias_add`` on the convolution output.
    """
    kernel = tf.constant(weights)
    convolved = tf.nn.conv2d(
        input, kernel, strides=(1, 1, 1, 1), padding='SAME')
    biased = tf.nn.bias_add(convolved, bias)
    return biased
# Pooling layer
def _pool_layer(input):
    """Apply 2x2 max pooling with stride 2 and SAME padding.

    Args:
        input: Tensor handed to ``tf.nn.max_pool``.

    Returns:
        The pooled tensor.
    """
    # The same 4-tuple serves as both the window size and the stride.
    window = (1, 2, 2, 1)
    pooled = tf.nn.max_pool(
        input, ksize=window, strides=window, padding='SAME')
    return pooled
# Image preprocessing: mean subtraction
def preprocess(image, mean_pixel):
    """Return ``image`` with ``mean_pixel`` subtracted from it.

    Args:
        image: Image data to centre.
        mean_pixel: Value(s) subtracted from ``image``.

    Returns:
        The difference ``image - mean_pixel``.
    """
    centered = image - mean_pixel
    return centered
# #将图片加均值
# def unprocess(image, mean_pixel):
#     return image + mean_pixel
# Read one image file into a float array
def imread(path):
    """Load the image stored at ``path`` as a float64 NumPy array.

    ``scipy.misc.imread`` was removed in SciPy 1.2 and the ``np.float``
    alias was removed in NumPy 1.24, so the file is decoded with Pillow
    (``Image`` is imported at the top of this script) and cast with the
    explicit ``np.float64`` dtype, which is what ``np.float`` aliased.

    Args:
        path: Location of the image file to open.

    Returns:
        The decoded pixel data as a ``numpy.ndarray`` of dtype float64.
    """
    # astype() copies the pixels, so the array stays valid after the file
    # handle is closed by the context manager.
    with Image.open(path) as picture:
        return np.asarray(picture).astype(np.float64)
# # Save image data (disabled): clip to 0..255 and cast to uint8 before writing
# def imsave(path, img):
#     img = np.clip(img, 0, 255).astype(np.uint8)
#     scipy.misc.imsave(path, img)
# Progress message printed once the helper functions above are defined.
print ("Functions for VGG ready")

def net(data_path, input_image):
    """Build the VGG-19 layers conv1_1 .. pool5 on top of ``input_image``.

    Args:
        data_path: Path of the MatConvNet ``.mat`` model file, read with
            ``scipy.io.loadmat``.
        input_image: Tensor the first convolution is applied to.

    Returns:
        A ``(activations, mean_pixel, layers)`` tuple:
        ``activations`` maps every layer name to that layer's output
        tensor, ``mean_pixel`` is the stored mean image averaged over its
        first two axes, and ``layers`` is the tuple of layer names in
        network order.
    """
    # The position of a name in this tuple is also used as the index of the
    # matching record in the .mat file, so the order must not change.
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
        'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
        'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
        'relu5_3', 'conv5_4', 'relu5_4', 'pool5',
    )
    data = scipy.io.loadmat(data_path)
    mean = data['normalization'][0][0][0]
    mean_pixel = np.mean(mean, axis=(0, 1))
    weights = data['layers'][0]

    # Named ``activations`` rather than ``net`` so the dict does not shadow
    # this function's own name.
    activations = {}
    current = input_image
    for i, name in enumerate(layers):
        # The first four characters of the name select the layer type.
        kind = name[:4]
        if kind == 'conv':
            kernels, bias = weights[i][0][0][0][0]
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            kernels = np.transpose(kernels, (1, 0, 2, 3))
            bias = bias.reshape(-1)
            current = _conv_layer(current, kernels, bias)
        elif kind == 'relu':
            current = tf.nn.relu(current)
        elif kind == 'pool':
            current = _pool_layer(current)
        activations[name] = current
    assert len(activations) == len(layers)
    return activations, mean_pixel, layers
# Progress message printed once the network builder above is defined.
print ("Network for VGG ready")

# Resolve the model file and the sample picture relative to the current
# working directory.
cwd = os.getcwd()
print("cwd", cwd)
# Path of the model data file
VGG_PATH = "".join((cwd, "/model/imagenet-vgg-verydeep-19.mat"))
# Path of the picture to process
IMG_PATH = "".join((cwd, "/model/cat.jpg"))

# Load the picture as a float array for the network and report its shape.
input_image = imread(IMG_PATH)
print("input_image", input_image.shape)

# Display the untouched picture in its own named figure.
img = Image.open(IMG_PATH)
plt.figure("origin pic")
plt.imshow(img)
plt.colorbar()
plt.show()

# Batch-of-one input shape derived from the loaded picture's first three axes.
shape = (1,) + tuple(input_image.shape[axis] for axis in range(3))
with tf.Session() as sess:
    # Placeholder that fixes the shape of the picture fed into the graph.
    image = tf.placeholder('float', shape=shape)
    # Build the VGG layers on the placeholder (no training happens here).
    nets, mean_pixel, all_layers = net(VGG_PATH, image)
    # Subtract the mean pixel and add the leading batch axis.
    centered = preprocess(input_image, mean_pixel)
    input_image_pre = np.array([centered])
    print(input_image_pre[0].shape)
    # Display the mean-subtracted picture.
    # NOTE(review): the data is mean-centred float, so it may fall outside
    # the range imshow expects for colour images - confirm the rendering.
    plt.figure("preprocess pic")
    plt.imshow(input_image_pre[0])
    plt.colorbar()
    plt.show()

    # Visualise every layer; assign a tuple of names here instead to look at
    # a subset, e.g. ('relu2_1', 'relu3_1', 'relu4_1').
    layers = all_layers
    total = len(layers)
    for position, layer in enumerate(layers, start=1):
        print ("[%d/%d] %s" % (position, total, layer))
        # Evaluate this layer's output for the single input picture.
        features = nets[layer].eval(feed_dict={image: input_image_pre})

        print (" Type of 'features' is ", type(features))
        print (" Shape of 'features' is %s" % (features.shape,))
        # Plot channel 0 of the response in a figure numbered like the layer.
        plt.figure(position, figsize=(10, 5))
        plt.matshow(features[0, :, :, 0], fignum=position)
        plt.title(layer)
        plt.colorbar()
        plt.show()

运行上述程序，依次观察：

原图片以及均值化处理图片

卷积层1以及relu激活函数2

卷积层3以及激活函数4

池化层5

卷积层6以及激活函数7

卷积层8以及激活函数9

池化层10

卷积11，激活函数12，卷积13，激活14

卷积15，激活16，卷积17，激活18

池化19

卷积20 激活21 卷积22 激活23

卷积24 激活25 卷积26 激活27

池化28

卷积29 激活30 卷积31 激活32

卷积33 激活34 卷积35 激活36

池化37

在各层变化过程中，数据规格是如何变化的

[1/37] conv1_1
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 500, 500, 64)
[2/37] relu1_1
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 500, 500, 64)
[3/37] conv1_2
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 500, 500, 64)
[4/37] relu1_2
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 500, 500, 64)
[5/37] pool1
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 250, 250, 64)
[6/37] conv2_1
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 250, 250, 128)
[7/37] relu2_1
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 250, 250, 128)
[8/37] conv2_2
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 250, 250, 128)
[9/37] relu2_2
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 250, 250, 128)
[10/37] pool2
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 125, 125, 128)
[11/37] conv3_1
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 125, 125, 256)
[12/37] relu3_1
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 125, 125, 256)
[13/37] conv3_2
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 125, 125, 256)
[14/37] relu3_2
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 125, 125, 256)
[15/37] conv3_3
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 125, 125, 256)
[16/37] relu3_3
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 125, 125, 256)
[17/37] conv3_4
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 125, 125, 256)
[18/37] relu3_4
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 125, 125, 256)
[19/37] pool3
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 63, 63, 256)
[20/37] conv4_1
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 63, 63, 512)
[21/37] relu4_1
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 63, 63, 512)
[22/37] conv4_2
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 63, 63, 512)
[23/37] relu4_2
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 63, 63, 512)
[24/37] conv4_3
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 63, 63, 512)
[25/37] relu4_3
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 63, 63, 512)
[26/37] conv4_4
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 63, 63, 512)
[27/37] relu4_4
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 63, 63, 512)
[28/37] pool4
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 32, 32, 512)
[29/37] conv5_1
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 32, 32, 512)
[30/37] relu5_1
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 32, 32, 512)
[31/37] conv5_2
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 32, 32, 512)
[32/37] relu5_2
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 32, 32, 512)
[33/37] conv5_3
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 32, 32, 512)
[34/37] relu5_3
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 32, 32, 512)
[35/37] conv5_4
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 32, 32, 512)
[36/37] relu5_4
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 32, 32, 512)
[37/37] pool5
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 16, 16, 512)

参考链接：

https://blog.csdn.net/cskywit/article/details/79185792

唐宇迪深度学习