深度学习--VGG网络处理过程分析

        VGG-Net的结构图来自论文《Very Deep Convolutional Networks for Large-Scale Image Recognition》,该论文发表于ICLR 2015。与AlexNet相比,VGG对图片有更精确的估值,并且更省空间。其具体网络结构如下所示:

         

下载imagenet-vgg-verydeep-19.mat模型数据文件,加载模型文件,进行数据架构解析:

import scipy.io
import numpy as np
import os 
import scipy.misc 
import matplotlib.pyplot as plt 
import tensorflow as tf
import cmd
from cmd import Cmd

# Locate the pretrained VGG-19 weights relative to the working directory.
cwd = os.getcwd()
print("cwd", cwd)
VGG_PATH = "".join((cwd, "/model/imagenet-vgg-verydeep-19.mat"))
# Load the MATLAB file and list its top-level entries.
data = scipy.io.loadmat(VGG_PATH)
top_level_keys = data.keys()
print("data", top_level_keys)

输出如下。可知data为字典类型,这里主要关注其中的layers和normalization两项。

data dict_keys(['__globals__', 'classes', 'layers', 'normalization', '__header__', '__version__'])

由以上网络结构可知,输入图片为224*224*3的彩色图片,观察一下data['normalization']数据结构

# Unwrap the nested arrays around the first normalization field, then show its shape.
mean_image = data['normalization'][0][0][0]
print(mean_image.shape)

其形状与输入图片相同,对应逐像素的图片均值:

(224, 224, 3)

再观察一下data['layers']数据结构,debug模式有:

                     

其中layers具体结构:可知有43个数组元素:

                                       

第一个数组元素为卷积层1

# Layer records sit in row 0 of data['layers']; entry 0 is the first layer.
first_layer = data['layers'][0][0]
print("conv1", first_layer)
array([[1., 1., 1., 1.]]), array(['conv'], dtype='<U4'), array(['conv1_1'], dtype='<U7'), array([[1., 1.]])

第二个为relu层:

# Entry 1 of the layer row is the second layer.
second_layer = data['layers'][0][1]
print("relu", second_layer)
relu [[(array(['relu'], dtype='<U4'), array(['relu1_1'], dtype='<U7'))]]

由以上分析可知,data['layers'][0][i]为某一层的具体信息数组,其中每个元素均为array数组,外面还包着两层括号,用[0][0]去掉后,依次遍历每一层,即可获取VGG网络的网络结构。对卷积层而言,倒数第二个array为该层的名称信息;relu层只有(类型, 名称)两个元素,倒数第二个是类型,因此下面输出的是'relu':

# Walk every layer record instead of hard-coding the layer count (43 in
# this model file), so the listing always matches the loaded data.
for layer_no, layer_record in enumerate(data['layers'][0], start=1):
    # [0][0] strips the two wrapping array levels; [-2] is the
    # second-to-last field of the record.
    print(layer_no, layer_record[0][0][-2])

最终输出为:

1 ['conv1_1']
2 ['relu']
3 ['conv1_2']
4 ['relu']
5 ['max']
6 ['conv2_1']
7 ['relu']
8 ['conv2_2']
9 ['relu']
10 ['max']
11 ['conv3_1']
12 ['relu']
13 ['conv3_2']
14 ['relu']
15 ['conv3_3']
16 ['relu']
17 ['conv3_4']
18 ['relu']
19 ['max']
20 ['conv4_1']
21 ['relu']
22 ['conv4_2']
23 ['relu']
24 ['conv4_3']
25 ['relu']
26 ['conv4_4']
27 ['relu']
28 ['max']
29 ['conv5_1']
30 ['relu']
31 ['conv5_2']
32 ['relu']
33 ['conv5_3']
34 ['relu']
35 ['conv5_4']
36 ['relu']
37 ['max']
38 ['fc6']
39 ['relu']
40 ['fc7']
41 ['relu']
42 ['fc8']
43 ['softmax']

可知网络结构主要由卷积层、relu层、max池化层、fc全连接层和softmax分类层组成。

VGG网络参数结构:

     

获取卷积层1的参数W,b:

# The first field of a conv record is a cell holding (kernels, bias).
# Unpack it the same way net() does (weights[i][0][0][0][0]); indexing
# the record with [1] would return the record's second field, not the bias.
conv1_kernels, conv1_bias = data['layers'][0][0][0][0][0][0]
print("conv1W", conv1_kernels)
print("conv1b", conv1_bias)

利用VGG来观察卷积神经网络在处理图片时,各层对于图片的直观处理效果,代码如下:

利用VGG训练结果,输入一张待处理图片,观察图片在VGG网络中各层是如何变化的

import scipy.io
import numpy as np
import os 
import scipy.misc 
import matplotlib.pyplot as plt 
import tensorflow as tf
import cmd
from cmd import Cmd
from PIL import Image

# Convolution layer
def _conv_layer(input, weights, bias):
    """Apply a stride-1, SAME-padded 2-D convolution and add the bias.

    Args:
        input: Tensor passed to ``tf.nn.conv2d``.
        weights: Kernel values; wrapped in ``tf.constant`` before use.
        bias: Bias values passed to ``tf.nn.bias_add``.

    Returns:
        The tensor produced by ``tf.nn.bias_add``.
    """
    kernel = tf.constant(weights)
    convolved = tf.nn.conv2d(
        input,
        kernel,
        strides=(1, 1, 1, 1),
        padding='SAME',
    )
    return tf.nn.bias_add(convolved, bias)
# Max-pooling layer
def _pool_layer(input):
    """Apply 2x2 max pooling with stride 2 and SAME padding.

    Args:
        input: Tensor passed to ``tf.nn.max_pool``.

    Returns:
        The pooled tensor.
    """
    window = (1, 2, 2, 1)
    step = (1, 2, 2, 1)
    pooled = tf.nn.max_pool(input, ksize=window, strides=step, padding='SAME')
    return pooled
# Image preprocessing: mean subtraction
def preprocess(image, mean_pixel):
    """Centre ``image`` by subtracting ``mean_pixel`` from it.

    Args:
        image: Image data (anything supporting ``-`` with ``mean_pixel``).
        mean_pixel: Value(s) to subtract.

    Returns:
        The result of ``image - mean_pixel``.
    """
    centered = image - mean_pixel
    return centered
# #将图片加均值
# def unprocess(image, mean_pixel):
#     return image + mean_pixel
# Read one image file into a float array
def imread(path):
    """Load the image at ``path`` as a float64 NumPy array.

    ``scipy.misc.imread`` has been removed from SciPy and the ``np.float``
    alias has been removed from NumPy, so the previous one-liner fails on
    current installs. Pillow (imported by this script as ``Image``) decodes
    the file instead, and the dtype is spelled ``np.float64``, which is what
    ``np.float`` used to mean.

    Args:
        path: Path of the image file to read.

    Returns:
        Array of shape (height, width, 3) and dtype float64 holding the
        RGB pixel values.
    """
    # The context manager closes the underlying file once decoding is done.
    with Image.open(path) as img:
        # Force three channels: the script indexes shape[2] and subtracts a
        # 3-component mean pixel, so grey-scale / RGBA inputs would break it.
        return np.asarray(img.convert("RGB"), dtype=np.float64)
# #保存图片数据
# def imsave(path, img):
#     img = np.clip(img, 0, 255).astype(np.uint8)
#     scipy.misc.imsave(path, img)
# Progress marker: the helper functions above are now defined.
print("Functions for VGG ready")

def net(data_path, input_image):
    """Build the VGG-19 conv/relu/pool stack on top of ``input_image``.

    The layer records are read from the MATLAB file at ``data_path`` and
    applied in the fixed order listed below; the fully connected and
    softmax layers are not included in that list.

    Args:
        data_path: Path of the ``.mat`` model file given to
            ``scipy.io.loadmat``.
        input_image: Tensor the first layer is applied to.

    Returns:
        A tuple ``(activations, mean_pixel, layer_names)``: a dict mapping
        each layer name to its output tensor, the mean of the model's
        normalization array over its first two axes, and the tuple of
        layer names in order.
    """
    layer_names = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
        'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
        'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
        'relu5_3', 'conv5_4', 'relu5_4', 'pool5',
    )
    mat = scipy.io.loadmat(data_path)
    mean_image = mat['normalization'][0][0][0]
    # Collapse the first two axes of the mean image into one mean pixel.
    mean_pixel = np.mean(mean_image, axis=(0, 1))
    layer_records = mat['layers'][0]

    activations = {}
    tensor = input_image
    for index, layer_name in enumerate(layer_names):
        # The first four characters of the name select the layer type.
        prefix = layer_name[:4]
        if prefix == 'pool':
            tensor = _pool_layer(tensor)
        elif prefix == 'relu':
            tensor = tf.nn.relu(tensor)
        elif prefix == 'conv':
            kernels, bias = layer_records[index][0][0][0][0]
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            tensor = _conv_layer(
                tensor,
                np.transpose(kernels, (1, 0, 2, 3)),
                bias.reshape(-1),
            )
        activations[layer_name] = tensor

    assert len(activations) == len(layer_names)
    return activations, mean_pixel, layer_names
print("Network for VGG ready")

cwd = os.getcwd()
print("cwd", cwd)
# Paths of the pretrained model and of the picture to analyse.
VGG_PATH = "".join((cwd, "/model/imagenet-vgg-verydeep-19.mat"))
IMG_PATH = "".join((cwd, "/model/cat.jpg"))
# Load the picture to process.
input_image = imread(IMG_PATH)
print("input_image", input_image.shape)

# Show the original picture.
img = Image.open(IMG_PATH)
plt.figure("origin pic")
plt.imshow(img)
plt.colorbar()
plt.show()

# Input shape: a batch of one, followed by the picture's first three dimensions.
shape = (1,) + tuple(input_image.shape[axis] for axis in range(3))
with tf.Session() as sess:
    # Placeholder that fixes the shape of the image fed to the graph.
    image = tf.placeholder('float', shape=shape)
    # Build the VGG forward graph on the placeholder (nothing is trained here).
    nets, mean_pixel, all_layers = net(VGG_PATH, image)
    # Subtract the mean pixel and wrap the result in a batch dimension.
    centered = preprocess(input_image, mean_pixel)
    input_image_pre = np.array([centered])
    print(input_image_pre[0].shape)
    # Show the mean-subtracted picture.
    plt.figure("preprocess pic")
    plt.imshow(input_image_pre[0])
    plt.colorbar()
    plt.show()

    # Visualise every layer; assign a tuple such as
    # ('relu2_1', 'relu3_1', 'relu4_1') instead to inspect only a few.
    layers = all_layers
    layer_count = len(layers)
    for fig_no, layer in enumerate(layers, start=1):
        print("[%d/%d] %s" % (fig_no, layer_count, layer))
        features = nets[layer].eval(feed_dict={image: input_image_pre})

        print(" Type of 'features' is ", type(features))
        print(" Shape of 'features' is %s" % (features.shape,))
        # Plot channel 0 of this layer's response.
        plt.figure(fig_no, figsize=(10, 5))
        plt.matshow(features[0, :, :, 0], fignum=fig_no)
        plt.title(layer)
        plt.colorbar()
        plt.show()

运行上述程序,依次观察:

原图片以及均值化处理图片

  

卷积层1以及relu激活函数2 

      

卷积层3以及激活函数4

   

池化层5

卷积层6以及激活函数7

   

卷积层8以及激活函数9

   

池化层10

卷积11,激活函数12,卷积13,激活14

    

卷积15,激活16,卷积17,激活18

   

池化19

卷积20 激活21 卷积22 激活23

   

卷积24 激活25 卷积26 激活27

   

池化28

卷积29 激活30 卷积31 激活32

   

卷积33 激活34 卷积35 激活36

   

池化37

在各层变化过程中,数据规格是如何变化的

[1/37] conv1_1
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 500, 500, 64)
[2/37] relu1_1
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 500, 500, 64)
[3/37] conv1_2
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 500, 500, 64)
[4/37] relu1_2
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 500, 500, 64)
[5/37] pool1
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 250, 250, 64)
[6/37] conv2_1
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 250, 250, 128)
[7/37] relu2_1
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 250, 250, 128)
[8/37] conv2_2
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 250, 250, 128)
[9/37] relu2_2
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 250, 250, 128)
[10/37] pool2
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 125, 125, 128)
[11/37] conv3_1
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 125, 125, 256)
[12/37] relu3_1
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 125, 125, 256)
[13/37] conv3_2
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 125, 125, 256)
[14/37] relu3_2
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 125, 125, 256)
[15/37] conv3_3
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 125, 125, 256)
[16/37] relu3_3
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 125, 125, 256)
[17/37] conv3_4
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 125, 125, 256)
[18/37] relu3_4
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 125, 125, 256)
[19/37] pool3
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 63, 63, 256)
[20/37] conv4_1
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 63, 63, 512)
[21/37] relu4_1
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 63, 63, 512)
[22/37] conv4_2
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 63, 63, 512)
[23/37] relu4_2
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 63, 63, 512)
[24/37] conv4_3
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 63, 63, 512)
[25/37] relu4_3
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 63, 63, 512)
[26/37] conv4_4
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 63, 63, 512)
[27/37] relu4_4
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 63, 63, 512)
[28/37] pool4
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 32, 32, 512)
[29/37] conv5_1
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 32, 32, 512)
[30/37] relu5_1
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 32, 32, 512)
[31/37] conv5_2
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 32, 32, 512)
[32/37] relu5_2
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 32, 32, 512)
[33/37] conv5_3
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 32, 32, 512)
[34/37] relu5_3
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 32, 32, 512)
[35/37] conv5_4
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 32, 32, 512)
[36/37] relu5_4
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 32, 32, 512)
[37/37] pool5
 Type of 'features' is  <class 'numpy.ndarray'>
 Shape of 'features' is (1, 16, 16, 512)

参考链接:

https://blog.csdn.net/cskywit/article/details/79185792

唐宇迪深度学习

  • 2
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
人脸面部表情识别系统的代码实现,一般分为以下几个步骤:

1. 数据集准备:收集不同姿态、表情、光照等条件下的人脸数据,并进行数据增强和预处理,如旋转、翻转、缩放、归一化等。将数据集划分为训练集和测试集,并生成标签。
2. 特征提取:使用卷积神经网络(CNN)等模型,提取人脸图像中的特征,如颜色、形状、纹理等。常用的CNN模型有VGGNet、ResNet、Inception等。
3. 模型训练:使用带标签的数据集,训练深度学习模型。在训练过程中,需要考虑模型的复杂度、过拟合等问题。常用的优化算法有SGD、Adam等。
4. 模型评估和优化:使用测试数据集,评估模型的性能,如准确率、召回率、F1值等。根据评估结果,优化模型,调整参数、增加层数、改变网络结构等。
5. 部署和应用:将训练好的模型部署到实际应用中,如移动设备、Web应用等,实现人脸表情的实时识别和应用。

在具体实现上,可以使用Python语言和深度学习框架来实现。常用的深度学习框架有TensorFlow、PyTorch等。以下是一份使用PyTorch实现人脸面部表情识别系统的代码示例:

```python
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms, datasets

# 数据集准备
train_transforms = transforms.Compose([
    transforms.RandomRotation(30),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
test_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
train_dataset = datasets.ImageFolder('data/train', transform=train_transforms)
test_dataset = datasets.ImageFolder('data/test', transform=test_transforms)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

# 模型定义
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(256 * 28 * 28, 512)
        self.fc2 = nn.Linear(512, 7)

    def forward(self, x):
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        x = self.pool(nn.functional.relu(self.conv3(x)))
        x = x.view(-1, 256 * 28 * 28)
        x = nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = Net()

# 模型训练
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)
for epoch in range(10):
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 100 == 99:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

# 模型评估
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy of the network on the test images: %d %%' % (
    100 * correct / total))
```

在上述代码中,我们首先使用PyTorch提供的数据集处理函数定义了训练集和测试集的预处理方式,然后使用`ImageFolder`类读取数据集。然后,我们定义了一个简单的卷积神经网络模型,并使用交叉熵损失函数和随机梯度下降优化算法进行模型训练。最后,使用测试集对模型进行评估,输出准确率。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值