VGG16详细实现(笨办法)

一、 VGG16简介

  • VGG16网络是通过卷积和全连接的方式提取图片特征、进行识别的一种网络结构。曾在2014年ILSVRC比赛中取得分类任务第二、定位任务第一的佳绩。
  • 其结构包含: 13个卷积层 + 3个全连接层,所以被称为VGG16,如下图绿色的部分即是网络的结构组成:
    在这里插入图片描述
  • 卷积配置说明
    • input(224*224 RGB image) :输入图片大小为:224*224,通道数为:3
    • conv3_XXX :卷积核为3*3(以下卷积核都是3*3,后面省略),卷积层的通道数为:XXX
  • VGG16特点(这里借鉴自[韩鼎の个人网站])
    • 卷积核全部使用3*3的卷积核 ,且所有卷积核 : strides=[1,1,1,1] ,padding=“SAME”。
    • 池化层全部使用: max的池化方式,2*2的池化核,通过全0填充,步长为2,strides=[1,2,2,1] ,padding=“SAME”。

二、代码实现

import h5py
import json
import tensorflow as tf
import cv2
import numpy as np
import matplotlib.pyplot as plt

# This weight file includes the final three fully-connected layers.
# The h5 file (and the json label file used below) were downloaded from
# kaggle: https://www.kaggle.com/keras/vgg16
model_path = './kreas_vgg16_model_para/vgg16_weights_tf_dim_ordering_tf_kernels.h5'
# This alternative file does NOT include the final three fully-connected layers.
# model_path = './kreas_vgg16_model_para/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'


# 1. Read every layer's parameters from the Keras h5 file with h5py.
#    This is the "clumsy" part of the clumsy approach — each dataset is
#    read explicitly by its full path — but it is very easy to follow.
#    The [:] slice copies each h5py dataset into an in-memory numpy array,
#    so the variables stay valid after the file is closed.
with h5py.File(model_path,'r') as f:
    # # List the first-level group names of the h5 file
    # layer1 = [ i for i in f]
    # print(layer1)
    # # List the second-level dataset names of the h5 file
    # layer2 = {'layer2_'+str(i):[j for j in f[layer1[i]]] for i in range(len(layer1))}
    # print(layer2)

    # 13 convolution layers grouped into 5 blocks; every kernel is
    # spatially 3x3 (x input channels x output channels).
    # block1
    block1_conv1_W_1 = f['/block1_conv1/block1_conv1_W_1:0'][:]
    # print(block1_conv1_W_1.shape)
    block1_conv2_W_1 = f['/block1_conv2/block1_conv2_W_1:0'][:]
    # print(block1_conv2_W_1.shape)
    block1_conv1_b_1 = f['/block1_conv1/block1_conv1_b_1:0'][:]
    block1_conv2_b_1 = f['/block1_conv2/block1_conv2_b_1:0'][:]

    # block2
    block2_conv1_W_1 = f['/block2_conv1/block2_conv1_W_1:0'][:]
    # print(block2_conv1_W_1.shape)
    block2_conv2_W_1 = f['/block2_conv2/block2_conv2_W_1:0'][:]
    # print(block2_conv2_W_1.shape)
    block2_conv1_b_1 = f['/block2_conv1/block2_conv1_b_1:0'][:]
    block2_conv2_b_1 = f['/block2_conv2/block2_conv2_b_1:0'][:]

    # block3
    block3_conv1_W_1 = f['/block3_conv1/block3_conv1_W_1:0'][:]
    # print(block3_conv1_W_1.shape)
    block3_conv2_W_1 = f['/block3_conv2/block3_conv2_W_1:0'][:]
    # print(block3_conv2_W_1.shape)
    block3_conv3_W_1 = f['/block3_conv3/block3_conv3_W_1:0'][:]
    # print(block3_conv3_W_1.shape)
    block3_conv1_b_1 = f['/block3_conv1/block3_conv1_b_1:0'][:]
    block3_conv2_b_1 = f['/block3_conv2/block3_conv2_b_1:0'][:]
    block3_conv3_b_1 = f['/block3_conv3/block3_conv3_b_1:0'][:]

    # block4
    block4_conv1_W_1 = f['/block4_conv1/block4_conv1_W_1:0'][:]
    block4_conv2_W_1 = f['/block4_conv2/block4_conv2_W_1:0'][:]
    block4_conv3_W_1 = f['/block4_conv3/block4_conv3_W_1:0'][:]
    block4_conv1_b_1 = f['/block4_conv1/block4_conv1_b_1:0'][:]
    block4_conv2_b_1 = f['/block4_conv2/block4_conv2_b_1:0'][:]
    block4_conv3_b_1 = f['/block4_conv3/block4_conv3_b_1:0'][:]

    # block5
    block5_conv1_W_1 = f['/block5_conv1/block5_conv1_W_1:0'][:]
    block5_conv2_W_1 = f['/block5_conv2/block5_conv2_W_1:0'][:]
    block5_conv3_W_1 = f['/block5_conv3/block5_conv3_W_1:0'][:]
    block5_conv1_b_1 = f['/block5_conv1/block5_conv1_b_1:0'][:]
    block5_conv2_b_1 = f['/block5_conv2/block5_conv2_b_1:0'][:]
    block5_conv3_b_1 = f['/block5_conv3/block5_conv3_b_1:0'][:]

    # The three fully-connected layers
    fc1_W_1 = f['/fc1/fc1_W_1:0'][:]
    fc1_b_1 = f['/fc1/fc1_b_1:0'][:]
    # print(fc1_W_1.shape)

    fc2_W_1 = f['/fc2/fc2_W_1:0'][:]
    fc2_b_1 = f['/fc2/fc2_b_1:0'][:]
    # print(fc2_W_1.shape)

    predictions_W_1 = f['/predictions/predictions_W_1:0'][:]
    predictions_b_1 = f['/predictions/predictions_b_1:0'][:]
    # print(predictions_W_1.shape)

    
# 2. Feed the loaded parameters through the network structure to obtain the
#    softmax distribution over the 1000 ImageNet classes.
def prediction(image):
    """Run the VGG16 forward pass and visualize the TOP-5 predictions.

    Args:
        image: float32 ndarray of shape (1, 224, 224, 3) as produced by
            dealImage().

    Returns:
        None. Side effect: builds the graph, evaluates it in a fresh
        tf.Session and plots the five most probable classes via getTop5().
    """
    # 13 convolution layers (5 blocks). Each conv is followed by a bias add
    # and ReLU; each block ends with 2x2 max pooling.
    # block 1
    conv1 = conv2d(image,block1_conv1_W_1)
    relu1 = tf.nn.relu(tf.nn.bias_add(conv1,block1_conv1_b_1))

    conv2 = conv2d(relu1,block1_conv2_W_1)
    relu2 = tf.nn.relu(tf.nn.bias_add(conv2,block1_conv2_b_1))
    pool1 = max_pool_2x2(relu2)

    # block 2
    conv3 = conv2d(pool1,block2_conv1_W_1)
    relu3 = tf.nn.relu(tf.nn.bias_add(conv3,block2_conv1_b_1))

    conv4 = conv2d(relu3,block2_conv2_W_1)
    relu4 = tf.nn.relu(tf.nn.bias_add(conv4,block2_conv2_b_1))
    pool2 = max_pool_2x2(relu4)

    # block 3
    conv5 = conv2d(pool2,block3_conv1_W_1)
    relu5 = tf.nn.relu(tf.nn.bias_add(conv5,block3_conv1_b_1))

    conv6 = conv2d(relu5,block3_conv2_W_1)
    relu6 = tf.nn.relu(tf.nn.bias_add(conv6,block3_conv2_b_1))

    conv7 = conv2d(relu6,block3_conv3_W_1)
    relu7 = tf.nn.relu(tf.nn.bias_add(conv7,block3_conv3_b_1))
    pool3 = max_pool_2x2(relu7)

    # block 4
    conv8 = conv2d(pool3,block4_conv1_W_1)
    relu8 = tf.nn.relu(tf.nn.bias_add(conv8,block4_conv1_b_1))

    conv9 = conv2d(relu8,block4_conv2_W_1)
    relu9 = tf.nn.relu(tf.nn.bias_add(conv9,block4_conv2_b_1))

    conv10 = conv2d(relu9,block4_conv3_W_1)
    relu10 = tf.nn.relu(tf.nn.bias_add(conv10,block4_conv3_b_1))
    pool4 = max_pool_2x2(relu10)

    # block 5
    conv11 = conv2d(pool4,block5_conv1_W_1)
    relu11 = tf.nn.relu(tf.nn.bias_add(conv11,block5_conv1_b_1))

    conv12 = conv2d(relu11,block5_conv2_W_1)
    relu12 = tf.nn.relu(tf.nn.bias_add(conv12,block5_conv2_b_1))

    conv13 = conv2d(relu12,block5_conv3_W_1)
    relu13 = tf.nn.relu(tf.nn.bias_add(conv13,block5_conv3_b_1))
    pool5 = max_pool_2x2(relu13)
    # pool5 shape: (1, 7, 7, 512)

    # Flatten the feature map for the fully-connected layers.
    pool_shape = pool5.get_shape().as_list()
    nodes = pool_shape[1]*pool_shape[2]*pool_shape[3]
    # -1 lets TF infer the batch dimension instead of relying on a static
    # batch size being present in the graph.
    reshaped = tf.reshape(pool5,[-1,nodes])
    # reshaped shape: (1, 25088)

    # The three fully-connected layers; the last one is softmax-activated.
    fc1 = tf.nn.relu(tf.nn.bias_add(tf.matmul(reshaped,fc1_W_1),fc1_b_1))

    fc2 = tf.nn.relu(tf.nn.bias_add(tf.matmul(fc1,fc2_W_1),fc2_b_1))

    y = tf.nn.softmax(tf.nn.bias_add(tf.matmul(fc2,predictions_W_1),predictions_b_1))
    # Drop the batch dimension: (1, 1000) -> (1000,)
    y = tf.squeeze(y)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        y_value = sess.run(y)
        # Visualize the five most probable classes with matplotlib.
        getTop5(y_value)
    return None

# 2.1 Visualize the TOP-5 predictions
def getTop5(y_value):
    """Plot a bar chart of the five most probable ImageNet classes.

    Args:
        y_value: 1-D ndarray of softmax probabilities, where y_value[i] is
            the probability that the image belongs to class index i.

    Returns:
        None. Side effect: shows a matplotlib bar chart.
    """
    # Indices of the five largest probabilities, most probable first.
    top5_index = np.argsort(-y_value)[:5]
    # JSON mapping looks like {"<index>": [wordnet_id, readable_label], ...}
    labels_data = reafLabels()
    # Direct lookups replace the original O(n*m) nested comprehensions that
    # scanned the whole probability array / label dict for every index.
    top5_accuracy = [y_value[i] for i in top5_index]
    top5_prediction = [labels_data[str(i)][1] for i in top5_index]
    # Draw the chart
    plt.figure(figsize=(8,8),dpi=80)
    x_ticks = range(len(top5_prediction))
    # Bar chart
    plt.bar(x_ticks,top5_accuracy,width=0.3,color=["purple","orange","y","g","c"])
    # Annotate each bar with its percentage.
    for x, y in zip(x_ticks, top5_accuracy):
        plt.text(x , y + 0.005, '%s' % percent(y), ha='center', va='bottom')
    # Label the x-axis ticks with the class names.
    plt.xticks(x_ticks,top5_prediction)
    # Title
    plt.title("PREDICTIONS TOP5")
    plt.show()
    return None


def reafLabels():
    """Load the ImageNet index->label mapping from the downloaded json file."""
    with open('./kreas_vgg16_model_para/imagenet_class_index.json', 'r', encoding='utf8')as fp:
        return json.load(fp)

def dealImage(image_path="./image/frog.jpg"):
    """Read an image with OpenCV and shape it into a VGG16 input batch.

    Args:
        image_path: path of the image file (read in OpenCV's BGR order).

    Returns:
        float32 ndarray of shape (1, 224, 224, 3).
    """
    raw = cv2.imread(image_path)
    resized = cv2.resize(raw, (224, 224))
    # astype returns a new float32 copy and leaves the original array alone.
    floats = resized.astype(np.float32)
    return floats.reshape(1, 224, 224, 3)

def conv2d(x, w):
    """Stride-1, SAME-padded 2-D convolution — the only conv config VGG16 uses."""
    return tf.nn.conv2d(input=x, filter=w, strides=[1, 1, 1, 1], padding="SAME")

def max_pool_2x2(x):
    """2x2 max pooling with stride 2 and SAME padding (halves the spatial dims)."""
    return tf.nn.max_pool(value=x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

def percent(value):
    """Format a 0-1 fraction as a percentage string with two decimals, e.g. 0.5 -> '50.00%'."""
    return "{:.2f}%".format(value * 100)

if __name__ == '__main__':
    # Entry point: preprocess the sample image, then run VGG16 inference;
    # prediction() also plots the TOP-5 classes.
    image = dealImage()
    prediction(image)



三、效果展示

在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

在这里插入图片描述

四、参考链接

  1. 论文链接
  2. 韩鼎の个人网站
  • 4
    点赞
  • 16
    收藏
    觉得还不错? 一键收藏
  • 4
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值