使用vgg19模型进行图片识别

最新推荐文章于 2023-08-15 00:42:53 发布

玉兔金兔

最新推荐文章于 2023-08-15 00:42:53 发布

阅读量2.8w

点赞数 2

分类专栏：机器学习

本文链接：https://blog.csdn.net/wyl1987527/article/details/68168115

版权

机器学习专栏收录该内容

18 篇文章

订阅专栏

目标：

1 使用vgg网络识别以下图片，识别结果中概率最高的5个分类的名称和概率值分别是什么？
2 将图片A（代码中为 testImages/Bird-catching-the-fly.jpg）输入vgg，把最后一层卷积网络经 relu、maxpool 后生成的特征map（即 pool5 的输出）按激活度排序，将激活度最高的4张保存为png。

资源准备 ：

1. 相关的 vgg 模型下载网址

http://www.vlfeat.org/matconvnet/models/beta16/

2.ImageNet 1000种分类以及排列

https://github.com/sh1r0/caffe-android-demo/blob/master/app/src/main/assets/synset_words.txt

3.vgg模型

说明：

1.使用vgg19模型进行图片识别

2.下载的是.mat 格式的vgg模型

具体代码

utils.py

import numpy as np
import os
import scipy.misc

def get_img(src, img_size=False):
    """Load the image at ``src`` as an RGB array, optionally resizing it.

    Args:
        src: Path handed to ``scipy.misc.imread``.
        img_size: Target size forwarded to ``scipy.misc.imresize``. The
            default ``False`` skips the resize step.

    Returns:
        The loaded image array, stacked to three channels when the loaded
        array is not already ``(..., ..., 3)``, and resized when requested.
    """
    pixels = scipy.misc.imread(src, mode='RGB')

    has_three_channels = len(pixels.shape) == 3 and pixels.shape[2] == 3
    if not has_three_channels:
        # Replicate the array along a third axis to obtain three channels.
        pixels = np.dstack((pixels, pixels, pixels))

    if img_size != False:  # noqa: E712 - equality test kept on purpose
        return scipy.misc.imresize(pixels, img_size)
    return pixels

def list_files(in_path):
    """Return the names of the files located directly inside ``in_path``.

    Only the first entry produced by ``os.walk`` is consumed, so files in
    sub-directories are not reported. An empty list is returned when the
    walk yields nothing.
    """
    first_entry = next(os.walk(in_path), None)
    if first_entry is None:
        return []
    _dirpath, _dirnames, filenames = first_entry
    return list(filenames)

def _get_files(img_dir):
    """Return one path per file directly inside ``img_dir``.

    Each name reported by ``list_files`` is joined onto ``img_dir``.
    """
    paths = []
    for filename in list_files(img_dir):
        paths.append(os.path.join(img_dir, filename))
    return paths

def save_img(out_path, img):
    """Write ``img`` to ``out_path`` as an 8-bit image.

    Values are clipped to the range 0..255 and cast to ``uint8`` before
    being handed to ``scipy.misc.imsave``.
    """
    clipped = np.clip(img, 0, 255)
    as_uint8 = clipped.astype(np.uint8)
    scipy.misc.imsave(out_path, as_uint8)

vgg.py

import tensorflow as tf
import numpy as np
import scipy.io
import pdb

# Three per-channel offsets subtracted by preprocess() and added back by
# unprocess().
# NOTE(review): presumably the RGB mean pixel of the VGG training data - confirm.
MEAN_PIXEL = np.array([ 123.68 ,  116.779,  103.939])

def net(data_path, input_image):
    """Build the VGG-19 graph from a MatConvNet ``.mat`` model file.

    Args:
        data_path: Path to the ``.mat`` file, read with ``scipy.io.loadmat``.
        input_image: Tensor fed into the first convolution.
            NOTE(review): assumed to be a batch of images laid out as
            (batch, height, width, channels) - confirm against the caller.

    Returns:
        A dict mapping every name in ``layers`` to the output tensor of that
        layer, in network order from ``conv1_1`` to ``softmax``.
    """
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',

        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',

        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
        'relu3_3', 'conv3_4', 'relu3_4', 'pool3',

        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
        'relu4_3', 'conv4_4', 'relu4_4', 'pool4',

        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
        'relu5_3', 'conv5_4', 'relu5_4', 'pool5',

        'fc6', 'relu6',

        'fc7', 'relu7',

        'fc8', 'softmax',  # named 'prob' in the .mat file
    )

    data = scipy.io.loadmat(data_path)
    weights = data['layers'][0]

    outputs = {}
    current = input_image
    for i, name in enumerate(layers):
        if name.startswith('conv'):
            # Entry i of the .mat layer list holds the parameters of layer i;
            # each layer must read its own entry, not the first one.
            kernels, bias = weights[i][0][0][0][0]
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            kernels = np.transpose(kernels, (1, 0, 2, 3))
            bias = bias.reshape(-1)
            current = _conv_layer(current, kernels, bias)
        elif name.startswith('relu'):
            current = tf.nn.relu(current)
        elif name.startswith('pool'):
            current = _pool_layer(current)
        elif name.startswith('soft'):
            current = _softmax_preds(current)
        elif name.startswith('fc'):
            kernels, bias = weights[i][0][0][0][0]
            # Collapse every leading axis so the kernel becomes a 2-D matrix
            # of shape (inputs, outputs) for the matmul in _fc_layer.
            # NOTE(review): conv kernels are transposed above but fc kernels
            # are flattened as stored - confirm the flatten order matches the
            # flattened activations.
            kernels = kernels.reshape(-1, kernels.shape[-1])
            bias = bias.reshape(-1)
            current = _fc_layer(current, kernels, bias)

        outputs[name] = current

    assert len(outputs) == len(layers)
    return outputs

def _conv_layer(input, weights, bias):
    """Apply a stride-1, SAME-padded 2-D convolution followed by a bias add.

    ``weights`` is wrapped in ``tf.constant`` before being used as the
    convolution filter.
    """
    kernel = tf.constant(weights)
    convolved = tf.nn.conv2d(
        input, kernel, strides=(1, 1, 1, 1), padding='SAME')
    return tf.nn.bias_add(convolved, bias)

def _pool_layer(input):
    """Max-pool ``input`` with a (1, 2, 2, 1) window and equal strides, SAME padding."""
    window = (1, 2, 2, 1)
    return tf.nn.max_pool(input, ksize=window, strides=window, padding='SAME')

def _fc_layer(input, weights, bias):
    """Flatten ``input`` per example and apply ``x @ weights + bias``.

    Every axis after the first is folded into a single axis whose length is
    the product of the static sizes of those axes.
    """
    trailing_dims = input.get_shape().as_list()[1:]
    flat_size = int(np.prod(trailing_dims))
    flattened = tf.reshape(input, [-1, flat_size])
    return tf.nn.bias_add(tf.matmul(flattened, weights), bias)

def _softmax_preds(input):
    """Return the softmax of ``input`` as an op named ``"prediction"``."""
    return tf.nn.softmax(input, name="prediction")

def preprocess(image):
    """Return ``image`` with ``MEAN_PIXEL`` subtracted."""
    centered = image - MEAN_PIXEL
    return centered

def unprocess(image):
    """Return ``image`` with ``MEAN_PIXEL`` added back (inverse of ``preprocess``)."""
    restored = image + MEAN_PIXEL
    return restored

vgg_classfication.py
# encoding: UTF-8
import tensorflow as tf, numpy as np
from utils import _get_files, get_img
import vgg

def _get_allClassificationName(file_path):
    """Read the class-name file and return its lines.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list with one string per line of the file, in file order. Line
        terminators are kept, exactly as ``readlines()`` returns them.
    """
    # The context manager closes the file even when reading raises.
    with open(file_path, 'r') as f:
        return f.readlines()

if __name__ == "__main__":
    # Print the five most probable ImageNet classes for every image found
    # directly under testImages/.
    lines = _get_allClassificationName('synset_words.txt')
    images = _get_files('testImages/')

    # Build the network once on a placeholder so the .mat file is loaded and
    # the graph is created a single time, not once per image.
    image_input = tf.placeholder(tf.float32, shape=(1, 224, 224, 3))
    net = vgg.net('data/imagenet-vgg-verydeep-19.mat', image_input)
    preds = net['softmax']

    with tf.Session() as sess:
        for imgPath in images:
            image = get_img(imgPath, (224, 224, 3)).astype(np.float32)
            print(imgPath)
            image_pre = np.expand_dims(vgg.preprocess(image), axis=0)

            # One forward pass per image; all five probabilities are read
            # from this single result.
            probs = sess.run(preds, feed_dict={image_input: image_pre})[0]
            predsSortIndex = np.argsort(-probs)

            for rank in range(5):
                # Index of the rank-th most probable class.
                nIndex = predsSortIndex[rank]
                classificationName = lines[nIndex]
                problity = probs[nIndex]

                print(classificationName)
                print(problity)

vgg_mapVisual.py

# encoding: utf-8
import tensorflow as tf, numpy as np
import os
from utils import _get_files, get_img, save_img
import vgg

if __name__ == "__main__":
    # Save the four pool5 feature maps with the largest summed activation
    # as 0.png .. 3.png and print their channel indices.
    image = get_img('testImages/Bird-catching-the-fly.jpg', (224, 224, 3)).astype(np.float32)
    image_pre = vgg.preprocess(image)
    image_pre = np.expand_dims(image_pre, axis=0)

    image_preTensor = tf.convert_to_tensor(image_pre)
    image_preTensor = tf.to_float(image_preTensor)

    # Test pretrained model
    net = vgg.net('data/imagenet-vgg-verydeep-19.mat', image_preTensor)

    maps = net['pool5']

    # Sum over every axis except the last to get one score per channel.
    mapSums = tf.reduce_sum(maps, [0, 1, 2])

    with tf.Session() as sess:
        # Evaluate both tensors in one run so the network executes once.
        mapValues, mapSumValues = sess.run([maps, mapSums])

    mapSumsIndex = np.argsort(-mapSumValues)

    for i in range(4):
        # Channel index of the i-th most activated map.
        nIndex = mapSumsIndex[i]
        mapImage = mapValues[0, :, :, nIndex]

        sPath = "%s.png" % i
        save_img(sPath, mapImage)

        print(nIndex)

实验结果

1 使用vgg网络识别以下图片，识别结果中最大概率5分类名称和概率值是什么？
答：
testImages/Bird-catching-the-fly.jpg
n02231487 walking stick, walkingstick, stick insect 0.103682
n01608432 kite               0.078659
n02236044 mantis, mantid    0.0750281
n01784675 centipede          0.0576766
n02226429 grasshopper, hopper 0.0568324

testImages/Crowd-of-people-008.jpg
n03598930 jigsaw puzzle      0.446802
n04435653 tile roof          0.222148
n04200800 shoe shop, shoe-shop, shoe store 0.126811
n03047690 clog, geta, patten, sabot 0.0362671
n02536864 coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch 0.0311323

testImages/German_American_Kids_Bookshelf.jpg
n02870880 bookcase            0.49299
n02978881 cassette            0.224196
n03290653 entertainment center 0.0401046
n03529860 home theater, home theatre 0.0267266
n04392985 tape player    0.0224044

2 使用A图片输入vgg, 将其最后一层卷积网络 relu maxpool 生成的特征map 按激活度排序，保存激活度最高的4张为png。
答：
pool5 层激活度最高的4个map位置是 498、477、393、280，激活度分别为 899.205、731.777、647.197、613.698。