使用vgg19模型进行图片识别

目标:
1 使用 vgg 网络识别以下图片,给出识别结果中概率最大的 5 个分类的名称及其概率值。
2 将图片 A 输入 vgg,取最后一个卷积块(conv → relu → maxpool,即 pool5)输出的特征图(feature map),按激活度排序,并将激活度最高的 4 张保存为 png。





资源准备
1. 相关的 vgg 模型下载网址

http://www.vlfeat.org/matconvnet/models/beta16/


2.ImageNet 1000种分类以及排列

https://github.com/sh1r0/caffe-android-demo/blob/master/app/src/main/assets/synset_words.txt


3.vgg模型



说明:

1.使用vgg19模型进行图片识别

2.下载的是.mat 格式的vgg模型



具体代码



utils.py

import numpy as np
import os
import scipy.misc

def get_img(src, img_size=False):
    """Load an image file as an RGB array, optionally resizing it.

    Args:
        src: Path of the image file, passed to ``scipy.misc.imread``.
        img_size: Target size forwarded to ``scipy.misc.imresize``; the
            default ``False`` leaves the image at its original size.

    Returns:
        The image array returned by ``imread`` (after the optional resize).
    """
    pixels = scipy.misc.imread(src, mode='RGB')

    # Stack the array three times along the depth axis when it is not
    # already a 3-channel image.
    has_three_channels = len(pixels.shape) == 3 and pixels.shape[2] == 3
    if not has_three_channels:
        pixels = np.dstack((pixels, pixels, pixels))

    if img_size != False:
        pixels = scipy.misc.imresize(pixels, img_size)
    return pixels

def list_files(in_path):
    """Return the names of the files directly inside ``in_path``.

    Only the top level of the directory is inspected; sub-directories are
    not descended into. An empty list is returned when ``os.walk`` yields
    nothing (for example, when the path does not exist).
    """
    # The first tuple produced by os.walk describes in_path itself.
    top_level = next(os.walk(in_path), None)
    if top_level is None:
        return []
    _dirpath, _dirnames, filenames = top_level
    return list(filenames)

def _get_files(img_dir):
    """Return ``img_dir`` joined with every file name found directly in it."""
    paths = []
    for filename in list_files(img_dir):
        paths.append(os.path.join(img_dir, filename))
    return paths

def save_img(out_path, img):
    """Write ``img`` to ``out_path`` as an 8-bit image.

    Values are clipped to the range [0, 255] and cast to ``uint8`` before
    being handed to ``scipy.misc.imsave``.
    """
    clipped = np.clip(img, 0, 255)
    scipy.misc.imsave(out_path, clipped.astype(np.uint8))






vgg.py

import tensorflow as tf
import numpy as np
import scipy.io
import pdb

# Per-channel mean that preprocess() subtracts and unprocess() adds back.
# NOTE(review): presumably the ImageNet training-set means in R, G, B
# order — confirm against the model's own normalization data.
MEAN_PIXEL = np.array([ 123.68 ,  116.779,  103.939])

def net(data_path, input_image):
    """Build the VGG-19 classification graph from a MatConvNet ``.mat`` file.

    Args:
        data_path: Path of the ``.mat`` model file, read with
            ``scipy.io.loadmat``.
        input_image: Tensor fed to the first convolution.
            NOTE(review): assumed to be an already mean-subtracted
            [batch, height, width, 3] float batch — confirm against callers.

    Returns:
        A dict mapping every name in ``layers`` to that layer's output
        tensor (e.g. ``net['pool5']``, ``net['softmax']``).
    """
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',

        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',

        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
        'relu3_3', 'conv3_4', 'relu3_4', 'pool3',

        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
        'relu4_3', 'conv4_4', 'relu4_4', 'pool4',

        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
        'relu5_3', 'conv5_4', 'relu5_4', 'pool5',

        'fc6', 'relu6',

        'fc7', 'relu7',

        'fc8', 'softmax'
    )

    data = scipy.io.loadmat(data_path)
    weights = data['layers'][0]

    net = {}
    current = input_image
    for i, name in enumerate(layers):
        kind = name[:4]
        if kind == 'conv':
            # The i-th entry of the layer array carries the parameters of
            # the i-th name in `layers`; index by i so every layer gets its
            # own (kernels, bias) pair.
            kernels, bias = weights[i][0][0][0][0]
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            kernels = np.transpose(kernels, (1, 0, 2, 3))
            bias = bias.reshape(-1)
            current = _conv_layer(current, kernels, bias)
        elif kind == 'relu':
            current = tf.nn.relu(current)
        elif kind == 'pool':
            current = _pool_layer(current)
        elif kind == 'soft':
            current = _softmax_preds(current)
        elif name[:2] == 'fc':
            kernels, bias = weights[i][0][0][0][0]
            # Collapse the kernel to 2-D (inputs x outputs) so it can be
            # applied with a matrix multiply on the flattened activations.
            kernels = kernels.reshape(-1, kernels.shape[-1])
            bias = bias.reshape(-1)
            current = _fc_layer(current, kernels, bias)

        net[name] = current

    assert len(net) == len(layers)
    return net


def _conv_layer(input, weights, bias):
    """Apply a stride-1, SAME-padded 2-D convolution followed by a bias add.

    ``weights`` is wrapped in a ``tf.constant``, so the filters are fixed
    values in the graph rather than variables.
    """
    filters = tf.constant(weights)
    convolved = tf.nn.conv2d(
        input, filters, strides=(1, 1, 1, 1), padding='SAME')
    return tf.nn.bias_add(convolved, bias)


def _pool_layer(input):
    """Apply max pooling with ksize and strides (1, 2, 2, 1) and SAME padding."""
    window = (1, 2, 2, 1)
    return tf.nn.max_pool(input, ksize=window, strides=window, padding='SAME')

def _fc_layer(input, weights, bias):
    """Flatten ``input`` per example and apply ``x @ weights + bias``.

    Every dimension after the first is multiplied together to obtain the
    flattened width, so those dimensions must be statically known.
    """
    flat_size = 1
    for extent in input.get_shape().as_list()[1:]:
        flat_size = flat_size * extent

    flattened = tf.reshape(input, [-1, flat_size])
    return tf.nn.bias_add(tf.matmul(flattened, weights), bias)

def _softmax_preds(input):
    """Return the softmax of ``input`` as an op named "prediction"."""
    return tf.nn.softmax(input, name="prediction")

def preprocess(image):
    """Return ``image`` with ``MEAN_PIXEL`` subtracted."""
    centered = image - MEAN_PIXEL
    return centered


def unprocess(image):
    """Return ``image`` with ``MEAN_PIXEL`` added back (inverse of preprocess)."""
    restored = image + MEAN_PIXEL
    return restored





vgg_classfication.py
# encoding: UTF-8
import tensorflow as tf, numpy as np
from utils import _get_files, get_img
import vgg

def _get_allClassificationName(file_path):
    """Read the class-name file and return its lines.

    Args:
        file_path: Path of the text file holding one class label per line.

    Returns:
        A list with one string per line, each keeping its trailing newline
        exactly as ``readlines()`` returns it.
    """
    # The context manager closes the file even if reading raises.
    with open(file_path, 'r') as f:
        return f.readlines()

if __name__ == "__main__":
    # Print the five most probable ImageNet classes for every image found
    # directly inside testImages/.
    lines = _get_allClassificationName('synset_words.txt')

    images = _get_files('testImages/')

    # Build the network once and feed each image through a placeholder, so
    # the model file is loaded a single time and the graph does not grow
    # with every image.
    image_input = tf.placeholder(tf.float32, shape=(1, 224, 224, 3))
    net = vgg.net('data/imagenet-vgg-verydeep-19.mat', image_input)
    preds = net['softmax']

    with tf.Session() as sess:
        for imgPath in images:
            image = get_img(imgPath, (224, 224, 3)).astype(np.float32)
            print(imgPath)
            image_pre = vgg.preprocess(image)
            image_pre = np.expand_dims(image_pre, axis=0).astype(np.float32)

            # One forward pass per image; the probabilities are then plain
            # NumPy values, so ranking and printing need no further runs.
            probabilities = sess.run(
                preds, feed_dict={image_input: image_pre})[0]
            predsSortIndex = np.argsort(-probabilities)

            # Take one class index per rank (highest probability first).
            for nIndex in predsSortIndex[:5]:
                classificationName = lines[nIndex]
                problity = probabilities[nIndex]

                print (classificationName)
                print (problity)



vgg_mapVisual.py

# encoding: utf-8
import tensorflow as tf, numpy as np
import os
from utils import _get_files, get_img, save_img
import vgg

if __name__ == "__main__":
    # Save the four pool5 feature maps with the largest summed activation
    # for one test image as 0.png .. 3.png.

    image = get_img('testImages/Bird-catching-the-fly.jpg', (224, 224, 3)).astype(np.float32)
    image_pre = vgg.preprocess(image)
    image_pre = np.expand_dims(image_pre, axis=0)

    image_preTensor = tf.convert_to_tensor(image_pre)
    image_preTensor = tf.to_float(image_preTensor)

    # Test pretrained model
    net = vgg.net('data/imagenet-vgg-verydeep-19.mat', image_preTensor)

    maps = net['pool5']

    # Total activation of each channel, summed over batch, height and width.
    mapSums = tf.reduce_sum(maps, [0, 1, 2])

    with tf.Session() as sess:
        # A single forward pass yields both the maps and their sums.
        mapsValue, mapSumsValue = sess.run([maps, mapSums])

    # Channel indices ordered from the largest summed activation down.
    mapSumsIndex = np.argsort(-mapSumsValue)

    for i, nIndex in enumerate(mapSumsIndex[:4]):
        mapImage = mapsValue[0, :, :, nIndex]

        sPath = "%s.png" % i
        save_img(sPath, mapImage)

        print(nIndex)



实验结果


1 使用vgg网络识别以下图片,识别结果中最大概率5分类名称和概率值是什么?
答:
testImages/Bird-catching-the-fly.jpg
n02231487 walking stick, walkingstick, stick insect   0.103682
n01608432 kite                0.078659
n02236044 mantis, mantid      0.0750281
n01784675 centipede           0.0576766
n02226429 grasshopper, hopper 0.0568324

testImages/Crowd-of-people-008.jpg
n03598930 jigsaw puzzle       0.446802
n04435653 tile roof           0.222148
n04200800 shoe shop, shoe-shop, shoe store 0.126811
n03047690 clog, geta, patten, sabot   0.0362671
n02536864 coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch 0.0311323



testImages/German_American_Kids_Bookshelf.jpg
n02870880 bookcase             0.49299
n02978881 cassette             0.224196
n03290653 entertainment center 0.0401046
n03529860 home theater, home theatre 0.0267266
n04392985 tape player      0.0224044


2 使用A图片输入vgg, 将其最后一层卷积网络 relu maxpool 生成的特征map 按激活度排序,保存激活度最高的4张为png。
答:
pool5 层 激活度最高的4个map位置是    498、477、393、280,激活度分别为899.205、731.777、647.197、613.698。

图片尺寸大小是7*7,比较小,可下载看大图。










参考网址:



https://github.com/lengstrom/fast-style-transfer/tree/master/src

http://machinethink.net/blog/convolutional-neural-networks-on-the-iphone-with-vggnet/

https://github.com/USTCchenjl/vgg_face_gender

http://blog.csdn.net/u013473520/article/details/50730620

https://github.com/boyw165/tensorflow-vgg

http://blog.csdn.net/qq_16949707/article/details/54837376

http://blog.csdn.net/u013473520/article/details/50730620




评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值