观看莫烦老师《迁移学习》,利用VGG实现一个判断老虎和猫的尺寸

"""
This is a simple example of transfer learning using VGG.
Fine tune a CNN from a classifier to regressor.
Generate some fake data for describing cat and tiger length.

Fake length setting:
Cat - Normal distribution (40, 8)
Tiger - Normal distribution (100, 30)

The VGG model and parameters are adopted from:
https://github.com/machrisaa/tensorflow-vgg

Learn more, visit my tutorial site: [莫烦Python](https://morvanzhou.github.io)
"""

from urllib.request import urlretrieve
import os
import numpy as np
import tensorflow as tf
import skimage.io
import skimage.transform
import matplotlib.pyplot as plt


# def download():     # download tiger and kittycat image
#     categories = ['tiger', 'kittycat']
#     for category in categories:
#         os.makedirs('./for_transfer_learning/data/%s' % category, exist_ok=True)
#         with open('./for_transfer_learning/imagenet_%s.txt' % category, 'r') as file:
#             urls = file.readlines()
#             n_urls = len(urls)
#             for i, url in enumerate(urls):
#                 try:
#                     urlretrieve(url.strip(), './for_transfer_learning/data/%s/%s' % (category, url.strip().split('/')[-1]))
#                     print('%s %i/%i' % (category, i, n_urls))
#                 except:
#                     print('%s %i/%i' % (category, i, n_urls), 'no image')


def load_img(path):
    img = skimage.io.imread(path)
    img = img / 255.0
    # print "Original Image Shape: ", img.shape
    # we crop image from center
    short_edge = min(img.shape[:2])
    yy = int((img.shape[0] - short_edge) / 2)
    xx = int((img.shape[1] - short_edge) / 2)
    crop_img = img[yy: yy + short_edge, xx: xx + short_edge]
    # resize to 224, 224
    resized_img = skimage.transform.resize(crop_img, (224, 224))[None, :, :, :]   # shape [1, 224, 224, 3]
    return resized_img


def load_data():
    imgs = {'tiger': [], 'kittycat': []}
    for k in imgs.keys():
        dir = 'D:/VGG_practice/for_transfer_learning/data/' + k
        for file in os.listdir(dir):
            if not file.lower().endswith('.jpg'):
                continue
            try:
                resized_img = load_img(os.path.join(dir, file))
            except OSError:
                continue
            imgs[k].append(resized_img)    # [1, height, width, depth] * n
            if len(imgs[k]) == 400:        # only use 400 imgs to reduce my memory load
                break
    # fake length data for tiger and cat
    tigers_y = np.maximum(20, np.random.randn(len(imgs['tiger']), 1) * 30 + 100)
    cat_y = np.maximum(10, np.random.randn(len(imgs['kittycat']), 1) * 8 + 40)
    return imgs['tiger'], imgs['kittycat'], tigers_y, cat_y


class Vgg16:
    vgg_mean = [103.939, 116.779, 123.68]
    print(3)
    def __init__(self, vgg16_npy_path=None, restore_from=None):
        # pre-trained parameters
        try:
            self.data_dict = np.load(vgg16_npy_path, encoding='latin1').item()
            print(Vgg16)
        except FileNotFoundError:
            print('Please download VGG16 parameters from here https://mega.nz/#!YU1FWJrA!O1ywiCS2IiOlUCtCpI6HTJOMrneN-Qdv3ywQP5poecM\nOr from my Baidu Cloud: https://pan.baidu.com/s/1Spps1Wy0bvrQHH2IMkRfpg')

        self.tfx = tf.placeholder(tf.float32, [None, 224, 224, 3])
        self.tfy = tf.placeholder(tf.float32, [None, 1])

        # Convert RGB to BGR
        red, green, blue = tf.split(axis=3, num_or_size_splits=3, value=self.tfx * 255.0)
        bgr = tf.concat(axis=3, values=[
            blue - self.vgg_mean[0],
            green - self.vgg_mean[1],
            red - self.vgg_mean[2],
        ])

        # pre-trained VGG layers are fixed in fine-tune
        conv1_1 = self.conv_layer(bgr, "conv1_1")
        conv1_2 = self.conv_layer(conv1_1, "conv1_2")
        pool1 = self.max_pool(conv1_2, 'pool1')

        conv2_1 = self.conv_layer(pool1, "conv2_1")
        conv2_2 = self.conv_layer(conv2_1, "conv2_2")
        pool2 = self.max_pool(conv2_2, 'pool2')

        conv3_1 = self.conv_layer(pool2, "conv3_1")
        conv3_2 = self.conv_layer(conv3_1, "conv3_2")
        conv3_3 = self.conv_layer(conv3_2, "conv3_3")
        pool3 = self.max_pool(conv3_3, 'pool3')

        conv4_1 = self.conv_layer(pool3, "conv4_1")
        conv4_2 = self.conv_layer(conv4_1, "conv4_2")
        conv4_3 = self.conv_layer(conv4_2, "conv4_3")
        pool4 = self.max_pool(conv4_3, 'pool4')

        conv5_1 = self.conv_layer(pool4, "conv5_1")
        conv5_2 = self.conv_layer(conv5_1, "conv5_2")
        conv5_3 = self.conv_layer(conv5_2, "conv5_3")
        pool5 = self.max_pool(conv5_3, 'pool5')

        # detach original VGG fc layers and
        # reconstruct your own fc layers serve for your own purpose
        self.flatten = tf.reshape(pool5, [-1, 7*7*512])
        self.fc6 = tf.layers.dense(self.flatten, 256, tf.nn.relu, name='fc6')
        self.out = tf.layers.dense(self.fc6, 1, name='out')

        self.sess = tf.Session()
        if restore_from:
            saver = tf.train.Saver()
            saver.restore(self.sess, restore_from)
        else:   # training graph
            self.loss = tf.losses.mean_squared_error(labels=self.tfy, predictions=self.out)
            self.train_op = tf.train.RMSPropOptimizer(0.001).minimize(self.loss)
            self.sess.run(tf.global_variables_initializer())

    def max_pool(self, bottom, name):
        return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)

    def conv_layer(self, bottom, name):
        with tf.variable_scope(name):   # CNN's filter is constant, NOT Variable that can be trained
            conv = tf.nn.conv2d(bottom, self.data_dict[name][0], [1, 1, 1, 1], padding='SAME')
            lout = tf.nn.relu(tf.nn.bias_add(conv, self.data_dict[name][1]))
            return lout

    def train(self, x, y):
        loss, _ = self.sess.run([self.loss, self.train_op], {self.tfx: x, self.tfy: y})
        return loss

    def predict(self, paths):
        fig, axs = plt.subplots(1, 2)
        for i, path in enumerate(paths):
            x = load_img(path)
            length = self.sess.run(self.out, {self.tfx: x})
            axs[i].imshow(x[0])
            axs[i].set_title('Len: %.1f cm' % length)
            axs[i].set_xticks(()); axs[i].set_yticks(())
        plt.show()

    def save(self, path='D:/VGG_practice/for_transfer_learning/model/transfer_learn'):
        saver = tf.train.Saver()
        saver.save(self.sess, path, write_meta_graph=False)


def train():
    tigers_x, cats_x, tigers_y, cats_y = load_data()

    # plot fake length distribution
    plt.hist(tigers_y, bins=20, label='Tigers')
    plt.hist(cats_y, bins=10, label='Cats')
    plt.legend()
    plt.xlabel('length')
    print(4)
    plt.show()
    print(5)
    xs = np.concatenate(tigers_x + cats_x, axis=0)
    ys = np.concatenate((tigers_y, cats_y), axis=0)
    print(1)
    vgg = Vgg16(vgg16_npy_path='D:/VGG_practice/for_transfer_learning/vgg16.npy')
    print(2)
    print('Net built')
    for i in range(100):
        b_idx = np.random.randint(0, len(xs), 6)
        train_loss = vgg.train(xs[b_idx], ys[b_idx])
        print(i, 'train loss: ', train_loss)

    vgg.save('D:/VGG_practice/for_transfer_learning/model/transfer_learn')      # save learned fc layers


def eval():
    vgg = Vgg16(vgg16_npy_path='D:/VGG_practice/for_transfer_learning/vgg16.npy',restore_from='D:/VGG_practice/for_transfer_learning/model/transfer_learn')

    vgg.predict(
        ['D:/VGG_practice/for_transfer_learning/data/kittycat/000129037.jpg', 'D:/VGG_practice/for_transfer_learning/data/tiger/391412.jpg'])


if __name__ == '__main__':
    # download()
    # train()
    eval()

   上面是莫烦老师上传的代码,我试着运行了一下,因为tigers和cats的图片我已经通过百度云下载了,所以def download()中的代码我就没有用,因此注释了起来。

   所谓“迁移学习”就是站在巨人的肩膀上,“他山之石,可以攻玉”,比如这段代码,主要部分就是利用了VGG网络,(self.flatten之前的部分)只是把最后的全连接层为了实现自己的目的做出了改变。

   虽然这个代码很简单,但是我调试了一下午才成功,(菜鸟实在伤不起--!) 

   后来想了一下之前失败的原因,有两种可能:1.VGG那个模型没有下载好 2.CPU跑代码需要一些时间,之前为了了解代码的运行过程,我用print输出了一些数字,发现到plt.show()输出一张柱状图之后就不往下继续走了,就这样不断调试了好长时间,后来我在它显示出了一张图片以后就没有管它,在关掉那张图片以后,程序就开始跑起来了。。。。(真的醉了)哈哈哈哈。。。

 今天算是我第一次些博客,虽然自己是个菜鸟,但是梦想还是要有的,别忘了“亮剑”精神哦!

下图是运行的结果

   

这是之前输入的猫和老虎的尺寸数据

它开始跑起来了。。。

 出现了这个警告。。。

运行结束了

大功告成啦!(这也算扬名立万的第一步。。。。)

  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
本课程适合具有一定深度学习基础,希望发展为深度学习之计算机视觉方向的算法工程师和研发人员的同学们。基于深度学习的计算机视觉是目前人工智能最活跃的领域,应用非常广泛,如人脸识别和无人驾驶中的机器视觉等。该领域的发展日新月异,网络模型和算法层出不穷。如何快速入门并达到可以从事研发的高度对新手和中级水平的学生而言面临不少的挑战。精心准备的本课程希望帮助大家尽快掌握基于深度学习的计算机视觉的基本原理、核心算法和当前的领先技术,从而有望成为深度学习之计算机视觉方向的算法工程师和研发人员。本课程系统全面地讲述基于深度学习的计算机视觉技术的原理并进行项目实践。课程涵盖计算机视觉的七大任务,包括图像分类、目标检测、图像分割(语义分割、实例分割、全景分割)、人脸识别、图像描述、图像检索、图像生成(利用生成对抗网络)。本课程注重原理和实践相结合,逐篇深入解读经典和前沿论文70余篇,图文并茂破译算法难点, 使用思维导图梳理技术要点。项目实践使用Keras框架(后端为Tensorflow),学员可快速上手。通过本课程的学习,学员可把握基于深度学习的计算机视觉的技术发展脉络,掌握相关技术原理和算法,有助于开展该领域的研究与开发实战工作。另外,深度学习之计算机视觉方向的知识结构及学习建议请参见本人CSDN博客。本课程提供课程资料的课件PPT(pdf格式)和项目实践代码,方便学员学习和复习。本课程分为上下两部分,其中上部包含课程的前五章(课程介绍、深度学习基础、图像分类、目标检测、图像分割),下部包含课程的后四章(人脸识别、图像描述、图像检索、图像生成)。
好的,实现狗识别任务需要进行迁移学习,使用预训练的VGG16模型可以帮助我们快速搭建一个高效的卷积神经网络。 以下是基本的实现步骤: 1. 导入相关库 ```python import os import numpy as np from keras.preprocessing.image import load_img, img_to_array from keras.applications.vgg16 import VGG16, preprocess_input from keras.models import Model from keras.layers import Dense, Dropout, Flatten from keras.optimizers import Adam from sklearn.model_selection import train_test_split ``` 2. 加载数据集 我们使用的数据集是Kaggle上的狗分类数据集,可以从以下链接下载:https://www.kaggle.com/c/dogs-vs-cats/data 数据集中包含25,000张大小不一的和狗的图片,我们需要将其转换为模型可以处理的格式。 ```python # 定义数据集根目录 data_dir = 'path/to/data' # 定义图片大小和批次大小 img_size = (224, 224) batch_size = 32 # 加载数据集并将其转换为模型可以处理的格式 def load_dataset(): X = [] Y = [] for file in os.listdir(data_dir): if file.endswith('.jpg'): img = load_img(os.path.join(data_dir, file), target_size=img_size) img = img_to_array(img) X.append(img) if 'cat' in file: Y.append(0) else: Y.append(1) return np.array(X), np.array(Y) X, Y = load_dataset() ``` 3. 划分数据集 我们将数据集划分为训练集、验证集和测试集。训练集用于训练模型,验证集用于调整模型参数,测试集用于评估模型性能。 ```python # 划分数据集 X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42) X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.2, random_state=42) ``` 4. 加载预训练模型 我们使用Keras中已经训练好的VGG16模型作为特征提取器,将其载入并输出模型结构。 ```python # 加载预训练模型 base_model = VGG16(include_top=False, weights='imagenet', input_shape=img_size+(3,)) # 输出模型结构 for layer in base_model.layers: print(layer.name, layer.input_shape, layer.output_shape) ``` 5. 冻结模型权重 我们将模型的卷积层权重冻结,只训练新添加的全连接层的权重。 ```python # 冻结模型权重 for layer in base_model.layers: layer.trainable = False ``` 6. 构建模型 我们在VGG16模型的顶部添加了几个全连接层,用于分类任务。 ```python # 添加新的全连接层 x = base_model.output x = Flatten()(x) x = Dense(256, activation='relu')(x) x = Dropout(0.5)(x) x = Dense(1, activation='sigmoid')(x) # 构建新模型 model = Model(inputs=base_model.input, outputs=x) # 输出模型结构 model.summary() ``` 7. 训练模型 我们使用Adam优化器和二元交叉熵损失函数训练模型。 ```python # 编译模型 model.compile(optimizer=Adam(lr=0.001), loss='binary_crossentropy', metrics=['accuracy']) # 训练模型 history = model.fit( preprocess_input(X_train), Y_train, batch_size=batch_size, epochs=10, validation_data=(preprocess_input(X_val), Y_val), verbose=1 ) ``` 8. 评估模型 我们使用测试集评估模型性能。 ```python # 评估模型 score = model.evaluate(preprocess_input(X_test), Y_test, verbose=0) print('Test loss:', score[0]) print('Test accuracy:', score[1]) ```

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值