>>> 通过mofanPython进行学习之后,自己测试了一下迁移学习;
>>> 这个是一个使用VGG16的简单迁移学习示例,后面将对其进行微调,把VGG原本的分类任务改造为回归任务;
>>>回归目标:用来预测老虎和猫的长度;
>>>伪造了长度信息:
... 猫:正态分布(40,8) #平均40cm, 标准差为8
... 虎:正态分布(100,30) #平均100cm,标准差为30
>>>VGG的模型和参数来自于Git:
https://github.com/machrisaa/tensorflow-vgg
>>>导入工具包:
1 from urllib.request import urlretrieve 2 import os 3 import numpy as np 4 import tensorflow as tf 5 import skimage.io 6 import skimage.transform 7 import cv2 8 import matplotlib.pyplot as plt
>>>构建数据集第一步:下载图片
def download():
    """Download tiger and kittycat images listed in per-category URL files.

    Reads ./for_transfer_learning/imagenet_<category>.txt for each category
    and saves every image into ./for_transfer_learning/data/<category>/.
    Downloads are best-effort: a failed URL is reported and skipped.
    """
    categories = ['tiger', 'kittycat']
    for category in categories:
        os.makedirs('./for_transfer_learning/data/%s' % category, exist_ok=True)
        with open('./for_transfer_learning/imagenet_%s.txt' % category, 'r') as file:
            urls = file.readlines()
            n_urls = len(urls)
            for i, url in enumerate(urls):
                try:
                    urlretrieve(url.strip(), './for_transfer_learning/data/%s/%s'
                                % (category, url.strip().split('/')[-1]))
                    print('%s %i/%i' % (category, i, n_urls))
                except Exception:
                    # narrowed from a bare `except:` so KeyboardInterrupt /
                    # SystemExit still propagate and can abort the download loop
                    print('%s %i/%i' % (category, i, n_urls), 'no image')
>>>读取图片并进行resize操作
def load_data():
    """Load up to 400 resized .jpg images per category plus fake length labels.

    Returns:
        (tiger_imgs, cat_imgs, tigers_y, cat_y): the image lists hold
        [1, height, width, depth] arrays (one per image); labels are drawn
        from N(100, 30^2) for tigers and N(40, 8^2) for cats, floored at
        20 cm and 10 cm respectively so no length is implausibly small.
    """
    imgs = {'tiger': [], 'kittycat': []}
    for k in imgs.keys():
        # renamed from `dir`, which shadowed the builtin of the same name
        category_dir = './for_transfer_learning/data/' + k
        for file in os.listdir(category_dir):
            if not file.lower().endswith('.jpg'):
                continue
            try:
                resized_img = load_img(os.path.join(category_dir, file))
            except OSError:
                # unreadable/corrupt image file: skip it
                continue
            imgs[k].append(resized_img)    # [1, height, width, depth] * n
            if len(imgs[k]) == 400:        # only use 400 imgs to reduce my memory load
                break

    # fake length data for tiger and cat (30 and 8 are standard deviations)
    tigers_y = np.maximum(20, np.random.randn(len(imgs['tiger']), 1) * 30 + 100)
    cat_y = np.maximum(10, np.random.randn(len(imgs['kittycat']), 1) * 8 + 40)
    return imgs['tiger'], imgs['kittycat'], tigers_y, cat_y
>>>微调VGG,只对后面的全连接层进行训练
class Vgg16:
    """VGG16 with frozen pre-trained conv layers and a small trainable
    regression head (fc6 -> out) that predicts one length value per image."""

    # BGR channel means used by the pre-trained VGG16 weights; subtracted
    # from the input before it enters the conv stack
    vgg_mean = [103.939, 116.779, 123.68]

    def __init__(self, vgg16_npy_path=None, restore_from=None):
        """Build the graph.

        vgg16_npy_path: path to the pre-trained VGG16 parameter .npy file.
        restore_from: optional checkpoint path of previously fine-tuned fc
            layers; when absent, a training graph (MSE + RMSProp) is built
            and all variables are freshly initialized.
        """
        # pre-trained parameters
        try:
            self.data_dict = np.load(vgg16_npy_path, encoding='latin1').item()
        except FileNotFoundError:
            # NOTE(review): only prints a hint; self.data_dict stays unset, so
            # the conv_layer calls below will then fail — confirm this is intended
            print('Please download VGG16 parameters from here https://mega.nz/#!YU1FWJrA!O1ywiCS2IiOlUCtCpI6HTJOMrneN-Qdv3ywQP5poecM\nOr from my Baidu Cloud: https://pan.baidu.com/s/1Spps1Wy0bvrQHH2IMkRfpg')

        # inputs: RGB images (scaled by 255 below) and one length label each
        self.tfx = tf.placeholder(tf.float32, [None, 224, 224, 3])
        self.tfy = tf.placeholder(tf.float32, [None, 1])

        # Convert RGB to BGR
        red, green, blue = tf.split(axis=3, num_or_size_splits=3, value=self.tfx * 255.0)
        bgr = tf.concat(axis=3, values=[
            blue - self.vgg_mean[0],
            green - self.vgg_mean[1],
            red - self.vgg_mean[2],
        ])

        # pre-trained VGG layers are fixed in fine-tune
        conv1_1 = self.conv_layer(bgr, "conv1_1")
        conv1_2 = self.conv_layer(conv1_1, "conv1_2")
        pool1 = self.max_pool(conv1_2, 'pool1')

        conv2_1 = self.conv_layer(pool1, "conv2_1")
        conv2_2 = self.conv_layer(conv2_1, "conv2_2")
        pool2 = self.max_pool(conv2_2, 'pool2')

        conv3_1 = self.conv_layer(pool2, "conv3_1")
        conv3_2 = self.conv_layer(conv3_1, "conv3_2")
        conv3_3 = self.conv_layer(conv3_2, "conv3_3")
        pool3 = self.max_pool(conv3_3, 'pool3')

        conv4_1 = self.conv_layer(pool3, "conv4_1")
        conv4_2 = self.conv_layer(conv4_1, "conv4_2")
        conv4_3 = self.conv_layer(conv4_2, "conv4_3")
        pool4 = self.max_pool(conv4_3, 'pool4')

        conv5_1 = self.conv_layer(pool4, "conv5_1")
        conv5_2 = self.conv_layer(conv5_1, "conv5_2")
        conv5_3 = self.conv_layer(conv5_2, "conv5_3")
        pool5 = self.max_pool(conv5_3, 'pool5')

        # detach original VGG fc layers and
        # reconstruct your own fc layers serve for your own purpose
        # 224 / 2^5 = 7 after the five pooling stages, hence 7*7*512 features
        self.flatten = tf.reshape(pool5, [-1, 7*7*512])
        self.fc6 = tf.layers.dense(self.flatten, 256, tf.nn.relu, name='fc6')
        self.out = tf.layers.dense(self.fc6, 1, name='out')   # single regression output

        self.sess = tf.Session()
        if restore_from:
            # inference mode: restore previously fine-tuned fc weights
            saver = tf.train.Saver()
            saver.restore(self.sess, restore_from)
        else:            # training graph
            self.loss = tf.losses.mean_squared_error(labels=self.tfy, predictions=self.out)
            self.train_op = tf.train.RMSPropOptimizer(0.001).minimize(self.loss)
            self.sess.run(tf.global_variables_initializer())

    def max_pool(self, bottom, name):
        """2x2 max pooling with stride 2 (halves the spatial resolution)."""
        return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)

    def conv_layer(self, bottom, name):
        """3x3 conv + ReLU using the frozen pre-trained kernel/bias for `name`."""
        with tf.variable_scope(name):
            # CNN's filter is constant, NOT Variable that can be trained
            conv = tf.nn.conv2d(bottom, self.data_dict[name][0], [1, 1, 1, 1], padding='SAME')
            lout = tf.nn.relu(tf.nn.bias_add(conv, self.data_dict[name][1]))
            return lout

    def train(self, x, y):
        """Run one optimizer step on a batch; return the batch MSE loss."""
        loss, _ = self.sess.run([self.loss, self.train_op], {self.tfx: x, self.tfy: y})
        return loss

    def predict(self, paths):
        """Predict and plot the length for each image path (expects 2 paths)."""
        fig, axs = plt.subplots(1, 2)
        for i, path in enumerate(paths):
            x = load_img(path)
            length = self.sess.run(self.out, {self.tfx: x})
            axs[i].imshow(x[0])
            # NOTE(review): `length` is a (1, 1) array; '%'-formatting a
            # 1-element array works but is deprecated in newer numpy — confirm
            axs[i].set_title('Len: %.1f cm' % length)
            axs[i].set_xticks(()); axs[i].set_yticks(())
        plt.show()

    def save(self, path='./for_transfer_learning/model/transfer_learn'):
        """Persist the session's variables (the learned fc layers) to `path`."""
        saver = tf.train.Saver()
        saver.save(self.sess, path, write_meta_graph=False)
>>>进行训练,输出Loss, epoch为100, batchsize为6
def train():
    """Fine-tune the VGG16 fc head: 100 steps, random batches of 6 images."""
    tigers_x, cats_x, tigers_y, cats_y = load_data()

    # visualize the fake length distributions before training starts
    plt.hist(tigers_y, bins=20, label='Tigers')
    plt.hist(cats_y, bins=10, label='Cats')
    plt.legend()
    plt.xlabel('length')
    plt.show()

    # stack the per-image [1, H, W, C] arrays into one big batch tensor,
    # and the two label arrays into one column vector
    xs = np.concatenate(tigers_x + cats_x, axis=0)
    ys = np.concatenate((tigers_y, cats_y), axis=0)

    vgg = Vgg16(vgg16_npy_path='./for_transfer_learning/vgg16.npy')
    print('Net built')
    for step in range(100):
        batch_idx = np.random.randint(0, len(xs), 6)
        step_loss = vgg.train(xs[batch_idx], ys[batch_idx])
        print(step, 'train loss: ', step_loss)
    vgg.save('./for_transfer_learning/model/transfer_learn')    # save learned fc layers
>>>进行预测
def eval():
    """Restore the fine-tuned model and predict lengths for two sample images."""
    # NOTE: the name shadows the builtin `eval`; kept unchanged so existing
    # callers keep working
    vgg = Vgg16(
        vgg16_npy_path='./for_transfer_learning/vgg16.npy',
        restore_from='./for_transfer_learning/model/transfer_learn')
    sample_paths = [
        './for_transfer_learning/data/kittycat/000129037.jpg',
        './for_transfer_learning/data/tiger/391412.jpg',
    ]
    vgg.predict(sample_paths)


if __name__ == '__main__':
    # download()
    # train()
    eval()