采用迁移学习,我们必须严格按照预训练模型初始训练时的方式归一化,而且不同模型的归一化方式不同,VGG和ResNet是减去训练集均值,Xception和InceptionV3是线性缩放到[-1,1];
网络默认输入为:
VGG和ResNet是224*224*3
Xception和InceptionV3是299*299*3
tensorflow数据预处理
tensorflow使用slim库对数据进行预处理,各模型和预处理方式对应如下:
# slim's preprocessing_factory table: maps each model name to the
# preprocessing module whose normalization matches that model's
# pretraining recipe (VGG-style mean subtraction vs. Inception-style
# linear scaling to [-1, 1]).
preprocessing_fn_map = {
    'cifarnet': cifarnet_preprocessing,
    'lenet': lenet_preprocessing,
    # Inception-style preprocessing, shared by the Inception family and
    # by MobileNet / NASNet / PNASNet.
    'inception': inception_preprocessing,
    'inception_v1': inception_preprocessing,
    'inception_v2': inception_preprocessing,
    'inception_v3': inception_preprocessing,
    'inception_v4': inception_preprocessing,
    'inception_resnet_v2': inception_preprocessing,
    'mobilenet_v1': inception_preprocessing,
    'mobilenet_v2': inception_preprocessing,
    'mobilenet_v2_035': inception_preprocessing,
    'mobilenet_v2_140': inception_preprocessing,
    'nasnet_mobile': inception_preprocessing,
    'nasnet_large': inception_preprocessing,
    'pnasnet_mobile': inception_preprocessing,
    'pnasnet_large': inception_preprocessing,
    # VGG-style preprocessing, shared by all ResNet v1/v2 depths and VGG.
    'resnet_v1_50': vgg_preprocessing,
    'resnet_v1_101': vgg_preprocessing,
    'resnet_v1_152': vgg_preprocessing,
    'resnet_v1_200': vgg_preprocessing,
    'resnet_v2_50': vgg_preprocessing,
    'resnet_v2_101': vgg_preprocessing,
    'resnet_v2_152': vgg_preprocessing,
    'resnet_v2_200': vgg_preprocessing,
    'vgg': vgg_preprocessing,
    'vgg_a': vgg_preprocessing,
    'vgg_16': vgg_preprocessing,
    'vgg_19': vgg_preprocessing,
}
代码实例:
import numpy as np
import tensorflow as tf
from preprocessing import vgg_preprocessing, inception_preprocessing
from tensorflow.contrib import slim
import sys
sys.path.append("D:\\人工智能学习资料\\github\\models\\research\\slim")
from nets.resnet_v1 import *
from nets.resnet_v2 import *
# Evaluation configuration for the ImageNet validation run below.
n_images = 50000  # size of the ILSVRC2012 validation set
batch_size = 100  # images scored between accuracy printouts
n_top = 1  # top-n accuracy to measure (1 => top-1)
model = "resnet_v2_50"  # which slim ResNet variant to evaluate
# Evaluate a slim ResNet checkpoint on the ImageNet validation images,
# feeding one file path at a time through the preprocessing pipeline that
# matches the checkpoint (VGG-style preprocessing for both ResNet variants).
# NOTE(review): `checkpoint_file`, `images_path` and `PIE_TRUTH` (the
# ground-truth label array) are assumed to be defined earlier in the full
# script — confirm against the complete source.
with tf.Graph().as_default():
    # Input pipeline: path string -> decoded RGB image -> float32.
    input_string = tf.placeholder(tf.string)
    input_images = tf.read_file(input_string)
    input_images = tf.image.decode_jpeg(input_images, channels=3)
    input_images = tf.cast(input_images, tf.float32)

    with slim.arg_scope(resnet_arg_scope()):
        if model == "resnet_v1_50":
            # resnet_v1 checkpoints: 1000 classes (no background class).
            processed_images = vgg_preprocessing.preprocess_image(
                input_images, 224, 224, is_training=False)
            processed_images = tf.expand_dims(processed_images, 0)
            logits, _ = resnet_v1_50(processed_images,
                                     num_classes=1000,
                                     is_training=False)
        elif model == "resnet_v2_50":
            # resnet_v2 checkpoints: 1001 classes (extra background class).
            processed_images = vgg_preprocessing.preprocess_image(
                input_images, 224, 224, is_training=False)
            processed_images = tf.expand_dims(processed_images, 0)
            logits, _ = resnet_v2_50(processed_images,
                                     num_classes=1001,
                                     is_training=False)
    probabilities = tf.nn.softmax(logits)

    # BUG FIX: the original called slim.get_model_variables() before the
    # network was built, so it returned an empty list. It must run after
    # the model graph has been constructed.
    variables_to_restore = slim.get_model_variables()
    print(variables_to_restore)

    init_fn = slim.assign_from_checkpoint_fn(
        checkpoint_file, slim.get_model_variables(model))

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        init_fn(sess)
        success_search = 0
        import time
        start_time = time.time()
        for b in range(int(np.ceil(n_images / np.float(batch_size)))):
            # Validation files are 1-indexed: ILSVRC2012_val_00000001.JPEG ...
            start = b * batch_size + 1
            stop = np.minimum(n_images + 1, start + batch_size)
            for i in range(start, stop, 1):
                # Typo fixed: "imges_path" -> images_path (directory holding
                # the validation JPEGs).
                img_path = images_path + "ILSVRC2012_val_%08d.JPEG" % (i)
                check_time = time.time()
                pred = sess.run(probabilities,
                                feed_dict={input_string: img_path})
                x_gen = sess.run(processed_images,
                                 feed_dict={input_string: img_path})
                # Accumulate this batch's predictions row by row.
                if i == start:
                    preds = pred
                else:
                    preds = np.concatenate([preds, pred], axis=0)
                # Sanity check on the preprocessing output range.
                print(np.max(x_gen), np.min(x_gen))
            # Top-n accuracy bookkeeping: classes sorted by descending score.
            labels = np.argsort(preds, axis=1)[:, ::-1]
            labels_ntop = labels[:, :n_top]
            for idx in range(start, stop, 1):
                if PIE_TRUTH[idx - 1] in labels_ntop[idx - start, :]:
                    success_search += 1
            print("Process %d images on %d query, suceess %d images, %0.2f take time %s" % \
                  (stop, n_images, success_search,
                   100 * success_search / np.float(stop),
                   check_time - start_time))
Keras数据预处理
Keras中数据预处理相对比较简单,使用各个模型封装好的preprocess_input即可
实例:
from keras.applications.resnet50 import ResNet50,preprocess_input,decode_predictions
from keras.preprocessing import image
def path_to_tensor(img_path):
    """Load an image file and return a (1, 224, 224, 3) preprocessed batch.

    The returned array has already been run through ResNet50's
    preprocess_input, so it can be fed to the model directly.
    """
    # PIL loads the file as an RGB PIL.Image.Image, resized to 224x224.
    pil_img = image.load_img(img_path, target_size=(224, 224))
    # Convert to a (224, 224, 3) float array.
    arr = image.img_to_array(pil_img)
    # Add the leading batch axis -> (1, 224, 224, 3), then apply the
    # model-specific normalization.
    batch = np.expand_dims(arr, axis=0)
    return preprocess_input(batch)
image_path = 'kera_data/check/cat/cat.0.jpg'
# BUG FIX: the original assigned the tensor to `image`, shadowing the
# keras.preprocessing.image module imported above, and then passed an
# undefined name `img` to predict(). Use a distinct name and pass it through.
# NOTE(review): `ResNet_model` is assumed to be a ResNet50 instance created
# earlier in the full script — confirm against the complete source.
img_tensor = path_to_tensor(image_path)
preds = ResNet_model.predict(img_tensor)
# Show the first 20 raw class scores and the top-10 decoded ImageNet labels.
print(preds[0][0:20])
print(np.array(decode_predictions(preds, top=10)[0])[:, 0])