前面的博客介绍了如何生成多label的训练数据,也介绍了测试时对图片处理需要注意的点,这篇博客就来介绍如何利用训练好的模型进行实际测试
官方版demo
# Official-style Caffe classification demo: load a trained net, configure a
# caffe.io.Transformer for preprocessing, then classify each image in a list.
import numpy as np
import os
import sys
import cv2
caffe_root = 'your caffe root'
# Make the pycaffe package under caffe_root importable.
sys.path.insert(0, caffe_root + '/python')
import caffe
caffe.set_device(1)
caffe.set_mode_gpu()
model_def = 'xxx_deploy.prototxt'
model_weights = 'xxx.caffemodel'
net = caffe.Net(model_def, model_weights, caffe.TEST)
# h and w are placeholders for the network input height/width; they are not
# defined anywhere in this snippet and must be filled in.
net.blobs['data'].reshape(1,3,h,w) # (batch_size,c,h,w)
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2,0,1)) # (h,w,c)--->(c,h,w)
transformer.set_mean('data', np.array([104,117,123])) # note: the mean is given in BGR order
'''
或者通过caffe_root/build/tools/compute_image_mean 计算图像均值得到xxx.npy
mu = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')
mu = mu.mean(1).mean(1) #对所有像素值取平均以此获取BGR的均值像素值
print 'mean-subtracted values:', zip('BGR', mu)
'''
transformer.set_raw_scale('data', 255) # rescale from [0, 1] to [0, 255]
transformer.set_channel_swap('data', (2,1,0))# RGB--->BGR
'''
可能你对均值设置成BGR有疑问,不是到后面才把RGB转为BGR吗?
其实transformer.setXXX()这些只是设置属性,实际执行顺序是参考附录preprocess函数定义
'''
# Placeholder: replace with an actual list of image paths.
image_names = [图片list]
for image_name in image_names:
    image = caffe.io.load_image(image_name)# loads through skimage; see the appendix
    # Preprocess the image with the transformer settings configured above.
    transformed_image = transformer.preprocess('data', image)
    net.blobs['data'].data[...] = transformed_image
    # Forward pass only (no backward pass at test time).
    output = net.forward()
    output_prob= output['prob'][0].argmax() # label with the highest probability
    print image_name,output_prob
自定义版(全部使用PIL)
训练程序不动,只修改测试程序
import numpy as np
import os
import sys
from PIL import Image
caffe_root = 'your caffe root'
sys.path.insert(0, caffe_root + '/python')
import caffe
caffe.set_device(1)
caffe.set_mode_gpu()
model_def = 'xxx_deploy.prototxt'
model_weights = 'xxx.caffemodel'
net = caffe.Net(model_def, model_weights, caffe.TEST)
image_names = [图片list]
for image_name in image_names:
im = Image.open(image_name)
im = im.resize((w, h), Image.BILINEAR)
im = np.array(im, dtype=np.float32) # im:(h,w,3) RGB
im = im[:, :, ::-1] # im: (h,w,3) BGR
im -= np.array((104, 117, 123)) # 减去均值,注意是 BGR
im = im.transpose((2, 0, 1)) # im:(3,h,w)
net.blobs['data'].reshape(1, *im.shape)
net.blobs['data'].data[...] = im
output = net.forward()
output_prob= output['prob'][0].argmax() # 概率最大的label
print image_name,output_prob
自定义版(全部使用skimage)
修改训练数据重新训练(有点浪费时间),测试程序使用官方版[demo](https://github.com/BVLC/caffe/blob/master/examples/00-classification.ipynb)
训练数据生成的详细程序见之前博客caffe实现多标签输入
### 声明:未实际测试,谨慎使用
import skimage
from skimage import io,transform
#...
in_db=lmdb.open('your data(images) lmdb path',map_size=int(1e12))
with in_db.begin(write=True) as in_txn:
for in_idx,in_ in enumerate(file_list):
im_file='your images path'+in_
im=io.imread(im_file,as_grey=False)
im = transform.resize(im,(h, w),order=1)
im = im * 255 # [0-1]->[0-255]
im=im[:,:,::-1]#把im的RGB调整为BGR
im_dat=caffe.io.array_to_datum(im)
in_txn.put('{:0>10d}'.format(in_idx),im_dat.SerializeToString())
print 'data train: {} [{}/{}]'.format(in_, in_idx+1, len(file_list))
del im_file, im, im_dat
in_db.close()
print 'train data(images) are done!'
代码附录:
# 都在caffe_root/python/caffe/io.py中定义
# 附录preprocess函数定义
def preprocess(self, in_, data):
    """
    Format input for Caffe:
    - convert to single
    - resize to input dimensions (preserving number of channels)
    - transpose dimensions to K x H x W
    - reorder channels (for instance color to BGR)
    - scale raw input (e.g. from [0, 1] to [0, 255] for ImageNet models)
    - subtract mean
    - scale feature

    The steps run in the fixed order above, regardless of the order in which
    the corresponding set_* methods were called on the Transformer.

    Parameters
    ----------
    in_ : name of input blob to preprocess for
    data : (H' x W' x K) ndarray

    Returns
    -------
    caffe_in : (K x H x W) ndarray for input to a Net
    """
    self.__check_input(in_)
    # NOTE(review): astype(copy=False) can return `data` itself when it is
    # already float32, so the in-place operations below may modify the
    # caller's array if no resize or channel swap produced a copy first --
    # confirm before reusing `data` after this call.
    caffe_in = data.astype(np.float32, copy=False)
    # Look up the per-input settings; each is None when it was never set.
    transpose = self.transpose.get(in_)
    channel_swap = self.channel_swap.get(in_)
    raw_scale = self.raw_scale.get(in_)
    mean = self.mean.get(in_)
    input_scale = self.input_scale.get(in_)
    # self.inputs holds the blob shapes passed to the Transformer constructor,
    # e.g. {'data': net.blobs['data'].data.shape}; dropping the first two
    # entries leaves the (h, w) expected by the deploy network.
    in_dims = self.inputs[in_][2:]
    # Resize only when the image's (h, w) = shape[:2] differs from in_dims.
    if caffe_in.shape[:2] != in_dims:
        caffe_in = resize_image(caffe_in, in_dims)  # bilinear by default
    # (h,w,c) -> (c,h,w)
    if transpose is not None:
        caffe_in = caffe_in.transpose(transpose)
    # RGB -> BGR
    if channel_swap is not None:
        caffe_in = caffe_in[channel_swap, :, :]
    # [0-1] -> [0-255]
    if raw_scale is not None:
        caffe_in *= raw_scale
    # Subtract the mean; by this point the channels are already swapped,
    # which is why the mean must be supplied in BGR order.
    if mean is not None:
        caffe_in -= mean
    if input_scale is not None:
        caffe_in *= input_scale
    return caffe_in
# 附录caffe.io.load_image函数定义
def load_image(filename, color=True):
    """
    Load an image converting from grayscale or alpha as needed.

    Parameters
    ----------
    filename : string
    color : boolean
        flag for color format. True (default) loads as RGB while False
        loads as intensity (if image is already grayscale).

    Returns
    -------
    image : an image with type np.float32 in range [0, 1]
        of size (H x W x 3) in RGB or
        of size (H x W x 1) in grayscale.
    """
    # NOTE: the image is read through skimage here; the result is then
    # converted to float32.
    img = skimage.img_as_float(skimage.io.imread(filename, as_grey=not color)).astype(np.float32)
    if img.ndim == 2:
        # Grayscale image. For reference only (not necessarily the weights
        # skimage applies): gray = 0.3*red + 0.59*green + 0.11*blue, or the
        # integer variant gray = (77*red + 150*green + 29*blue + 128) >> 8.
        img = img[:, :, np.newaxis]  # (h,w) -> (h,w,1)
        if color:
            img = np.tile(img, (1, 1, 3))  # (h,w,1) -> (h,w,3): repeat the gray channel
    elif img.shape[2] == 4:
        # Four channels (h,w,c) with c = 4, e.g. a PNG with an alpha channel.
        img = img[:, :, :3]  # keep only channels 0, 1 and 2
    return img
# 附录resize_image函数定义
def resize_image(im, new_dims, interp_order=1):
    """
    Resize an image array with interpolation.

    Parameters
    ----------
    im : (H x W x K) ndarray
    new_dims : (height, width) tuple of new dimensions.
    interp_order : interpolation order, default is linear (bilinear).
        See the previous blog post for notes on choosing interp_order.

    Returns
    -------
    im : resized ndarray with shape (new_dims[0], new_dims[1], K)
    """
    # Single-channel (grayscale) or three-channel (RGB) image.
    if im.shape[-1] == 1 or im.shape[-1] == 3:
        im_min, im_max = im.min(), im.max()
        if im_max > im_min:
            # skimage is fast but only understands {1,3} channel images
            # in [0, 1].
            # Normalise to [0, 1], resize, then map back to the original range.
            im_std = (im - im_min) / (im_max - im_min)
            # `resize` is skimage.transform.resize, imported at the top of io.py.
            resized_std = resize(im_std, new_dims, order=interp_order)
            resized_im = resized_std * (im_max - im_min) + im_min
        else:
            # the image is a constant -- avoid divide by 0
            ret = np.empty((new_dims[0], new_dims[1], im.shape[-1]),
                           dtype=np.float32)
            ret.fill(im_min)
            return ret
    else:
        # ndimage interpolates anything but more slowly.
        # Per-axis zoom factors for (h, w); the trailing 1 leaves the channel
        # axis unscaled.
        scale = tuple(np.array(new_dims, dtype=float) / np.array(im.shape[:2]))
        resized_im = zoom(im, scale + (1,), order=interp_order)
    return resized_im.astype(np.float32)