原文链接:https://github.com/caffe2/tutorials/blob/master/Loading_Pretrained_Models.ipynb
model源码:https://github.com/caffe2/models
注释:本文会修改原文的部分内容。
#!/usr/bin/env python2
from caffe2.python import workspace, model_helper
import numpy as np
import skimage
import skimage.io as io
import operator
import urllib2
from matplotlib import pyplot
1. from caffe2.python.models import squeezenet as mynet
PS:使用install命令安装后的model会比源码多一个__init__.py文件,有了这个文件后才能如上边所示,直接导入模型。
# Absolute path of the installed caffe2 "models" package directory on the
# author's machine; adjust for your environment.
# NOTE(review): not referenced anywhere in the visible part of this script.
CAFFE_MODELS = "/home/wujing/software/software/lib/python2.7/site-packages/caffe2/python/models"
'''================image preprocessing================'''
2. Q1==>Can the input image for the model be of arbitrary size, or must it be a fixed size?
# A1==>trained CNN requires a specifically sized input image whose values are from a particular distribution,这个具体的值在model文件夹下的value_info.json会有明确定义。
# Path of the image to classify (absolute path on the author's machine;
# adjust for your environment).
IMAGE_LOCATION = "/home/wujing/matieral/caffe2/caffe2_tutorials/images/lemon.jpg"
# Side length, in pixels, used below for both the height and the width of the
# network input.
INPUT_IMAGE_SIZE = 227  # set according to the model's value_info.json
# URL of the class-code listing that show_results_name downloads in order to
# translate predicted class indices into readable names.
codes = "https://gist.githubusercontent.com/aaronmarkham/cd3a6b6ac071eca6f7b4a6e40e6038aa/raw/9edb4038a37da6b5a44c3b5bc52e448ff09bfe5b/alexnet_codes"
# only save the center of the image, the edge is removed
3. crop_center实现了哪些功能?
#裁剪图片:只保留图片中心周围的特定区域(该区域的范围是cropx*cropy),图片边缘被裁掉。
def crop_center(img, cropx, cropy):
    """Return the central ``cropy`` x ``cropx`` region of an image.

    Args:
        img: Array whose first two axes are (height, width). Any further
            axes (for example a channel axis) are kept whole, so both
            H x W x C and plain H x W images are accepted.
        cropx: Width of the crop, in pixels.
        cropy: Height of the crop, in pixels.

    Returns:
        The slice of ``img`` centered on the image center. If a crop
        dimension is larger than the image, the whole axis is returned.
    """
    # Only the first two axes are needed; unpacking exactly three values
    # would reject 2-D (grayscale) images.
    height, width = img.shape[:2]
    # Clamp at 0: a negative start would be interpreted as an index counted
    # from the end of the axis and silently return the wrong region.
    startx = max(0, width // 2 - cropx // 2)
    starty = max(0, height // 2 - cropy // 2)
    return img[starty:starty + cropy, startx:startx + cropx]
4. rescale的作用
#在保证图片纵横比的情况下,对其进行放缩。
# Function to rescale the input image to the desired height and/or width. This function will preserve
# the aspect ratio of the original image while making the image the correct scale so we can retrieve
# a good center crop. This function is best used with center crop to resize any size input images into
# specific sized images that our model can use
def rescale(img, input_height, input_width):
    """Resize ``img`` so that one side matches the target, ready for a center crop.

    The source aspect ratio is kept for landscape and portrait images: the
    shorter side is set to the target and the longer side is scaled by the
    same factor. Follow up with a center crop to obtain the exact target
    size.

    Args:
        img: Image array whose first two axes are (height, width).
        input_height: Target height in pixels.
        input_width: Target width in pixels.

    Returns:
        The resized image as returned by ``skimage.transform.resize``.

    Note:
        A square source image is resized to exactly
        ``(input_height, input_width)``, which changes its aspect ratio when
        the target is not square.
    """
    # Imported here because the file only imports ``skimage`` and
    # ``skimage.io``; those do not guarantee that the ``transform``
    # submodule is loaded.
    import skimage.transform

    src_height, src_width = img.shape[:2]
    aspect = src_width / float(src_height)

    # resize() takes output_shape as (rows, cols), i.e. (height, width).
    if aspect > 1:
        # Landscape (wide image): fix the height, scale the width.
        out_shape = (input_height, int(aspect * input_height))
    elif aspect < 1:
        # Portrait (tall image): fix the width, scale the height.
        out_shape = (int(input_width / aspect), input_width)
    else:
        out_shape = (input_height, input_width)
    return skimage.transform.resize(img, out_shape)
5. rescale_fix_HW的作用
# 直接将图片的长宽调整到指定大小,可能会改变图片原有的纵横比。
def rescale_fix_HW(img, input_height, input_width):
    """Resize ``img`` to exactly ``input_height`` x ``input_width``.

    The original aspect ratio is not preserved.

    Args:
        img: Image array whose first two axes are (height, width).
        input_height: Target height in pixels.
        input_width: Target width in pixels.

    Returns:
        The resized image as returned by ``skimage.transform.resize``.
    """
    # Imported here because the file only imports ``skimage`` and
    # ``skimage.io``; those do not guarantee that the ``transform``
    # submodule is loaded.
    import skimage.transform

    # resize() takes output_shape as (rows, cols), i.e. (height, width).
    return skimage.transform.resize(img, (input_height, input_width))
6. show_results_name的作用
# NN返回的结果是一个1000*1的向量,每个element表示该位置的confidence,我们要把最高confidence对应的index转化成其在ImageNet中的名称。N表示提取topN的String_name
def _load_class_names(url):
    """Download the class-code listing at ``url`` and return the names in listing order."""
    response = urllib2.urlopen(url)
    try:
        class_names = []
        for line in response:
            # Each useful line looks like "<code>: '<name>, <synonym>, ...',".
            code, result = line.partition(":")[::2]
            if code.strip().isdigit():
                # Keep the first synonym only; [1:] drops the character
                # that directly follows the colon.
                class_names.append(result.replace("'", "").split(",")[0][1:])
        return class_names
    finally:
        # Release the connection even if parsing fails.
        response.close()


def show_results_name(results, N):
    """Print the N most confident predictions together with their class names.

    Args:
        results: Array-like of per-class confidences. It is flattened, and
            position k of the flattened data is treated as class index k
            (0-based, matching the order of the downloaded class listing).
        N: Number of top classes to report.

    Side effects:
        Downloads the class listing from the module-level ``codes`` URL and
        prints the raw top-N pairs, the top-N indices and one line per
        prediction.
    """
    scores = np.asarray(results).ravel()
    # Stable sort on the negated scores: descending confidence, ties keep
    # ascending class index. No score is dropped, so class indices stay
    # aligned with the listing.
    order = np.argsort(-scores, kind="mergesort")[:N]
    topN = [(int(i), float(scores[i])) for i in order]
    print("Raw top {} results: {}".format(N, topN))

    # Isolate the indexes of the top-N most likely classes.
    topN_inds = [class_index for class_index, _ in topN]
    print("Top {} classes in order: {}".format(N, topN_inds))

    # Build the class look-up table and report each prediction by name.
    class_LUT = _load_class_names(codes)
    for class_index, confidence in topN:
        print("Model predicts '{}' with {}% confidence".format(class_LUT[class_index], float("{0:.2f}".format(confidence * 100))))
7. 导入图片
原始图片使用uint表示,如果用uint导入,图片每个位置的值都是[0,255]范围内的一个整数。用float导入后,每个位置的像素值是[0,1]范围内的浮点数。在shape修改完成后,要把像素值调整回模型期望的取值范围(本文的做法是乘以255再减去均值),否则会出现程序可以正常执行,但预测结果始终不正确的问题。
# Read the image, convert it to floating point with img_as_float and store
# it as float32.
img = skimage.img_as_float(skimage.io.imread(IMAGE_LOCATION)).astype(np.float32)
# Format into a single string: under Python 2, print("...", x) would print a
# tuple. The shape is in HWC order for a color image.
print("Original Image Shape: {}".format(img.shape))
# Optional: display the loaded image.
#pyplot.figure()
#pyplot.imshow(img)
8. 根据model对input的格式要求调整input
'''value_info ===> {"data": [1, [1, 3, 227, 227]]}'''
#=============directly scale to fixed HW =====================
# Resize straight to INPUT_IMAGE_SIZE x INPUT_IMAGE_SIZE; the aspect ratio of
# the source image is not preserved.
img = rescale_fix_HW(img, INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE) # resize to the fixed input size
#pyplot.figure()
#pyplot.imshow(img2)
#print("Image Shape after rescaling: " , img.shape)
# remove mean for better results
mean = 128
img = img * 255 - mean # scale the pixel values by 255, then subtract the mean
# =========== =rescale + crop ==========================
# Rescale the image to comply with our desired input size. This will not make the image 227x227
# but it will make either the height or width 227 so we can get the ideal center crop.
# img = rescale(img, INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE)
# print("Image Shape after rescaling: " , img.shape)
# pyplot.figure()
# pyplot.imshow(img)
# pyplot.title('Rescaled image')
#
# # Crop the center 227x227 pixels of the image so we can feed it to our model
# img = crop_center(img, INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE)
# print("Image Shape after cropping: " , img.shape)
# pyplot.figure()
# pyplot.imshow(img)
# pyplot.title('Center Cropped')
# =============================================================================
9. tweak HWC-->CHW.
# Reorder the axes from HWC to CHW. transpose(2, 0, 1) is equivalent to
# swapping W with C (HWC -> HCW) and then H with C (HCW -> CHW).
img = img.transpose(2, 0, 1)
# Format into a single string: under Python 2, print("...", x) would print a
# tuple.
print("CHW Image Shape: {}".format(img.shape))
10. switch RGB --> BGR
# caffe2 needs the image to be BGR: reverse the order of the three entries
# along the first axis (the channel axis once the image is in CHW order),
# turning RGB into BGR.
img = img[(2, 1, 0), :, :]
11. add batch size axis which completes the formation of the NCHW shaped input that we want,以满足info的要求
# Prepend a batch axis of length 1 (CHW -> NCHW) and make sure the data is
# float32.
img = img[np.newaxis, :, :, :].astype(np.float32)
# Format into a single string: under Python 2, print("...", x) would print a
# tuple.
print("NCHW image (ready to be used as input): {}".format(img.shape))
'''12. execute the model'''
# Take the init and predict nets from the imported model module.
init_net = mynet.init_net # the nets could also be read directly from file here
predict_net = mynet.predict_net
# Build a predictor from the two serialized nets.
p = workspace.Predictor(init_net.SerializeToString(), predict_net.SerializeToString())
# Run the model with the image passed under the key "data".
results = p.run({"data":img}) # a dict is required here: use ':' rather than ','
'''13. display the result'''
# Turn the output into a multi-dimensional array that we can examine.
results = np.asarray(results)
# Format into a single string: under Python 2, print("...", x) would print a
# tuple.
print("results shape: {}".format(results.shape))
# Quick way to get the top-1 prediction result.
# Squeeze out the single-dimensional axes (presumably leaving a 1-D array of
# length 1000 for this model -- confirm against the printed shape).
preds = np.squeeze(results)
# Get the prediction and the confidence from the maximum value in preds and
# the index of that maximum.
curr_conf = np.max(preds)
curr_pred = np.argmax(preds)
print("Prediction: {}".format(curr_pred))
print("Confidence: {}".format(curr_conf))
# Report the top-1 class by name.
show_results_name(results, 1)