R-CNN framework
下图展示了R-CNN检测框架的流程。
1. 输入图片;
2. 对输入图片应用selective search算法,产生约2,000个region proposals(实际为1565个,详见实验结果);
3. 对所有的region proposals进行预处理,使其大小相等(227*227*3,详见CNN网络定义),并将处理后的region proposals通过CNN进行特征提取,每个region proposal产生一个固定大小的特征向量(4096维,详见CNN网络定义);
4. 对所有特征向量进行分类(原文采用SVM对特征向量进行分类,这里使用CNN直接分类)。
R-CNN system overview
实验
源代码见 (Caffe/examples/detection.ipynb)
!mkdir -p _temp
# Write the path of the input image into det_input.txt under _temp.
!echo `pwd`/images/fish-bike.jpg > _temp/det_input.txt
# Apply selective search to the input image to generate region proposals, then
# classify the generated proposals with the trained CNN (200 classes).
# The CNN structure is Caffe/models/bvlc_reference_rcnn_ilsvrc13/deploy.prototxt
# The pretrained weights are in models/bvlc_reference_rcnn_ilsvrc13/bvlc_reference_rcnn_ilsvrc13.caffemodel
# The classification results and bounding-box positions of all region
# proposals are written to det_output.h5 under _temp.
!../python/detect.py --crop_mode=selective_search --pretrained_model=../models/bvlc_reference_rcnn_ilsvrc13/bvlc_reference_rcnn_ilsvrc13.caffemodel --model_def=../models/bvlc_reference_rcnn_ilsvrc13/deploy.prototxt --gpu --raw_scale=255 _temp/det_input.txt _temp/det_output.h5
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# Load the detection results stored in det_output.h5 under _temp, then show
# the table's shape, its column names and its first row.
df = pd.read_hdf('_temp/det_output.h5', key='df')
for summary in (df.shape, df.columns, df.iloc[0]):
    print(summary)
# predictions_df
#   rows:    1565, one per region proposal
#   columns: 200, each holding the activation of that region proposal for one
#            class listed in Caffe/data/ilsvrc12/det_synset_words.txt
with open('../data/ilsvrc12/det_synset_words.txt') as f:
    # Split every line at its first space: synset id on the left, names on the right.
    rows = [line.strip().partition(' ') for line in f]

# Keep only the text before the first comma as the class name.
labels_df = pd.DataFrame([
    {'synset_id': synset_id, 'name': rest.partition(',')[0]}
    for synset_id, _sep, rest in rows
])

# Stack the per-window prediction vectors into one matrix with named columns.
activations = np.vstack(df.prediction.values)
predictions_df = pd.DataFrame(activations, columns=labels_df['name'])
print(predictions_df.iloc[0])
# Visualize the 1565 * 200 activation values as a grayscale matrix plot
# (one row per window, one column per class).
plt.gray()
plt.matshow(predictions_df.values)
plt.xlabel('Classes')
plt.ylabel('Windows')
# List the 10 classes most likely to appear in the image: take each class's
# maximum activation over all windows, then rank the classes by that value.
# sort_values returns a new Series instead of sorting in place, so the sorted
# result must be kept; otherwise the first 10 classes in file order would be
# printed rather than the 10 highest-scoring ones.
max_s = predictions_df.max(0).sort_values(ascending=False)
print(max_s.head(10))
# Draw the bounding boxes of the strongest 'person' and 'bicycle' windows.
# First locate the window with the highest activation for each class.
i = predictions_df['person'].argmax()
j = predictions_df['bicycle'].argmax()

# Five highest-scoring classes for the window picked for 'person'.
f = pd.Series(df['prediction'].iloc[i], index=labels_df['name'])
print('Top detection:')
print(f.sort_values(ascending=False).iloc[:5])
print('')

# Five highest-scoring classes for the window picked for 'bicycle'.
f = pd.Series(df['prediction'].iloc[j], index=labels_df['name'])
print('Second-best detection:')
print(f.sort_values(ascending=False).iloc[:5])

# Overlay both windows on the image: 'person' in red, 'bicycle' in blue.
im = plt.imread('images/fish-bike.jpg')
plt.imshow(im)
currentAxis = plt.gca()

det = df.iloc[i]
width, height = det['xmax'] - det['xmin'], det['ymax'] - det['ymin']
currentAxis.add_patch(
    plt.Rectangle((det['xmin'], det['ymin']), width, height,
                  fill=False, edgecolor='r', linewidth=5))

det = df.iloc[j]
width, height = det['xmax'] - det['xmin'], det['ymax'] - det['ymin']
currentAxis.add_patch(
    plt.Rectangle((det['xmin'], det['ymin']), width, height,
                  fill=False, edgecolor='b', linewidth=5))
# Repeat of the person/bicycle cell: find, print, and display the windows
# with the highest 'person' and 'bicycle' activations.
i = predictions_df['person'].argmax()
j = predictions_df['bicycle'].argmax()
# Show the five highest-scoring classes for the window chosen for 'person'.
f = pd.Series(df['prediction'].iloc[i], index=labels_df['name'])
print('Top detection:')
print(f.sort_values(ascending=False)[:5])
print('')
# Show the five highest-scoring classes for the window chosen for 'bicycle'.
f = pd.Series(df['prediction'].iloc[j], index=labels_df['name'])
print('Second-best detection:')
print(f.sort_values(ascending=False)[:5])
# Draw the 'person' window in red and the 'bicycle' window in blue.
im = plt.imread('images/fish-bike.jpg')
plt.imshow(im)
currentAxis = plt.gca()
det = df.iloc[i]
# Rectangle takes (x, y) of the top-left corner followed by width and height.
coords = (det['xmin'], det['ymin']), det['xmax'] - det['xmin'], det['ymax'] - det['ymin']
currentAxis.add_patch(plt.Rectangle(*coords, fill=False, edgecolor='r', linewidth=5))
det = df.iloc[j]
coords = (det['xmin'], det['ymin']), det['xmax'] - det['xmin'], det['ymax'] - det['ymin']
currentAxis.add_patch(plt.Rectangle(*coords, fill=False, edgecolor='b', linewidth=5))
def nms_detections(dets, overlap=0.3):
    """
    Greedy non-maximum suppression.

    Repeatedly keep the highest-scoring remaining detection and discard
    every other remaining detection whose overlap with it (intersection
    area divided by union area) is greater than `overlap`.

    This version is translated from Matlab code by Tomasz Malisiewicz,
    who sped up Pedro Felzenszwalb's code.

    Parameters
    ----------
    dets: ndarray
        each row is ['xmin', 'ymin', 'xmax', 'ymax', 'score']
    overlap: float
        largest overlap ratio with an already selected detection that a
        detection may have and still be kept (0.3 default)

    Output
    ------
    dets: ndarray
        the rows that survive suppression, in the order they were
        selected (highest score first).
    """
    left, top, right, bottom = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    box_area = ((right - left) * (bottom - top)).astype(float)

    # Candidate row indices in ascending score order; the best is always last.
    order = np.argsort(dets[:, 4])
    keep = []
    while order.size:
        best, order = order[-1], order[:-1]
        keep.append(best)

        # Intersection of the selected box with every remaining candidate;
        # clamping at 0 makes disjoint boxes contribute no area.
        inter_w = np.maximum(
            0., np.minimum(right[best], right[order]) - np.maximum(left[best], left[order]))
        inter_h = np.maximum(
            0., np.minimum(bottom[best], bottom[order]) - np.maximum(top[best], top[order]))
        inter = inter_w * inter_h

        # Intersection over union; only candidates at or below the
        # threshold stay in the running.
        ratio = inter / (box_area[best] + box_area[order] - inter)
        order = order[ratio <= overlap]

    return dets[keep, :]
# Run non-maximum suppression on the 'bicycle' scores and draw the first
# three surviving windows.
scores = predictions_df['bicycle']
windows = df[['xmin', 'ymin', 'xmax', 'ymax']].values
# Add the column axis on the underlying ndarray: indexing a pandas Series
# with [:, np.newaxis] is not supported by current pandas versions.
dets = np.hstack((windows, scores.values[:, np.newaxis]))
nms_dets = nms_detections(dets)

plt.imshow(im)
currentAxis = plt.gca()
colors = ['r', 'b', 'y']
for c, det in zip(colors, nms_dets[:3]):
    # det is [xmin, ymin, xmax, ymax, score]; Rectangle wants corner, width, height.
    currentAxis.add_patch(
        plt.Rectangle((det[0], det[1]), det[2]-det[0], det[3]-det[1],
                      fill=False, edgecolor=c, linewidth=5)
    )
# print() call form, consistent with the rest of the file (Python 3).
print('scores:', nms_dets[:3, 4])
# Remove the _temp working directory and everything in it.
!rm -rf _temp