读取制作的手指检测的数据集
from mxnet import gluon
from mxnet import image
from mxnet import nd
# Side length (in pixels) of the square images fed to the network.
data_shape = 256
# Number of images per mini-batch.
batch_size = 36
# Per-channel RGB mean; added back to images before they are plotted.
rgb_mean = nd.array([123, 117, 104])
def get_iterators(data_shape, batch_size,
                  data_dir='/home/xm/桌面/finger_detection/data/rev'):
    """Build the training and validation iterators for the finger dataset.

    Args:
        data_shape: side length of the square images the iterators produce.
        batch_size: number of images per batch.
        data_dir: directory containing ``img_512_512_train.rec``,
            ``img_512_512_train.idx`` and ``img_512_512_val.rec``.
            Defaults to the location the notebook originally hard-coded.

    Returns:
        A tuple ``(train_iter, val_iter, class_names, num_class)``.
    """
    import os

    class_names = ['finger']
    num_class = len(class_names)
    image_shape = (3, data_shape, data_shape)

    # Training data: shuffled, with random crops that must keep at least
    # 95% of an object (up to 200 attempts per crop).
    train_iter = image.ImageDetIter(
        batch_size=batch_size,
        data_shape=image_shape,
        path_imgrec=os.path.join(data_dir, 'img_512_512_train.rec'),
        path_imgidx=os.path.join(data_dir, 'img_512_512_train.idx'),
        shuffle=True,
        mean=True,
        rand_crop=1,
        min_object_covered=0.95,
        max_attempts=200)

    # Validation data: fixed order, no cropping augmentation.
    val_iter = image.ImageDetIter(
        batch_size=batch_size,
        data_shape=image_shape,
        path_imgrec=os.path.join(data_dir, 'img_512_512_val.rec'),
        shuffle=False,
        mean=True)

    return train_iter, val_iter, class_names, num_class
# Create the iterators once, using the module-level shape and batch size.
train_data, test_data, class_names, num_class = get_iterators(
    data_shape, batch_size)
将制作的数据集读取并可视化
%matplotlib inline
import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 120
import matplotlib.pyplot as plt
def box_to_rect(box, color, linewidth=3):
    """Turn a corner-format box into an unfilled matplotlib rectangle.

    Args:
        box: object exposing ``asnumpy()`` whose first four values are read
            as ``x0, y0, x1, y1``.
        color: edge colour of the rectangle.
        linewidth: edge width of the rectangle.

    Returns:
        A ``plt.Rectangle`` anchored at ``(x0, y0)`` with width ``x1 - x0``
        and height ``y1 - y0``.
    """
    coords = box.asnumpy()
    anchor = (coords[0], coords[1])
    width = coords[2] - coords[0]
    height = coords[3] - coords[1]
    return plt.Rectangle(anchor, width, height,
                         fill=False, edgecolor=color, linewidth=linewidth)
# Fetch one batch to visualise. Nothing earlier in the file assigns `batch`,
# so reading it below would otherwise raise NameError.
batch = train_data.next()

# Show the first nine images of the batch in a 3x3 grid with their boxes.
_, figs = plt.subplots(3, 3, figsize=(6, 6))
for i in range(3):
    for j in range(3):
        img, labels = batch.data[0][3 * i + j], batch.label[0][3 * i + j]
        # Channel-first -> channel-last, then add the mean back for display.
        img = img.transpose((1, 2, 0)) + rgb_mean
        # Clamp to the valid pixel range and scale to [0, 1] for imshow.
        img = img.clip(0, 255).asnumpy() / 255
        fig = figs[i][j]
        fig.imshow(img)
        for label in labels:
            # Columns 1..4 of a label row are scaled by data_shape to get
            # pixel coordinates (presumably normalised corners - confirm).
            rect = box_to_rect(label[1:5] * data_shape, 'red', 2)
            fig.add_patch(rect)
        fig.axes.get_xaxis().set_visible(False)
        fig.axes.get_yaxis().set_visible(False)
plt.show()
定义检测模型
from mxnet import gluon
class ToySSD(gluon.Block):
    """Small SSD detector assembled from the toy_ssd_* helper functions."""

    def __init__(self, num_classes, **kwargs):
        """Create the network for ``num_classes`` object classes."""
        super().__init__(**kwargs)
        # Five prediction layers; each handles its own anchor sizes, growing
        # from small to large to match the shape of the network.
        self.anchor_sizes = [[.2, .272], [.37, .447], [.54, .619], [.71, .79], [.88, .961]]
        # Every layer uses 1, 2 and 0.5 as candidate aspect ratios.
        self.anchor_ratios = [[1, 2, .5]] * 5
        self.num_classes = num_classes
        with self.name_scope():
            body, downsamples, class_preds, box_preds = toy_ssd_model(4, num_classes)
            self.body = body
            self.downsamples = downsamples
            self.class_preds = class_preds
            self.box_preds = box_preds

    def forward(self, x):
        """Run the detector on ``x``.

        Returns:
            A tuple ``(anchors, class_preds, box_preds)``.
        """
        layer_outputs = toy_ssd_forward(
            x, self.body, self.downsamples, self.class_preds, self.box_preds,
            self.anchor_sizes, self.anchor_ratios)
        layer_anchors, layer_classes, layer_boxes = layer_outputs
        # Flatten and concatenate the per-layer outputs so that anchors and
        # predictions stay in one-to-one correspondence.
        anchors = concat_predictions(layer_anchors)
        box_preds = concat_predictions(layer_boxes)
        class_preds = concat_predictions(layer_classes)
        # Reshape so the last axis holds num_classes + 1 scores, which makes
        # the softmax easier to compute.
        class_preds = nd.reshape(class_preds, shape=(0, -1, self.num_classes + 1))
        return anchors, class_preds, box_preds
# Build an SSD network with 2 positive classes and push a single image
# through it to inspect the output shapes.
# NOTE(review): the dataset defines only one class ('finger'), while this net
# is created with 2 - confirm whether num_class was intended here.
net = ToySSD(2)
net.initialize()
# Keep the batch axis by slicing rather than indexing.
x = batch.data[0][0:1]
default_anchors, class_predictions, box_predictions = net(x)
print(
    'Outputs:',
    'anchors', default_anchors.shape,
    'class prediction', class_predictions.shape,
    'box prediction', box_predictions.shape,
)
训练模型
import time
from mxnet import autograd
# Train for 100 epochs, reporting both metrics and the wall time per epoch.
for epoch in range(100):
    # Restart the data iterator and clear the running metrics.
    train_data.reset()
    cls_metric.reset()
    box_metric.reset()
    tic = time.time()

    for i, batch in enumerate(train_data):
        # Move the images and labels to the training device.
        x = batch.data[0].as_in_context(ctx)
        y = batch.label[0].as_in_context(ctx)

        with autograd.record():
            default_anchors, class_predictions, box_predictions = net(x)
            # Derive per-anchor targets from the anchors, predictions and
            # ground-truth labels (helper defined elsewhere).
            box_target, box_mask, cls_target = training_targets(
                default_anchors, class_predictions, y)
            loss1 = cls_loss(class_predictions, cls_target)
            loss2 = box_loss(box_predictions, box_target, box_mask)
            loss = loss1 + loss2
        loss.backward()
        trainer.step(batch_size)

        # The class metric receives predictions with axes 1 and 2 swapped;
        # the box metric only sees the predictions kept by the mask.
        cls_metric.update([cls_target], [class_predictions.transpose((0, 2, 1))])
        box_metric.update([box_target], [box_predictions * box_mask])

    stats = (epoch, *cls_metric.get(), *box_metric.get(), time.time() - tic)
    print('Epoch %2d,train %s %.2f,%s %.5f,time %.1f sec' % stats)
测试图片,显示效果
import numpy as np
import cv2
def preprocess(image):
    """Convert a BGR image array into a mean-subtracted network input.

    The image is resized to ``data_shape`` x ``data_shape``, its channels are
    reordered from BGR to RGB, the mean ``[123, 117, 104]`` is subtracted in
    float32, and the result is laid out as [batch, channel, height, width].

    Returns:
        An ``nd`` array with a leading batch axis of size 1.
    """
    resized = cv2.resize(image, (data_shape, data_shape))
    # Reverse the channel order: BGR -> RGB.
    rgb = resized[:, :, [2, 1, 0]]
    # Work in float32 so the mean subtraction cannot wrap around.
    pixels = rgb.astype(np.float32)
    pixels -= np.array([123, 117, 104])
    # Height-width-channel -> channel-height-width, then add the batch axis.
    channel_first = np.transpose(pixels, (2, 0, 1))
    batched = channel_first[np.newaxis, :]
    return nd.array(batched)
# Load one test image and run it through the network.
# NOTE: assigning to `image` rebinds the name imported at the top of the file
# (`from mxnet import image`); the name is kept because the final display
# call reads `image`.
image_path = '/home/xm/桌面/finger_detection/data/img512_512/2712.png'
image = cv2.imread(image_path)
if image is None:
    # cv2.imread signals an unreadable or missing file by returning None.
    raise FileNotFoundError('cannot read image: %s' % image_path)
x = preprocess(image)
print('x', x.shape)
# if pre-trained model is provided, we can load it
# net.load_params('ssd_%d.params' % epochs, ctx)
temp = time.time()
default_anchors, class_predictions, box_predictions = net(x.as_in_context(ctx))
print('predict time>>>>>>>>>>>>>>>>>>>%.4f sec'%(time.time()-temp))
# Print the anchors returned by the network; the former `anchors` name was
# never defined at module level and raised NameError.
print('anchors', default_anchors)
print('class predictions', class_predictions)
print('box delta predictions', box_predictions)
from mxnet.contrib.ndarray import MultiBoxDetection
# Turn the raw class scores into probabilities: swap axes 1 and 2, then apply
# a channel-wise softmax.
class_predictions = nd.transpose(class_predictions, (0, 2, 1))
class_predictions = nd.SoftmaxActivation(class_predictions, mode='channel')
# Apply the predicted shifts to the anchor boxes, run non-maximum
# suppression, etc.
output = MultiBoxDetection(
    class_predictions,
    box_predictions,
    default_anchors,
    force_suppress=True,
    clip=False,
)
print(output)
def display(img, out, thresh=0.5):
    """Plot ``img`` with the detections in ``out`` drawn on top.

    Args:
        img: image array; ``img.shape[1]`` and ``img.shape[0]`` are used as
            width and height to scale the boxes.
        out: iterable of detection rows, read as ``[class_id, score, x0, y0,
            x1, y1]``. Rows with a negative class id or a score below
            ``thresh`` are skipped.
        thresh: minimum score for a detection to be drawn.
    """
    import random
    import matplotlib as mpl

    mpl.rcParams['figure.figsize'] = (10, 10)
    plt.clf()
    plt.imshow(img)

    # One random colour per class id, chosen the first time the id is drawn.
    pens = {}
    # Box coordinates are multiplied by (width, height, width, height).
    scales = [img.shape[1], img.shape[0]] * 2

    for det in out:
        cid = int(det[0])
        if cid < 0:
            continue
        score = det[1]
        if score < thresh:
            continue
        if cid not in pens:
            pens[cid] = (random.random(), random.random(), random.random())
        colour = pens[cid]
        corners = [int(p * s) for p, s in zip(det[2:6].tolist(), scales)]
        xmin, ymin, xmax, ymax = corners
        axes = plt.gca()
        axes.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                     fill=False, edgecolor=colour, linewidth=3))
        text = class_names[cid]
        axes.text(xmin, ymin - 2, '{:s} {:.3f}'.format(text, score),
                  bbox=dict(facecolor=colour, alpha=0.5),
                  fontsize=12, color='white')
    plt.show()
# Draw the detections for the first (only) image; the channel axis is
# reversed because cv2 loaded the image as BGR.
display(
    image[:, :, (2, 1, 0)],
    output[0].asnumpy(),
    thresh=0.5,
)