手指检测训练过程

最新推荐文章于 2024-08-07 11:18:34 发布

所思即所得

最新推荐文章于 2024-08-07 11:18:34 发布

阅读量1k

点赞数

分类专栏：图像检测

本文链接：https://blog.csdn.net/shouhan6396/article/details/89875200

版权

图像检测专栏收录该内容

3 篇文章 0 订阅

订阅专栏

读取自制的手指检测数据集

from mxnet import gluon
from mxnet import image
from mxnet import nd
# Side length used for the square network input: the iterators are built with
# shape (3, data_shape, data_shape) and preprocess() resizes to this size.
data_shape = 256
# Number of images per batch; also passed to trainer.step() during training.
batch_size = 36
# Per-channel offset added back to images before they are displayed.
# NOTE(review): presumably in RGB order to match the iterator output - confirm.
rgb_mean = nd.array([123,117,104])
def get_iterators(data_shape,batch_size):
    """Create the training and validation detection iterators.

    Both iterators read RecordIO files from fixed locations on disk and are
    configured with the same batch size, the same ``(3, data_shape,
    data_shape)`` sample shape and ``mean=True``.

    Args:
        data_shape: side length of the square images the iterators produce.
        batch_size: number of samples per batch.

    Returns:
        A tuple ``(train_iter, val_iter, class_names, num_class)`` where
        ``class_names`` is the list of category names and ``num_class`` is
        its length.
    """
    class_names = ['finger']
    num_class = len(class_names)

    train_rec = '/home/xm/桌面/finger_detection/data/rev/img_512_512_train.rec'
    train_idx = '/home/xm/桌面/finger_detection/data/rev/img_512_512_train.idx'
    val_rec = '/home/xm/桌面/finger_detection/data/rev/img_512_512_val.rec'

    # Options that are identical for the two iterators.
    shared = dict(
        batch_size=batch_size,
        data_shape=(3, data_shape, data_shape),
        mean=True,
    )

    # Training data is shuffled (which is why the .idx file is supplied) and
    # randomly cropped; the crop settings are passed through unchanged.
    train_iter = image.ImageDetIter(
        path_imgrec=train_rec,
        path_imgidx=train_idx,
        shuffle=True,
        rand_crop=1,
        min_object_covered=0.95,
        max_attempts=200,
        **shared)

    # Validation data is read in file order with no random cropping.
    val_iter = image.ImageDetIter(
        path_imgrec=val_rec,
        shuffle=False,
        **shared)

    return train_iter, val_iter, class_names, num_class
# Build the iterators once at module level; note that the validation iterator
# is bound to the name `test_data`.
train_data,test_data,class_names,num_class = get_iterators(data_shape,batch_size)

将制作的数据集读取并可视化

%matplotlib inline
import matplotlib as mpl
# Render figures at 120 DPI.
mpl.rcParams['figure.dpi'] = 120
import matplotlib.pyplot as plt
def box_to_rect(box,color,linewidth=3):
    """Build an unfilled matplotlib rectangle from a corner-format box.

    Args:
        box: object exposing ``asnumpy()``; the resulting array is read as
            ``(x0, y0, x1, y1)``.
        color: edge colour of the rectangle.
        linewidth: edge width of the rectangle.

    Returns:
        A ``plt.Rectangle`` anchored at ``(x0, y0)`` with width ``x1 - x0``
        and height ``y1 - y0``.
    """
    corners = box.asnumpy()
    x0, y0 = corners[0], corners[1]
    width = corners[2] - corners[0]
    height = corners[3] - corners[1]
    return plt.Rectangle(
        (x0, y0), width, height,
        fill=False, edgecolor=color, linewidth=linewidth)
# Fetch one batch from the training iterator to preview. Without this the
# name `batch` is unbound when the grid below indexes into it.
batch = train_data.next()
# Draw the first nine samples of the batch on a 3x3 grid.
_,figs = plt.subplots(3,3,figsize=(6,6))
for i in range(3):
    for j in range(3):
        # Sample number 3*i+j of the batch, together with its label rows.
        img,labels = batch.data[0][3*i+j],batch.label[0][3*i+j]
        # Move channels last for imshow and add the mean offset back.
        img = img.transpose((1,2,0)) + rgb_mean
        # Clamp to the 0..255 range and rescale to 0..1 for display.
        img = img.clip(0,255).asnumpy()/255
        fig = figs[i][j]
        fig.imshow(img)
        for label in labels:
            # Columns 1..4 of a label row are scaled by data_shape and drawn
            # as a box. NOTE(review): presumably normalised corner
            # coordinates with the class id in column 0 - confirm.
            rect = box_to_rect(label[1:5]*data_shape,'red',2)
            fig.add_patch(rect)
        # Hide the axis ticks; only the image and boxes are of interest.
        fig.axes.get_xaxis().set_visible(False)
        fig.axes.get_yaxis().set_visible(False)
plt.show()

定义检测模型

from mxnet import gluon
class ToySSD(gluon.Block):
    """SSD-style detector assembled from builder helpers defined elsewhere.

    The network components come from ``toy_ssd_model`` and the per-layer
    forward pass from ``toy_ssd_forward``; this class stores the anchor
    configuration and merges the per-layer outputs.

    Args:
        num_classes: number of object classes; class scores are reshaped to
            ``num_classes + 1`` values per anchor.
        **kwargs: forwarded to ``gluon.Block``.
    """

    def __init__(self, num_classes, **kwargs):
        super().__init__(**kwargs)
        self.num_classes = num_classes
        # Five prediction layers, each responsible for a different anchor
        # size; sizes grow from small to large along the network.
        self.anchor_sizes = [
            [.2, .272],
            [.37, .447],
            [.54, .619],
            [.71, .79],
            [.88, .961],
        ]
        # Every layer uses 1, 2 and 0.5 as its aspect-ratio candidates.
        self.anchor_ratios = [[1, 2, .5]] * 5

        with self.name_scope():
            # NOTE(review): the literal 4 is presumably the number of anchors
            # per position (2 sizes + 3 ratios - 1) - confirm against
            # toy_ssd_model.
            components = toy_ssd_model(4, num_classes)
            self.body, self.downsamples, self.class_preds, self.box_preds = components

    def forward(self, x):
        """Run the detector on ``x``.

        Returns:
            A tuple ``(anchors, class_preds, box_preds)``; ``class_preds`` is
            reshaped so its last axis has ``num_classes + 1`` entries.
        """
        per_layer = toy_ssd_forward(
            x, self.body, self.downsamples, self.class_preds, self.box_preds,
            self.anchor_sizes, self.anchor_ratios)
        layer_anchors, layer_classes, layer_boxes = per_layer

        # Flatten and concatenate the outputs of every prediction layer so
        # that anchors, boxes and class scores stay in one-to-one order.
        anchors = concat_predictions(layer_anchors)
        box_preds = concat_predictions(layer_boxes)
        class_preds = concat_predictions(layer_classes)

        # Reshape to make the softmax over classes more convenient.
        class_preds = nd.reshape(
            class_preds, shape=(0, -1, self.num_classes + 1))

        return anchors, class_preds, box_preds
# Build the detector with as many object classes as the dataset declares.
# The previous hard-coded ToySSD(2) did not match class_names (one entry), and
# display() indexes class_names by the predicted class id.
net = ToySSD(num_class)
net.initialize()
# Run a single image (slice 0:1 keeps the batch axis) through the network and
# report the shapes of the three outputs.
x = batch.data[0][0:1]
default_anchors, class_predictions, box_predictions = net(x)
print('Outputs:', 'anchors', default_anchors.shape, 'class prediction', class_predictions.shape, 'box prediction', box_predictions.shape)

训练模型

import time 
from mxnet import autograd
# Train for a fixed 100 epochs. ctx, trainer, the two metrics, the two loss
# functions and training_targets are not defined in this section and must
# already exist when this loop runs.
for epoch in range(100):
    # Rewind the iterator and clear both running metrics for the new epoch.
    train_data.reset()
    cls_metric.reset()
    box_metric.reset()
    tic = time.time()
    for i,batch in enumerate(train_data):
        # Copy the images and labels of this batch to the compute context.
        x = batch.data[0].as_in_context(ctx)
        y = batch.label[0].as_in_context(ctx)
        # Record the forward pass and loss so gradients can be computed.
        with autograd.record():
            default_anchors, class_predictions, box_predictions = net(x)
            # Derive the box targets, box mask and class targets from the
            # anchors, the class predictions and the ground-truth labels.
            box_target,box_mask,cls_target = training_targets(default_anchors, class_predictions,y)
            loss1 = cls_loss(class_predictions,cls_target)
            loss2 = box_loss(box_predictions,box_target,box_mask)
            # The total loss is the unweighted sum of the two terms.
            loss = loss1 + loss2
        loss.backward()
        trainer.step(batch_size)
        # The class predictions are transposed to axis order (0,2,1) before
        # being handed to the class metric.
        cls_metric.update([cls_target],[class_predictions.transpose((0,2,1))])
        # The box predictions are multiplied by the mask before being scored.
        box_metric.update([box_target],[box_predictions*box_mask])
    # One summary line per epoch: both metrics as (name, value) pairs plus the
    # elapsed wall-clock time.
    print('Epoch %2d,train %s %.2f,%s %.5f,time %.1f sec'%(epoch,*cls_metric.get(),*box_metric.get(),time.time()-tic))

测试图片，显示效果

import numpy as np
import cv2

def preprocess(image):
    """Turn a BGR image array into a mean-subtracted, batched network input.

    Args:
        image: image array with the channel axis last and channels in BGR
            order, as produced by ``cv2.imread``.

    Returns:
        An ``nd`` array laid out as [batch, channel, height, width] with a
        batch size of one and spatial size ``data_shape`` x ``data_shape``.

    Raises:
        ValueError: if ``image`` is ``None``. ``cv2.imread`` returns ``None``
            instead of raising when a file is missing or unreadable, so this
            is checked up front to give a clear error.
    """
    if image is None:
        raise ValueError(
            'preprocess() received None; the image file could not be read')
    # resize to data_shape
    image = cv2.resize(image, (data_shape, data_shape))
    # swap BGR to RGB
    image = image[:, :, (2, 1, 0)]
    # convert to float before subtracting mean
    image = image.astype(np.float32)
    # subtract mean (same values as the module-level rgb_mean)
    image -= np.array([123, 117, 104], dtype=np.float32)
    # organize as [batch-channel-height-width]
    image = np.transpose(image, (2, 0, 1))
    image = image[np.newaxis, :]
    # convert to ndarray
    image = nd.array(image)
    return image

# NOTE: this rebinds the module-level name `image`, which was previously the
# mxnet `image` module; get_iterators() cannot be called again after this line.
image = cv2.imread('/home/xm/桌面/finger_detection/data/img512_512/2712.png')
x = preprocess(image)
print('x', x.shape)
# if pre-trained model is provided, we can load it
# net.load_params('ssd_%d.params' % epochs, ctx)
# Time a single forward pass on the compute context.
temp = time.time()
default_anchors, class_predictions, box_predictions = net(x.as_in_context(ctx))
print('predict time>>>>>>>>>>>>>>>>>>>%.4f sec'%(time.time()-temp))
# The forward pass binds the anchors to `default_anchors`; printing the
# undefined name `anchors` raised NameError.
print('anchors', default_anchors)
print('class predictions', class_predictions)
print('box delta predictions', box_predictions)
from mxnet.contrib.ndarray import MultiBoxDetection
# convert predictions to probabilities using softmax
class_predictions = nd.SoftmaxActivation(nd.transpose(class_predictions, (0, 2, 1)), mode='channel')
# apply shifts to anchors boxes, non-maximum-suppression, etc...
output = MultiBoxDetection(*[class_predictions,box_predictions, default_anchors], force_suppress=True, clip=False)
print(output)
def display(img, out, thresh=0.5):
    """Show ``img`` with the detections in ``out`` drawn on top.

    Args:
        img: image array passed to ``plt.imshow``; ``img.shape[1]`` and
            ``img.shape[0]`` are used as width and height when scaling boxes.
        out: iterable of detection rows. Element 0 is read as the class id,
            element 1 as the score and elements 2..5 as box coordinates that
            are multiplied by the image width/height.
        thresh: rows whose score is below this value are skipped.

    Rows with a negative class id are skipped as well. Each class id gets one
    random colour that is reused for all of its boxes.
    """
    import random
    import matplotlib as mpl
    mpl.rcParams['figure.figsize'] = (10,10)

    palette = {}
    plt.clf()
    plt.imshow(img)
    # Multipliers for (xmin, ymin, xmax, ymax): width, height, width, height.
    box_scale = [img.shape[1], img.shape[0]] * 2

    for det in out:
        cid = int(det[0])
        if cid < 0 or det[1] < thresh:
            continue
        score = det[1]

        # Pick a colour the first time a class id is seen.
        if cid not in palette:
            palette[cid] = (random.random(), random.random(), random.random())
        colour = palette[cid]

        xmin, ymin, xmax, ymax = (
            int(frac * size)
            for frac, size in zip(det[2:6].tolist(), box_scale))

        axes = plt.gca()
        axes.add_patch(plt.Rectangle(
            (xmin, ymin), xmax - xmin, ymax - ymin,
            fill=False, edgecolor=colour, linewidth=3))
        # Caption just above the box: class name followed by the score.
        axes.text(
            xmin, ymin-2, '{:s} {:.3f}'.format(class_names[cid], score),
            bbox=dict(facecolor=colour, alpha=0.5),
            fontsize=12, color='white')
    plt.show()
# Reverse the channel order of the cv2 image (BGR to RGB) for matplotlib and
# draw the detections of the first (only) item in the batch.
display(image[:, :, (2, 1, 0)], output[0].asnumpy(), thresh=0.5)