Finger Detection Training Process

Load the finger detection dataset we prepared earlier.

from mxnet import gluon
from mxnet import image
from mxnet import nd

data_shape = 256
batch_size = 36
rgb_mean = nd.array([123, 117, 104])

def get_iterators(data_shape, batch_size):
    class_names = ['finger']
    num_class = len(class_names)
    # training iterator: shuffled, mean-subtracted, with random crops that keep
    # at least 95% of each object
    train_iter = image.ImageDetIter(
        batch_size=batch_size,
        data_shape=(3, data_shape, data_shape),
        path_imgrec='/home/xm/桌面/finger_detection/data/rev/img_512_512_train.rec',
        path_imgidx='/home/xm/桌面/finger_detection/data/rev/img_512_512_train.idx',
        shuffle=True,
        mean=True,
        rand_crop=1,
        min_object_covered=0.95,
        max_attempts=200)
    # validation iterator: no shuffling and no augmentation
    val_iter = image.ImageDetIter(
        batch_size=batch_size,
        data_shape=(3, data_shape, data_shape),
        path_imgrec='/home/xm/桌面/finger_detection/data/rev/img_512_512_val.rec',
        shuffle=False, mean=True)
    return train_iter, val_iter, class_names, num_class

train_data, test_data, class_names, num_class = get_iterators(data_shape, batch_size)
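
As a quick sanity check (not part of the original post), one batch can be pulled from the iterator to confirm the shapes ImageDetIter produces; the exact label width depends on how the .rec file was packed, so the shapes in the comments are assumptions.

# Sketch: inspect one batch (shapes assume batch_size=36, data_shape=256)
batch = train_data.next()
print('data:', batch.data[0].shape)    # expected (36, 3, 256, 256)
print('label:', batch.label[0].shape)  # expected (36, num_objects, 5): [class_id, xmin, ymin, xmax, ymax], normalized
train_data.reset()                     # rewind so later code still starts from the first batch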

Read a batch from the dataset and visualize it with its bounding boxes.

%matplotlib inline
import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 120
import matplotlib.pyplot as plt

def box_to_rect(box, color, linewidth=3):
    """Convert a (xmin, ymin, xmax, ymax) box to a matplotlib Rectangle."""
    box = box.asnumpy()
    return plt.Rectangle((box[0], box[1]), box[2] - box[0], box[3] - box[1],
                         fill=False, edgecolor=color, linewidth=linewidth)

batch = train_data.next()  # fetch one batch to visualize
_, figs = plt.subplots(3, 3, figsize=(6, 6))
for i in range(3):
    for j in range(3):
        img, labels = batch.data[0][3*i+j], batch.label[0][3*i+j]
        # undo the mean subtraction and rescale to [0, 1] for display
        img = img.transpose((1, 2, 0)) + rgb_mean
        img = img.clip(0, 255).asnumpy() / 255
        fig = figs[i][j]
        fig.imshow(img)
        for label in labels:
            # label format: [class_id, xmin, ymin, xmax, ymax], coordinates normalized to [0, 1]
            rect = box_to_rect(label[1:5] * data_shape, 'red', 2)
            fig.add_patch(rect)
        fig.axes.get_xaxis().set_visible(False)
        fig.axes.get_yaxis().set_visible(False)
plt.show()

Define the detection model.

from mxnet import gluon

class ToySSD(gluon.Block):
    def __init__(self, num_classes, **kwargs):
        super(ToySSD, self).__init__(**kwargs)
        # 5 prediction layers; each handles anchor boxes of a different scale,
        # from small to large, matching the shrinking feature maps
        self.anchor_sizes = [[.2, .272], [.37, .447], [.54, .619], [.71, .79], [.88, .961]]
        # every prediction layer uses 1, 2 and 0.5 as candidate aspect ratios
        self.anchor_ratios = [[1, 2, .5]] * 5
        self.num_classes = num_classes

        with self.name_scope():
            self.body, self.downsamples, self.class_preds, self.box_preds = toy_ssd_model(4, num_classes)

    def forward(self, x):
        default_anchors, predicted_classes, predicted_boxes = toy_ssd_forward(x, self.body, self.downsamples,
            self.class_preds, self.box_preds, self.anchor_sizes, self.anchor_ratios)
        # flatten and concatenate the outputs of every prediction layer so anchors,
        # class scores and box offsets stay aligned one-to-one
        anchors = concat_predictions(default_anchors)
        box_preds = concat_predictions(predicted_boxes)
        class_preds = concat_predictions(predicted_classes)
        # reshape so that softmax can be applied conveniently over the classes
        class_preds = nd.reshape(class_preds, shape=(0, -1, self.num_classes + 1))

        return anchors, class_preds, box_preds

# create an SSD network with 2 foreground classes (only 'finger' is used in this dataset)
net = ToySSD(2)
net.initialize()
x = batch.data[0][0:1]
default_anchors, class_predictions, box_predictions = net(x)
print('Outputs:', 'anchors', default_anchors.shape, 'class prediction', class_predictions.shape, 'box prediction', box_predictions.shape)
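
ToySSD relies on three helpers, toy_ssd_model, toy_ssd_forward and concat_predictions, that are never defined in this post. They come from the MXNet Gluon SSD tutorial; the sketch below shows what they are assumed to look like, with layer widths and the number of scales taken from that tutorial rather than from the author's verified configuration.

from mxnet.gluon import nn
from mxnet.contrib.ndarray import MultiBoxPrior

def class_predictor(num_anchors, num_classes):
    """One 3x3 conv predicting (num_classes + background) scores per anchor."""
    return nn.Conv2D(num_anchors * (num_classes + 1), 3, padding=1)

def box_predictor(num_anchors):
    """One 3x3 conv predicting 4 box offsets per anchor."""
    return nn.Conv2D(num_anchors * 4, 3, padding=1)

def down_sample(num_filters):
    """Two conv-batchnorm-relu blocks followed by pooling that halves the feature map."""
    out = nn.HybridSequential()
    for _ in range(2):
        out.add(nn.Conv2D(num_filters, 3, strides=1, padding=1))
        out.add(nn.BatchNorm(in_channels=num_filters))
        out.add(nn.Activation('relu'))
    out.add(nn.MaxPool2D(2))
    return out

def body():
    """Small backbone: three down-sampling blocks (assumed filter sizes 16, 32, 64)."""
    out = nn.HybridSequential()
    for nfilters in [16, 32, 64]:
        out.add(down_sample(nfilters))
    return out

def toy_ssd_model(num_anchors, num_classes):
    """Backbone, extra down-sampling stages and the per-scale class/box predictors."""
    downsamples = nn.Sequential()
    class_preds = nn.Sequential()
    box_preds = nn.Sequential()
    for _ in range(3):
        downsamples.add(down_sample(128))
    for scale in range(5):
        class_preds.add(class_predictor(num_anchors, num_classes))
        box_preds.add(box_predictor(num_anchors))
    return body(), downsamples, class_preds, box_preds

def flatten_prediction(pred):
    # (batch, channel, h, w) -> (batch, h*w*channel) so all scales can be concatenated
    return nd.flatten(nd.transpose(pred, axes=(0, 2, 3, 1)))

def concat_predictions(preds):
    return nd.concat(*preds, dim=1)

def toy_ssd_forward(x, body, downsamples, class_preds, box_preds, sizes, ratios):
    x = body(x)
    default_anchors, predicted_boxes, predicted_classes = [], [], []
    for i in range(5):
        # generate anchors and predictions at the current scale, then shrink the feature map
        default_anchors.append(MultiBoxPrior(x, sizes=sizes[i], ratios=ratios[i]))
        predicted_boxes.append(flatten_prediction(box_preds[i](x)))
        predicted_classes.append(flatten_prediction(class_preds[i](x)))
        if i < 3:
            x = downsamples[i](x)
        elif i == 3:
            x = nd.Pooling(x, global_pool=True, pool_type='max', kernel=(4, 4))
    return default_anchors, predicted_classes, predicted_boxes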

Train the model.

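The training loop below uses ctx, trainer, cls_loss, box_loss, cls_metric, box_metric and training_targets without defining them. Here is a minimal sketch of that missing setup, again following the Gluon SSD tutorial; the focal and smooth-L1 losses and the optimizer hyper-parameters are assumptions, not the author's verified settings.

import mxnet as mx
from mxnet.contrib.ndarray import MultiBoxTarget

ctx = mx.gpu()  # assumption: switch to mx.cpu() if no GPU is available
net.collect_params().reset_ctx(ctx)
trainer = gluon.Trainer(net.collect_params(), 'sgd',
                        {'learning_rate': 0.1, 'wd': 5e-4})  # assumed hyper-parameters

def training_targets(default_anchors, class_predicts, labels):
    """Match ground-truth boxes to anchors and build regression/classification targets."""
    class_predicts = nd.transpose(class_predicts, axes=(0, 2, 1))
    z = MultiBoxTarget(*[default_anchors, labels, class_predicts])
    box_target = z[0]   # offsets the box branch should regress for positive anchors
    box_mask = z[1]     # mask that zeroes out negative anchors in the box loss
    cls_target = z[2]   # per-anchor class labels (0 = background)
    return box_target, box_mask, cls_target

class FocalLoss(gluon.loss.Loss):
    """Classification loss that down-weights easy background anchors."""
    def __init__(self, axis=-1, alpha=0.25, gamma=2, batch_axis=0, **kwargs):
        super(FocalLoss, self).__init__(None, batch_axis, **kwargs)
        self._axis = axis
        self._alpha = alpha
        self._gamma = gamma

    def hybrid_forward(self, F, output, label):
        output = F.softmax(output)
        pt = F.pick(output, label, axis=self._axis, keepdims=True)
        loss = -self._alpha * ((1 - pt) ** self._gamma) * F.log(pt)
        return F.mean(loss, axis=self._batch_axis, exclude=True)

class SmoothL1Loss(gluon.loss.Loss):
    """Box regression loss, masked so only positive anchors contribute."""
    def __init__(self, batch_axis=0, **kwargs):
        super(SmoothL1Loss, self).__init__(None, batch_axis, **kwargs)

    def hybrid_forward(self, F, output, label, mask):
        loss = F.smooth_l1((output - label) * mask, scalar=1.0)
        return F.mean(loss, axis=self._batch_axis, exclude=True)

cls_loss = FocalLoss()
box_loss = SmoothL1Loss()
cls_metric = mx.metric.Accuracy()
box_metric = mx.metric.MAE()
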
import time
from mxnet import autograd

for epoch in range(100):
    train_data.reset()   # rewind the iterator at the start of every epoch
    cls_metric.reset()
    box_metric.reset()
    tic = time.time()
    for i, batch in enumerate(train_data):
        x = batch.data[0].as_in_context(ctx)
        y = batch.label[0].as_in_context(ctx)
        with autograd.record():
            default_anchors, class_predictions, box_predictions = net(x)
            # match anchors to ground truth to get classification and regression targets
            box_target, box_mask, cls_target = training_targets(default_anchors, class_predictions, y)
            loss1 = cls_loss(class_predictions, cls_target)
            loss2 = box_loss(box_predictions, box_target, box_mask)
            loss = loss1 + loss2
        loss.backward()
        trainer.step(batch_size)
        # update running metrics for monitoring
        cls_metric.update([cls_target], [class_predictions.transpose((0, 2, 1))])
        box_metric.update([box_target], [box_predictions * box_mask])
    print('Epoch %2d, train %s %.2f, %s %.5f, time %.1f sec' % (epoch, *cls_metric.get(), *box_metric.get(), time.time() - tic))
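
The inference code further down optionally reloads trained weights with net.load_params, so the parameters should be saved once training finishes. A one-line sketch; the filename pattern is an assumption chosen to match the commented-out load call:

# save the trained weights so they can be reloaded for inference later (filename is an assumption)
net.save_params('ssd_%d.params' % epoch)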

Test on an image and display the detections.

import numpy as np
import cv2

def preprocess(image):
    """Takes an image and apply preprocess"""
    # resize to data_shape
    image = cv2.resize(image, (data_shape, data_shape))
    # swap BGR to RGB
    image = image[:, :, (2, 1, 0)]
    # convert to float before subtracting mean
    image = image.astype(np.float32)
    # subtract mean
    image -= np.array([123, 117, 104])
    # organize as [batch-channel-height-width]
    image = np.transpose(image, (2, 0, 1))
    image = image[np.newaxis, :]
    # convert to ndarray
    image = nd.array(image)
    return image

image = cv2.imread('/home/xm/桌面/finger_detection/data/img512_512/2712.png')
x = preprocess(image)
print('x', x.shape)
# if pre-trained model is provided, we can load it
# net.load_params('ssd_%d.params' % epochs, ctx)
temp = time.time()
default_anchors, class_predictions, box_predictions = net(x.as_in_context(ctx))
print('predict time>>>>>>>>>>>>>>>>>>>%.4f sec'%(time.time()-temp))
print('anchors', default_anchors)
print('class predictions', class_predictions)
print('box delta predictions', box_predictions)
from mxnet.contrib.ndarray import MultiBoxDetection
# convert predictions to probabilities using softmax
class_predictions = nd.SoftmaxActivation(nd.transpose(class_predictions, (0, 2, 1)), mode='channel')
# apply shifts to anchors boxes, non-maximum-suppression, etc...
output = MultiBoxDetection(*[class_predictions,box_predictions, default_anchors], force_suppress=True, clip=False)
print(output)
def display(img, out, thresh=0.5):
    import random
    import matplotlib as mpl
    mpl.rcParams['figure.figsize'] = (10,10)
    pens = dict()
    plt.clf()
    plt.imshow(img)
    for det in out:
        cid = int(det[0])
        if cid < 0:
            continue
        score = det[1]
        if score < thresh:
            continue
        if cid not in pens:
            pens[cid] = (random.random(), random.random(), random.random())
        scales = [img.shape[1], img.shape[0]] * 2
        xmin, ymin, xmax, ymax = [int(p * s) for p, s in zip(det[2:6].tolist(), scales)]
        rect = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, 
                             edgecolor=pens[cid], linewidth=3)
        plt.gca().add_patch(rect)
        text = class_names[cid]
        plt.gca().text(xmin, ymin-2, '{:s} {:.3f}'.format(text, score),
                       bbox=dict(facecolor=pens[cid], alpha=0.5),
                       fontsize=12, color='white')
    plt.show()
display(image[:, :, (2, 1, 0)], output[0].asnumpy(), thresh=0.5)