CNN-单目标追踪实战(Torch实现)

目录

1、概述

1、追踪对象

2、追踪目的

3、预备素材

2、准备训练样本

3、模型训练

4、测试训练的结果


1、概述

1、追踪对象

image

2、追踪目的

当追踪对象出现在任意场景中时,能够通过训练出的网络模型精准识别并定位。

3、预备素材

20张卡通图像:

若干背景图(越多越好)

2、准备训练样本

1、将带透明通道的卡通图像存放在本地文件夹中

image.png

2、下载背景图片(至少1000张)到本地

image.png

 

3、制造训练样本

本次训练有正样本和负样本两类。

正样本:背景图上有卡通人图片,标签置信度为1,还包含卡通图的四个坐标值(左上角和右下角的坐标),网络模型学习的是图像的四个坐标值和置信度。

负样本:背景图上无卡通人图片,标签置信度为0,四个坐标值均为0。

import os
import numpy as np
from PIL import Image

def gen_datasets(bg_path, minions_path, img_path, label_path, max_count=1000):
    """Generate negative/positive sample images and their label file.

    For every file found in ``bg_path`` two 224x224 PNG images are written to
    ``img_path``:

    * the plain background (negative sample, label line ``N.png 0 0 0 0 0``);
    * the same background with one randomly chosen sprite pasted on it
      (positive sample, label line ``N.png 1 x1 y1 x2 y2``), where the box is
      the paste position plus the resized sprite width/height in pixels.

    Args:
        bg_path: Directory containing the background images.
        minions_path: Directory containing the sprites named ``1.png`` ..
            ``20.png``.
        img_path: Output directory for the generated samples (must exist).
        label_path: Path of the label text file to (over)write.
        max_count: Generation stops once more than this many images have been
            written. Defaults to 1000, the value previously hard-coded.
    """
    count = 0
    with open(label_path, "w") as f:
        for filename in os.listdir(bg_path):
            # Load the background, force 3 channels and a uniform size.
            with Image.open(os.path.join(bg_path, filename)) as raw_bg:
                bg_img = raw_bg.convert("RGB").resize((224, 224))

            # Negative sample: the untouched background.
            bg_img.save("{}/{}.png".format(img_path, count))
            f.write("{}.png {} {} {} {} {}\n".format(count, 0, 0, 0, 0, 0))
            count += 1

            # Pick a sprite and force RGBA so an alpha channel always exists,
            # even when the PNG on disk is stored in another mode.
            name = np.random.randint(1, 21)
            with Image.open("{}/{}.png".format(minions_path, name)) as raw_sprite:
                minions_img = raw_sprite.convert("RGBA")

            new_w = np.random.randint(50, 100)
            new_h = np.random.randint(50, 100)
            resize_img = minions_img.resize((new_w, new_h))            # random scale
            rot_img = resize_img.rotate(np.random.randint(-180, 180))  # random rotation

            paste_x1 = np.random.randint(0, 224 - new_w)
            paste_y1 = np.random.randint(0, 224 - new_h)
            alpha = rot_img.split()[-1]   # alpha channel used as the paste mask
            bg_img.paste(rot_img, (paste_x1, paste_y1), mask=alpha)
            paste_x2 = paste_x1 + new_w
            paste_y2 = paste_y1 + new_h

            # Positive sample: background plus sprite, with its bounding box.
            bg_img.save("{}/{}.png".format(img_path, count))
            f.write("{}.png {} {} {} {} {}\n".format(
                count, 1, paste_x1, paste_y1, paste_x2, paste_y2))
            count += 1

            print(count)   # progress indicator
            if count > max_count:
                break

if __name__ == '__main__':
    # Entry point: build the train / validate / test datasets from the same
    # pool of backgrounds and sprites.
    bg_img = r"D:\Desktop\Learnn\Minions\back_ground_dir"
    minions_img = r"D:\Desktop\Learnn\Minions\minions_dir"
    root_dir = r"D:\Desktop\Learnn\Minions"

    train_img = os.path.join(root_dir, "train_img")
    validate_img = os.path.join(root_dir, "validate_img")
    test_img = os.path.join(root_dir, "test_img")

    # Create each output folder. The previous code always created
    # ``train_img``, so the validate/test folders were never made and the
    # second iteration failed because ``train_img`` already existed.
    for i in (train_img, validate_img, test_img):
        os.makedirs(i, exist_ok=True)

    # NOTE(review): label files are written relative to the current working
    # directory, not to root_dir; run from the directory the training code
    # reads labels from, or adjust these paths.
    train_label = r"./train_label.txt"
    validate_label = r"./validate_label.txt"
    test_label = r"./test_label.txt"

    gen_datasets(bg_img, minions_img, train_img, train_label)
    gen_datasets(bg_img, minions_img, validate_img, validate_label)
    gen_datasets(bg_img, minions_img, test_img, test_label)

生成的样本(部分截图)

image.png

生成的标签(部分截图)

image.png

3、模型训练

1、构建采样器

from torch.utils import data
import os
from PIL import Image
from torchvision import transforms
import torch

class Mydata(data.Dataset):
    """Dataset pairing each generated image with its 5-value label.

    Each non-empty line of the label file is ``<file> <conf> <x1> <y1> <x2> <y2>``.
    ``self.dataset`` holds ``[image_path, [conf, x1, y1, x2, y2]]`` entries with
    the label values kept as strings until an item is requested.
    """

    def __init__(self, img_path, lab_path):
        """Read ``lab_path`` and index every sample found in it.

        Args:
            img_path: Directory that contains the image files.
            lab_path: Path of the whitespace-separated label file.
        """
        self.dataset = []
        with open(lab_path, "r") as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Skip blank lines (e.g. a trailing newline) instead of
                    # failing with IndexError.
                    continue
                # Keep the image path and its label values together.
                self.dataset.append([os.path.join(img_path, fields[0]), fields[1:6]])

    def __len__(self):
        """Return the number of indexed samples."""
        return len(self.dataset)

    def __getitem__(self, item):
        """Return ``(image_tensor, label_tensor)`` for sample ``item``.

        The label tensor is ``[conf, x1, y1, x2, y2]`` with the four
        coordinates divided by 224 (the side length of the generated images).
        """
        path, label = self.dataset[item]
        y = [float(label[0])]
        y.extend(float(value) / 224 for value in label[1:5])

        # Force 3 channels so the 3-channel Normalize below always applies,
        # and close the file handle as soon as the pixels are converted.
        with Image.open(path) as img:
            x = self.data_scale(img.convert("RGB"))

        return x, torch.tensor(y)

    def data_scale(self, x):
        """Convert a PIL image to a tensor normalised with mean 0.5 / std 0.5."""
        return transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ])(x)

if __name__ == '__main__':   # smoke test: check that the dataset can be iterated

    label_path = r"D:\Desktop\Learnn\Minions\train_label.txt"
    data_path = r"D:\Desktop\Learnn\Minions\train_img"

    mydata = Mydata(data_path, label_path)
    # Use a distinct name so the imported ``data`` module is not shadowed.
    loader = data.DataLoader(mydata, 10, shuffle=True)

    # Fall back to the CPU when CUDA is unavailable instead of crashing.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    for i, (x1, y1) in enumerate(loader):
        x = x1.to(device)
        y = y1.to(device)
        # print(x1, "xxxxx")
        # print(y1, "yyyyyy")

2、构建训练的文件

from torch.utils import data
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import dataset1
import numpy
from PIL import Image, ImageDraw, ImageFont
import sklearn
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score,mean_absolute_error, mean_squared_error, r2_score,explained_variance_score
import torch.nn.functional as F

class Net(nn.Module):
    """Eight-stage CNN that regresses one bounding box and one confidence.

    Every stage is Conv2d(3x3, stride 1, padding 1) -> BatchNorm2d -> ReLU,
    optionally followed by a 2x2/stride-2 pooling layer. For a 224x224 input
    the spatial size goes 112, 56, 28, 14, 7 after con1..con5, stays 7 through
    con6 and con7, and the average pool in con8 reduces it to 3, which is what
    the ``16 * 3 * 3`` input size of the final linear layer expects.
    """

    @staticmethod
    def _stage(in_ch, out_ch, groups, pool=None):
        """Build one conv/batch-norm/ReLU stage, with an optional pooling layer."""
        layers = [
            nn.Conv2d(in_channels=in_ch, out_channels=out_ch, kernel_size=3,
                      stride=1, padding=1, dilation=1, groups=groups),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(),
        ]
        if pool is not None:
            layers.append(pool((2, 2), 2))
        return nn.Sequential(*layers)

    def __init__(self):
        super().__init__()
        self.con1 = self._stage(3, 128, 1, nn.MaxPool2d)
        self.con2 = self._stage(128, 256, 4, nn.MaxPool2d)
        self.con3 = self._stage(256, 512, 8, nn.MaxPool2d)
        self.con4 = self._stage(512, 256, 8, nn.MaxPool2d)
        self.con5 = self._stage(256, 128, 4, nn.MaxPool2d)
        self.con6 = self._stage(128, 128, 4)
        self.con7 = self._stage(128, 32, 4)
        self.con8 = self._stage(32, 16, 1, nn.AvgPool2d)

        # 5 outputs: column 0 is the confidence, columns 1..4 the coordinates.
        self.fc = nn.Linear(16 * 3 * 3, 5)

    def forward(self, x):
        """Return ``(coordinate, confident)`` for a batch of images.

        ``coordinate`` has shape (batch, 4) and is passed through ReLU;
        ``confident`` has shape (batch,) and is passed through a sigmoid.
        """
        features = x
        stages = (self.con1, self.con2, self.con3, self.con4,
                  self.con5, self.con6, self.con7, self.con8)
        for stage in stages:
            features = stage(features)

        # Flatten everything but the batch dimension before the linear layer.
        flat = features.reshape(features.size(0), -1)
        out = self.fc(flat)

        coordinate = out[:, 1:].relu()
        confident = out[:, 0].sigmoid()
        return coordinate, confident


if __name__ == '__main__':
    # Training entry point: trains Net on the generated samples, prints
    # regression metrics after every epoch and, every 10th epoch, draws the
    # predicted (red) and labelled (blue) boxes for a few validation batches.
    import os  # local import: only this script section needs it

    batch_size1 = 5                 # training-set batch size
    data_path1 = r"C:\Users\Administrator\Desktop\Learnn\Minions\train_img"
    label_path1 = r"C:\Users\Administrator\Desktop\Learnn\Minions\train_label.txt"
    save_net = "./minionsnet1.pth"
    train_data = dataset1.Mydata(data_path1, label_path1)
    train_loader = data.DataLoader(train_data, batch_size1, shuffle=True)

    batch_size = 10                 # validation-set batch size
    data_path = r"C:\Users\Administrator\Desktop\Learnn\Minions\validate_img"
    label_path = r"C:\Users\Administrator\Desktop\Learnn\Minions\validate_label.txt"
    validation_data = dataset1.Mydata(data_path, label_path)
    validation_loader = data.DataLoader(validation_data, batch_size, shuffle=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Resume from an existing checkpoint; start from scratch on the first run
    # (previously a missing checkpoint made the script crash immediately).
    # NOTE(review): recent PyTorch releases may need ``weights_only=False`` to
    # load a whole pickled module - confirm against the installed version.
    if os.path.isfile(save_net):
        net = torch.load(save_net, map_location=device)
    else:
        net = Net()
    net = net.to(device)

    loss_fn1 = nn.BCELoss()   # confidence loss
    loss_fn2 = nn.MSELoss()   # coordinate loss

    optim = torch.optim.Adam(net.parameters(), lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0)

    # Load the overlay font once; fall back to PIL's built-in font when the
    # Consolas font file is not available on this machine.
    try:
        font = ImageFont.truetype("consola.ttf", 25, encoding="unic")
    except OSError:
        font = ImageFont.load_default()

    for epoch in range(4000):
        net.train()   # validation below switches to eval mode
        train_conf_loss = 0
        train_coor_loss = 0
        total_loss = 0
        coor_pred = []
        conf_pred = []
        conf_true = []
        coor_true = []
        for i, (x, y) in enumerate(train_loader):
            x = x.to(device)
            y = y.to(device)
            coordinate, confident = net(x)

            confidence_label = y[:, 0:1]
            coordinate_label = y[:, 1:]
            # BCELoss needs prediction and target to have the same shape, so
            # the (batch,) confidence is reshaped to (batch, 1).
            loss1 = loss_fn1(torch.reshape(confident, (confident.size(0), -1)), confidence_label)
            loss2 = loss_fn2(coordinate, coordinate_label)
            loss = loss1*0.8 + loss2*0.2

            optim.zero_grad()
            loss.backward()
            optim.step()

            train_conf_loss += loss1.item()
            train_coor_loss += loss2.item()
            total_loss += loss.item()

            # Collect flat lists of predictions and labels for the metrics.
            coor_pred.extend(coordinate.detach().cpu().numpy().ravel())
            conf_pred.extend(confident.detach().cpu().numpy().ravel())
            conf_true.extend(confidence_label.cpu().numpy().ravel())
            coor_true.extend(coordinate_label.cpu().numpy().ravel())

        # sklearn metrics take (y_true, y_pred); swapping them changes r2.
        print("coor     :")
        print("r2       :", r2_score(coor_true, coor_pred))
        print("explained:", explained_variance_score(coor_true, coor_pred))
        print("meanabs  :", mean_absolute_error(coor_true, coor_pred))
        print("meansq   :", mean_squared_error(coor_true, coor_pred))
        print()
        print("conf     :")
        print("r2       :", r2_score(conf_true, conf_pred))
        print("explained:", explained_variance_score(conf_true, conf_pred))
        print("meanabs  :", mean_absolute_error(conf_true, conf_pred))
        print("meansq   :", mean_squared_error(conf_true, conf_pred))
        print()

        # Each accumulated loss is already a per-batch mean, so average over
        # the number of batches rather than the number of samples.
        train_batches = max(len(train_loader), 1)
        train_avg_conf_loss = train_conf_loss / train_batches
        train_avg_coor_loss = train_coor_loss / train_batches
        train_avg_total_loss = total_loss / train_batches
        print("train:")
        print("epoch:{},train_avg_conf_loss:{:.4f}".format(epoch, train_avg_conf_loss))
        print("epoch:{},train_avg_coor_loss:{:.6f}".format(epoch, train_avg_coor_loss))
        print("epoch:{},train_avg_total_loss:{:.4f}".format(epoch, train_avg_total_loss))
        print()

        # Persist the model so the test scripts have a checkpoint to load.
        torch.save(net, save_net)

        # Validation (every 10th epoch)
        if epoch % 10 == 0:
            net.eval()
            valid_conf_loss = 0
            valid_coor_loss = 0
            valid_total_loss = 0
            valid_batches = 0
            with torch.no_grad():
                for i, (x, y) in enumerate(validation_loader):
                    # Undo Normalize(mean=0.5, std=0.5) and scale back to
                    # 0-255 pixel values for the first image of the batch.
                    img_np = (x[0].numpy() * 0.5 + 0.5) * 255
                    img_np = img_np.transpose(1, 2, 0)   # CHW -> HWC
                    img = Image.fromarray(numpy.uint8(numpy.clip(img_np, 0, 255)))

                    x = x.to(device)
                    y = y.to(device)
                    out1, out2 = net(x)
                    out2 = torch.reshape(out2, (out2.size(0), -1))

                    # Labelled box of the first image, back in pixel units.
                    label_coor = y[0].cpu().numpy()
                    x1_l, y1_l, x2_l, y2_l = (float(v) * 224 for v in label_coor[1:5])

                    confidence = out2[0].cpu().numpy()
                    print(confidence[0])

                    # Predicted box of the first image, back in pixel units.
                    x1_o, y1_o, x2_o, y2_o = (float(v) * 224 for v in out1[0].cpu().numpy())

                    draw = ImageDraw.Draw(img)
                    # Order the predicted corners: an untrained network can
                    # output x2 < x1, which ImageDraw may reject.
                    draw.rectangle((min(x1_o, x2_o), min(y1_o, y2_o),
                                    max(x1_o, x2_o), max(y1_o, y2_o)), outline="red")
                    draw.rectangle((x1_l, y1_l, x2_l, y2_l), outline="blue")
                    # draw.text needs a string, hence the str() conversion.
                    draw.text((150, 20), str(confidence[0]), 'fuchsia', font)
                    plt.imshow(img)
                    plt.pause(0.5)
                    plt.clf()

                    loss1 = loss_fn1(out2, y[:, 0:1])
                    loss2 = loss_fn2(out1, y[:, 1:])
                    valid_conf_loss += loss1.item()
                    valid_coor_loss += loss2.item()
                    valid_total_loss += loss1.item() + loss2.item()
                    valid_batches += 1

                    # Only look at the first few batches.
                    if i == 10:
                        break
            plt.close()

            # Average over the batches actually evaluated (11 when the loader
            # is long enough, not the 10 that was previously hard-coded).
            valid_batches = max(valid_batches, 1)
            val_avg_conf_loss = valid_conf_loss / valid_batches
            val_avg_coor_loss = valid_coor_loss / valid_batches
            val_avg_total_loss = valid_total_loss / valid_batches
            print("valid:")
            print("epoch:{},val_avg_conf_loss:{:.4f}".format(epoch, val_avg_conf_loss))
            print("epoch:{},val_avg_coor_loss:{:.4f}".format(epoch, val_avg_coor_loss))
            print("epoch:{},val_avg_total_loss:{:.4f}".format(epoch, val_avg_total_loss))
            print()

4、测试训练的结果

测试文件一

from torch.utils import data
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import dataset1
import numpy
from PIL import Image, ImageDraw, ImageFont
import cv2
from train_net1 import Net
from torch.utils import data
import numpy
import sklearn
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score,mean_absolute_error, mean_squared_error, r2_score,explained_variance_score
import torch.nn.functional as F


if __name__ == '__main__':
    # Test script 1: run the saved model over the test set once and print
    # regression metrics over confidence and coordinates combined.
    batch_size = 1
    data_path = r"C:\Users\Administrator\Desktop\Learnn\Minions\test_img"
    label_path = r"C:\Users\Administrator\Desktop\Learnn\Minions\test_label.txt"
    save_net = "./minionsnet1.pth"
    test_data = dataset1.Mydata(data_path, label_path)
    test_loader = data.DataLoader(test_data, batch_size, shuffle=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    net = torch.load(save_net, map_location=device).to(device)
    # Evaluation must not update batch-norm statistics, so use eval mode
    # (the script previously left the network in train mode).
    net.eval()

    conf_pred = []
    coor_pred = []
    conf_true = []
    coor_true = []
    # In eval mode every pass over the test set gives the same metrics, so a
    # single pass replaces the former 40000 repetitions.
    with torch.no_grad():
        for i, (x, y) in enumerate(test_loader):
            x = x.to(device)
            y = y.to(device)
            coordinate, confident = net(x)

            # Flatten everything to scalars so labels and predictions can be
            # concatenated into homogeneous lists for sklearn.
            coor_pred.extend(coordinate.cpu().numpy().ravel())
            conf_pred.extend(confident.cpu().numpy().ravel())
            conf_true.extend(y[:, 0:1].cpu().numpy().ravel())
            coor_true.extend(y[:, 1:].cpu().numpy().ravel())

    # Confidence values first, then coordinates, in both lists.
    y_true = conf_true + coor_true
    y_pred = conf_pred + coor_pred

    print("total     :")
    print("r2       :", r2_score(y_true, y_pred))
    print("explained:", explained_variance_score(y_true, y_pred))
    print("meanabs  :", mean_absolute_error(y_true, y_pred))
    print("meansq   :", mean_squared_error(y_true, y_pred))

测试文件二

from torch.utils import data
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import dataset1
import numpy
from PIL import Image, ImageDraw, ImageFont
import cv2
from train_net1 import Net

if __name__ == '__main__':
    # Test script 2: show each test image with the predicted box (red), the
    # labelled box (blue) and the predicted confidence.
    batch_size = 1
    data_path = r"C:\Users\Administrator\Desktop\Learnn\Minions\test_img"
    label_path = r"C:\Users\Administrator\Desktop\Learnn\Minions\test_label.txt"
    save_net = "./minionsnet1.pth"
    test_data = dataset1.Mydata(data_path, label_path)
    test_loader = data.DataLoader(test_data, batch_size, shuffle=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    net = torch.load(save_net, map_location=device).to(device)
    # Inference: use eval mode so batch-norm uses its running statistics
    # (the script previously left the network in train mode).
    net.eval()

    # Load the overlay font once; fall back to PIL's built-in font when the
    # Consolas font file is not available on this machine.
    try:
        font = ImageFont.truetype("consola.ttf", 25, encoding="unic")
    except OSError:
        font = ImageFont.load_default()

    for epoch in range(1, 11):
        with torch.no_grad():
            for i, (x, y) in enumerate(test_loader):
                # Undo Normalize(mean=0.5, std=0.5) and scale back to 0-255.
                img_np = (x[0].numpy() * 0.5 + 0.5) * 255
                img_np = img_np.transpose(1, 2, 0)   # CHW -> HWC
                # PIL needs uint8 pixel data.
                img = Image.fromarray(numpy.uint8(numpy.clip(img_np, 0, 255)))

                x = x.to(device)
                y = y.to(device)
                out1, out2 = net(x)

                # Labelled box, back in pixel units.
                label_coor = y[0].cpu().numpy()
                x1_l, y1_l, x2_l, y2_l = (float(v) * 224 for v in label_coor[1:5])

                confidence = format(float(out2[0].cpu()), ".3f")

                # Predicted box, back in pixel units.
                x1_o, y1_o, x2_o, y2_o = (float(v) * 224 for v in out1[0].cpu().numpy())

                draw = ImageDraw.Draw(img)
                # Order the predicted corners: the network can output
                # x2 < x1, which ImageDraw may reject.
                draw.rectangle((min(x1_o, x2_o), min(y1_o, y2_o),
                                max(x1_o, x2_o), max(y1_o, y2_o)), outline="red")
                draw.rectangle((x1_l, y1_l, x2_l, y2_l), outline="blue")
                draw.text((150, 20), confidence, 'fuchsia', font)
                plt.imshow(img)
                plt.pause(0.3)
                plt.clf()
  • 1
    点赞
  • 14
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
跟踪器(tracker)是计算机视觉中的一个重要工具,用于跟踪视频中的目标。基于 CNN-Transformer 的跟踪器是近年来比较流行的一种跟踪器,下面我们来介绍其代码实现。

首先,我们需要导入必要的库,包括 PyTorch、NumPy、argparse 和 cv2。

```python
import torch
import numpy as np
import argparse
import cv2
```

接着,我们需要定义一些超参数,包括输入图像的大小、batch size、模型的路径等。

```python
# 超参数
input_size = 224
batch_size = 10
model_path = 'model.pth'
```

然后,我们需要定义一个函数 `get_model`,用于加载模型。

```python
def get_model(model_path):
    model = torch.load(model_path)
    model.eval()
    return model
```

接着,我们需要定义一个函数 `preprocess`,用于对输入图像进行预处理。具体来说,我们需要对图像进行缩放、裁剪、标准化等操作。

```python
def preprocess(img):
    img = cv2.resize(img, (input_size, input_size))
    img = img.astype(np.float32) / 255.
    img = (img - 0.5) / 0.5
    img = np.transpose(img, (2, 0, 1))
    img = np.expand_dims(img, axis=0)
    img = np.repeat(img, batch_size, axis=0)
    return img
```

接着,我们需要定义一个函数 `track`,用于跟踪视频中的目标。具体来说,我们需要读取视频、初始化跟踪器、读取每一帧图像、对图像进行预处理、输入到模型中进行预测、更新跟踪器的状态等操作。

```python
def track(video_path, model_path):
    # 加载模型
    model = get_model(model_path)
    # 初始化跟踪器
    tracker = cv2.TrackerKCF_create()
    # 读取视频
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print('Failed to open video file:', video_path)
        return
    # 读取第一帧图像
    ret, frame = cap.read()
    if not ret:
        print('Failed to read first frame of video file:', video_path)
        return
    # 选择 ROI
    bbox = cv2.selectROI(frame, False)
    ok = tracker.init(frame, bbox)
    # 处理每一帧图像
    while True:
        # 读取图像
        ret, frame = cap.read()
        if not ret:
            break
        # 对图像进行预处理
        img = preprocess(frame)
        # 输入到模型中进行预测
        with torch.no_grad():
            output = model(torch.Tensor(img))
        # 更新跟踪器的状态
        bbox = tracker.update(frame)
        if ok:
            p1 = (int(bbox[0]), int(bbox[1]))
            p2 = (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3]))
            cv2.rectangle(frame, p1, p2, (0, 255, 0), 2, 1)
        # 显示图像
        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    # 释放资源
    cap.release()
    cv2.destroyAllWindows()
```

最后,我们使用 argparse 模块来解析命令行参数,从而方便地调用 `track` 函数。

```python
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Track object in a video using CNN-Transformer')
    parser.add_argument('video_path', type=str, help='path to the input video')
    parser.add_argument('model_path', type=str, help='path to the pre-trained model')
    args = parser.parse_args()
    track(args.video_path, args.model_path)
```

这样,我们就完成了基于 CNN-Transformer 的跟踪器代码实现
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值