CNN-单目标追踪实战(Torch实现)

目录

1、概述

1、追踪对象

2、追踪目的

3、预备素材

2、准备训练样本

3、模型训练

4、测试训练的结果


1、概述

1、追踪对象

image

2、追踪目的

当追踪对象出现在任意场景时,能够通过训练出的网络模型精准识别并进行定位

3、预备素材

20张卡通图像:

若干背景图(越多越好)

2、准备训练样本

1、将带透明通道的卡通图像存放在本地文件夹中

image.png

2、下载背景图片(至少1000张)到本地

image.png

 

3、制造训练样本

本次训练有正样本和负样本两类。

正样本:背景图上有卡通人图片,标签置信度为1,还包含卡通图的四个坐标值(左上角和右下角的坐标),网络模型学习的是图像的四个坐标值和置信度。

负样本:背景图上无卡通人图片,标签置信度为0,负样本的四个坐标值均为0

import os
import numpy as np
from PIL import Image

def gen_datasets(bg_path, minions_path, img_path, label_path):
    """Generate negative/positive sample images and the matching label file.

    For every file in ``bg_path`` two 224x224 RGB images are saved to
    ``img_path``:

    * the plain background (negative sample, label ``0 0 0 0 0``);
    * the same background with one randomly chosen cartoon pasted on it after
      a random resize and rotation (positive sample, label ``1 x1 y1 x2 y2``
      where the coordinates are the paste rectangle in pixels).

    Each image gets one line in ``label_path`` of the form
    ``<file name> <confidence> <x1> <y1> <x2> <y2>``.

    Args:
        bg_path: Directory containing the background images.
        minions_path: Directory containing the cartoon images, named
            ``1.png`` ... ``20.png``.
        img_path: Existing directory that receives the generated samples.
        label_path: Path of the label text file; it is overwritten.
    """
    count = 0
    with open(label_path, "w") as f:
        for filename in os.listdir(bg_path):
            # Normalise the background: 3 channels, fixed network input size.
            with Image.open(os.path.join(bg_path, filename)) as src:
                bg_img = src.convert("RGB").resize((224, 224))

            # Negative sample: background only.
            bg_img.save("{}/{}.png".format(img_path, count))
            f.write("{}.png {} {} {} {} {}\n".format(count, 0, 0, 0, 0, 0))
            count += 1

            # Positive sample: paste a random cartoon onto the same background.
            name = np.random.randint(1, 21)
            with Image.open("{}/{}.png".format(minions_path, name)) as src:
                # Force RGBA so the alpha channel always exists; a palette or
                # RGB-only PNG would otherwise break the 4-way split below.
                minions_img = src.convert("RGBA")
            new_w = np.random.randint(50, 100)
            new_h = np.random.randint(50, 100)
            resize_img = minions_img.resize((new_w, new_h))            # random scale
            rot_img = resize_img.rotate(np.random.randint(-180, 180))  # random rotation
            paste_x1 = np.random.randint(0, 224 - new_w)
            paste_y1 = np.random.randint(0, 224 - new_h)
            r, g, b, a = rot_img.split()
            # The alpha channel is the paste mask, so transparent pixels keep
            # the background.
            bg_img.paste(rot_img, (paste_x1, paste_y1), mask=a)
            paste_x2 = paste_x1 + new_w
            paste_y2 = paste_y1 + new_h
            bg_img.save("{}/{}.png".format(img_path, count))
            f.write("{}.png {} {} {} {} {}\n".format(
                count, 1, paste_x1, paste_y1, paste_x2, paste_y2))
            count += 1
            print(count)
            # Stop once more than 1000 samples have been written.
            if count > 1000:
                print(count)
                break

if __name__ == '__main__':
    # Source material and output root.
    bg_img = r"D:\Desktop\Learnn\Minions\back_ground_dir"
    minions_img = r"D:\Desktop\Learnn\Minions\minions_dir"
    root_dir = r"D:\Desktop\Learnn\Minions"

    train_img = os.path.join(root_dir, "train_img")
    validate_img = os.path.join(root_dir, "validate_img")
    test_img = os.path.join(root_dir, "test_img")

    # Create every output directory (the loop variable, not always train_img).
    for i in (train_img, validate_img, test_img):
        os.makedirs(i, exist_ok=True)

    # Label files are written relative to the current working directory.
    train_label = r"./train_label.txt"
    validate_label = r"./validate_label.txt"
    test_label = r"./test_label.txt"

    gen_datasets(bg_img, minions_img, train_img, train_label)
    gen_datasets(bg_img, minions_img, validate_img, validate_label)
    gen_datasets(bg_img, minions_img, test_img, test_label)

生成的样本(部分截图)

image.png

生成的标签(部分截图)

image.png

3、模型训练

1、构建采样器

from torch.utils import data
import os
from PIL import Image
from torchvision import transforms
import torch

class Mydata(data.Dataset):
    """Dataset pairing each sample image with its confidence/box label.

    The label file has one line per image:
    ``<file name> <confidence> <x1> <y1> <x2> <y2>``.

    Attributes:
        dataset: List of ``[image path, [confidence, x1, y1, x2, y2]]`` entries;
            the label values are kept as the strings read from the file.
    """

    def __init__(self, img_path, lab_path):
        """Read the label file and remember the image path for every entry.

        Args:
            img_path: Directory that contains the sample images.
            lab_path: Path of the label text file.
        """
        self.dataset = []
        with open(lab_path, "r") as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Skip blank lines (e.g. a trailing newline) instead of
                    # failing with IndexError on fields[0].
                    continue
                # Keep the image path and its label values together.
                self.dataset.append([os.path.join(img_path, fields[0]), fields[1:6]])

    def __len__(self):
        """Return the number of labelled samples."""
        return len(self.dataset)

    def __getitem__(self, item):
        """Return ``(image tensor, label tensor)`` for sample ``item``.

        The label tensor is ``[confidence, x1, y1, x2, y2]`` with the four
        coordinates divided by 224.
        """
        img_file, label = self.dataset[item]
        y = [float(label[0])]
        y.extend(float(v) / 224 for v in label[1:5])
        # Close the file handle once the pixels have been converted to a tensor.
        with Image.open(img_file) as img:
            # Normalize below uses three channel means, so force RGB.
            x = self.data_scale(img.convert("RGB"))
        return x, torch.tensor(y)

    def data_scale(self, x):
        """Convert a PIL image to a tensor and normalise each channel with mean 0.5, std 0.5."""
        return transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ])(x)

if __name__ == '__main__':   # smoke test: check that the data can be loaded

    label_path = r"D:\Desktop\Learnn\Minions\train_label.txt"
    data_path = r"D:\Desktop\Learnn\Minions\train_img"

    # Fall back to the CPU so the check also runs on machines without CUDA.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    mydata = Mydata(data_path, label_path)
    # Use a separate name so the imported `data` module is not rebound.
    loader = data.DataLoader(mydata, 10, shuffle=True)   # 10 samples per batch
    for i, (x1, y1) in enumerate(loader):
        x = x1.to(device)
        y = y1.to(device)

2、构建训练的文件

from torch.utils import data
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import dataset1
import numpy
from PIL import Image, ImageDraw, ImageFont
import sklearn
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score,mean_absolute_error, mean_squared_error, r2_score,explained_variance_score
import torch.nn.functional as F

class Net(nn.Module):
    """CNN that predicts one bounding box and a confidence score per image.

    Eight conv stages (``con1`` ... ``con8``) are followed by one linear layer
    with five outputs. For a 224x224 input the spatial size goes
    224 -> 112 -> 56 -> 28 -> 14 -> 7 (``con1``-``con5``, each max-pooled by 2),
    stays 7 through ``con6`` and ``con7``, and is average-pooled to 3 in
    ``con8``, giving 16 * 3 * 3 features for ``fc``.
    """

    @staticmethod
    def _stage(in_ch, out_ch, groups, pool=None):
        """Build a 3x3 conv -> batch norm -> ReLU stage, with an optional pooling layer."""
        layers = [
            nn.Conv2d(in_channels=in_ch, out_channels=out_ch, kernel_size=3,
                      stride=1, padding=1, dilation=1, groups=groups),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(),
        ]
        if pool is not None:
            layers.append(pool)
        return nn.Sequential(*layers)

    def __init__(self):
        super().__init__()
        self.con1 = self._stage(3, 128, 1, nn.MaxPool2d((2, 2), 2))     # 224 -> 112
        self.con2 = self._stage(128, 256, 4, nn.MaxPool2d((2, 2), 2))   # 112 -> 56
        self.con3 = self._stage(256, 512, 8, nn.MaxPool2d((2, 2), 2))   # 56 -> 28
        self.con4 = self._stage(512, 256, 8, nn.MaxPool2d((2, 2), 2))   # 28 -> 14
        self.con5 = self._stage(256, 128, 4, nn.MaxPool2d((2, 2), 2))   # 14 -> 7
        self.con6 = self._stage(128, 128, 4)                            # 7
        self.con7 = self._stage(128, 32, 4)                             # 7
        self.con8 = self._stage(32, 16, 1, nn.AvgPool2d((2, 2), 2))     # 7 -> 3

        self.fc = nn.Linear(16 * 3 * 3, 5)

    def forward(self, x):
        """Run the network.

        Args:
            x: Image batch; 224x224 inputs yield the 16 * 3 * 3 features
                expected by ``fc``.

        Returns:
            Tuple ``(coordinate, confident)``: ``coordinate`` is the ReLU of
            output columns 1-4 (shape ``(N, 4)``), ``confident`` is the sigmoid
            of output column 0 (shape ``(N,)``).
        """
        stages = (self.con1, self.con2, self.con3, self.con4,
                  self.con5, self.con6, self.con7, self.con8)
        features = x
        for stage in stages:
            features = stage(features)

        # Flatten everything except the batch dimension for the linear layer.
        flat = features.reshape(features.size(0), -1)
        out = self.fc(flat)

        coordinate = torch.relu(out[:, 1:])
        confident = torch.sigmoid(out[:, 0])
        return coordinate, confident


if __name__ == '__main__':
    # Training set.
    batch_size1 = 5
    data_path1 = r"C:\Users\Administrator\Desktop\Learnn\Minions\train_img"
    label_path1 = r"C:\Users\Administrator\Desktop\Learnn\Minions\train_label.txt"
    save_net = "./minionsnet1.pth"
    train_data = dataset1.Mydata(data_path1, label_path1)
    train_loader = data.DataLoader(train_data, batch_size1, shuffle=True)

    # Validation set.
    batch_size = 10
    data_path = r"C:\Users\Administrator\Desktop\Learnn\Minions\validate_img"
    label_path = r"C:\Users\Administrator\Desktop\Learnn\Minions\validate_label.txt"
    validation_data = dataset1.Mydata(data_path, label_path)
    validation_loader = data.DataLoader(validation_data, batch_size, shuffle=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Resume from a previously saved (pickled) whole model. map_location lets
    # a checkpoint saved on a GPU load on a CPU-only machine as well.
    # NOTE(review): this script never writes save_net back; add a torch.save
    # call if the trained weights should be kept.
    net = torch.load(save_net, map_location=device).to(device)

    loss_fn1 = nn.BCELoss()   # confidence loss
    loss_fn2 = nn.MSELoss()   # coordinate loss

    optim = torch.optim.Adam(net.parameters(), lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0)

    # Load the overlay font once instead of once per validation batch, and fall
    # back to Pillow's built-in font where consola.ttf is not installed.
    try:
        font = ImageFont.truetype("consola.ttf", 25, encoding="unic")
    except OSError:
        font = ImageFont.load_default()

    max_val_batches = 10   # validation batches evaluated and displayed per pass

    for epoch in range(4000):
        net.train()   # validation below switches to eval mode, so reset here
        train_conf_loss = 0.0
        train_coor_loss = 0.0
        total_loss = 0.0
        correct = 0
        pred_coor = []   # predicted coordinates, flattened
        pred_conf = []   # predicted confidences
        true_conf = []   # confidence labels
        true_coor = []   # coordinate labels, flattened
        for i, (x, y) in enumerate(train_loader):
            x = x.to(device)
            y = y.to(device)
            coordinate, confident = net(x)

            confidence_label = y[:, 0:1]
            coordinate_label = y[:, 1:]
            # BCELoss needs prediction and target to have the same shape, so
            # turn the (N,) confidence into (N, 1) to match the label slice.
            confidence = torch.reshape(confident, (confident.size(0), -1))
            loss1 = loss_fn1(confidence, confidence_label)
            loss2 = loss_fn2(coordinate, coordinate_label)
            loss = loss1 * 0.8 + loss2 * 0.2

            optim.zero_grad()
            loss.backward()
            optim.step()

            train_conf_loss += loss1.item()
            train_coor_loss += loss2.item()
            total_loss += loss.item()

            # A sample counts as correct when thresholding the confidence at
            # 0.5 reproduces its 0/1 label.
            correct += ((confidence >= 0.5).float() == confidence_label).sum().item()

            # Collect flat Python floats so the sklearn metrics get 1-D input.
            pred_coor.extend(coordinate.detach().cpu().numpy().ravel().tolist())
            pred_conf.extend(confident.detach().cpu().numpy().ravel().tolist())
            true_conf.extend(confidence_label.cpu().numpy().ravel().tolist())
            true_coor.extend(coordinate_label.cpu().numpy().ravel().tolist())

        # sklearn metrics take (y_true, y_pred); swapping them changes r2.
        print("coor     :")
        print("r2       :", r2_score(true_coor, pred_coor))
        print("explained:", explained_variance_score(true_coor, pred_coor))
        print("meanabs  :", mean_absolute_error(true_coor, pred_coor))
        print("meansq   :", mean_squared_error(true_coor, pred_coor))
        print()
        print("conf     :")
        print("r2       :", r2_score(true_conf, pred_conf))
        print("explained:", explained_variance_score(true_conf, pred_conf))
        print("meanabs  :", mean_absolute_error(true_conf, pred_conf))
        print("meansq   :", mean_squared_error(true_conf, pred_conf))
        print()

        # Each accumulated loss is already a per-batch mean, so average over
        # the number of batches rather than the number of samples.
        num_train_batches = max(len(train_loader), 1)
        train_avg_conf_loss = train_conf_loss / num_train_batches
        train_avg_coor_loss = train_coor_loss / num_train_batches
        train_avg_total_loss = total_loss / num_train_batches
        train_avgacc = correct / max(len(train_data), 1)
        print("train:")
        print("epoch:{},train_avg_conf_loss:{:.4f}".format(epoch, train_avg_conf_loss))
        print("epoch:{},train_avg_coor_loss:{:.6f}".format(epoch, train_avg_coor_loss))
        print("epoch:{},train_avg_total_loss:{:.4f}".format(epoch, train_avg_total_loss))
        print()

        # Validation every 10 epochs.
        if epoch % 10 == 0:
            # eval() + no_grad(): do not update BatchNorm statistics or build
            # autograd graphs while validating.
            net.eval()
            valid_conf_loss = 0.0
            valid_coor_loss = 0.0
            total_loss = 0.0
            val_batches = 0
            with torch.no_grad():
                for i, (x, y) in enumerate(validation_loader):
                    if i >= max_val_batches:
                        break

                    # Rebuild the first image of the batch for display: undo
                    # Normalize(0.5, 0.5), scale back to 0..255, CHW -> HWC.
                    img_np = (x[0].cpu().numpy() * 0.5 + 0.5) * 255
                    img_np = img_np.transpose(1, 2, 0)
                    # Image.fromarray needs uint8 data here; round and clip
                    # first so float error cannot shift a pixel value.
                    img = Image.fromarray(numpy.uint8(numpy.clip(numpy.rint(img_np), 0, 255)))

                    x = x.to(device)
                    y = y.to(device)
                    out1, out2 = net(x)
                    out2 = torch.reshape(out2, (out2.size(0), -1))

                    # Label box of the first sample, back in pixels.
                    label_coor = (y[0].cpu().numpy() * 224).tolist()
                    x1_l, y1_l, x2_l, y2_l = label_coor[1:5]

                    confidence = out2[0].cpu().numpy()
                    print(confidence[0])

                    # Predicted box of the first sample, back in pixels.
                    x1_o, y1_o, x2_o, y2_o = (out1[0].cpu().numpy() * 224).tolist()
                    # An untrained net may output x2 < x1 or y2 < y1, which
                    # recent Pillow versions reject; order the corners first.
                    x1_o, x2_o = sorted((x1_o, x2_o))
                    y1_o, y2_o = sorted((y1_o, y2_o))

                    draw = ImageDraw.Draw(img)
                    draw.rectangle((x1_o, y1_o, x2_o, y2_o), outline="red")    # prediction
                    draw.rectangle((x1_l, y1_l, x2_l, y2_l), outline="blue")   # label
                    # draw.text needs a string, hence str().
                    draw.text((150, 20), str(confidence[0]), 'fuchsia', font)
                    plt.imshow(img)
                    plt.pause(0.5)
                    plt.clf()

                    confidence_label = y[:, 0:1]
                    coordinate_label = y[:, 1:]
                    loss1 = loss_fn1(out2, confidence_label)
                    loss2 = loss_fn2(out1, coordinate_label)
                    valid_conf_loss += loss1.item()
                    valid_coor_loss += loss2.item()
                    total_loss += loss1.item() + loss2.item()
                    val_batches += 1
            plt.close()

            # Average over the batches actually evaluated.
            val_denominator = max(val_batches, 1)
            val_avg_conf_loss = valid_conf_loss / val_denominator
            val_avg_coor_loss = valid_coor_loss / val_denominator
            val_avg_total_loss = total_loss / val_denominator
            print("valid:")
            print("epoch:{},val_avg_conf_loss:{:.4f}".format(epoch, val_avg_conf_loss))
            print("epoch:{},val_avg_coor_loss:{:.4f}".format(epoch, val_avg_coor_loss))
            print("epoch:{},val_avg_total_loss:{:.4f}".format(epoch, val_avg_total_loss))
            print()

        print("epoch:{},train_acc:{:.4f}".format(epoch, train_avgacc))

4、测试训练的结果

测试文件一

from torch.utils import data
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import dataset1
import numpy
from PIL import Image, ImageDraw, ImageFont
import cv2
from train_net1 import Net
from torch.utils import data
import numpy
import sklearn
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score,mean_absolute_error, mean_squared_error, r2_score,explained_variance_score
import torch.nn.functional as F


if __name__ == '__main__':
    # Test set, one image per batch.
    batch_size = 1
    data_path = r"C:\Users\Administrator\Desktop\Learnn\Minions\test_img"
    label_path = r"C:\Users\Administrator\Desktop\Learnn\Minions\test_label.txt"
    save_net = "./minionsnet1.pth"
    test_data = dataset1.Mydata(data_path, label_path)
    test_loader = data.DataLoader(test_data, batch_size, shuffle=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load the pickled whole model; map_location lets a checkpoint saved on a
    # GPU load on a CPU-only machine as well.
    net = torch.load(save_net, map_location=device).to(device)
    # Inference only: eval() stops BatchNorm from updating its running
    # statistics and makes it use them instead of per-batch statistics.
    net.eval()

    # NOTE(review): in eval mode every pass should print essentially the same
    # metrics; the repeat count is kept from the original script.
    for epoch in range(1, 40001):
        pred_coor = []   # predicted coordinates, flattened
        pred_conf = []   # predicted confidences
        true_conf = []   # confidence labels
        true_coor = []   # coordinate labels, flattened
        with torch.no_grad():
            for i, (x, y) in enumerate(test_loader):
                x = x.to(device)
                y = y.to(device)
                coordinate, confident = net(x)

                confidence_label = y[:, 0:1]
                coordinate_label = y[:, 1:]

                # Flatten everything to plain floats: mixing shape-(1,) arrays
                # with scalars in one list gives the sklearn metrics a ragged
                # input.
                pred_coor.extend(coordinate.cpu().numpy().ravel().tolist())
                pred_conf.extend(confident.cpu().numpy().ravel().tolist())
                true_conf.extend(confidence_label.cpu().numpy().ravel().tolist())
                true_coor.extend(coordinate_label.cpu().numpy().ravel().tolist())

        # Score confidence and coordinates together; sklearn metrics take
        # (y_true, y_pred).
        targets = true_conf + true_coor
        outputs = pred_conf + pred_coor
        print("total     :")
        print("r2       :", r2_score(targets, outputs))
        print("explained:", explained_variance_score(targets, outputs))
        print("meanabs  :", mean_absolute_error(targets, outputs))
        print("meansq   :", mean_squared_error(targets, outputs))

测试文件二

from torch.utils import data
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import dataset1
import numpy
from PIL import Image, ImageDraw, ImageFont
import cv2
from train_net1 import Net

if __name__ == '__main__':
    # Test set, one image per batch.
    batch_size = 1
    data_path = r"C:\Users\Administrator\Desktop\Learnn\Minions\test_img"
    label_path = r"C:\Users\Administrator\Desktop\Learnn\Minions\test_label.txt"
    save_net = "./minionsnet1.pth"
    test_data = dataset1.Mydata(data_path, label_path)
    test_loader = data.DataLoader(test_data, batch_size, shuffle=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load the pickled whole model; map_location lets a checkpoint saved on a
    # GPU load on a CPU-only machine as well.
    net = torch.load(save_net, map_location=device).to(device)
    # Inference only: eval() stops BatchNorm from updating its running
    # statistics and makes it use them instead of per-batch statistics.
    net.eval()

    # Load the overlay font once instead of once per image, and fall back to
    # Pillow's built-in font where consola.ttf is not installed.
    try:
        font = ImageFont.truetype("consola.ttf", 25, encoding="unic")
    except OSError:
        font = ImageFont.load_default()

    for epoch in range(1, 11):
        with torch.no_grad():
            for i, (x, y) in enumerate(test_loader):
                # Rebuild the image for display: undo Normalize(0.5, 0.5),
                # scale back to 0..255 and go from CHW to HWC.
                img_np = (x[0].cpu().numpy() * 0.5 + 0.5) * 255
                img_np = img_np.transpose(1, 2, 0)
                # Image.fromarray needs uint8 data here; round and clip first
                # so float error cannot shift a pixel value.
                img = Image.fromarray(numpy.uint8(numpy.clip(numpy.rint(img_np), 0, 255)))

                x = x.to(device)
                y = y.to(device)
                out1, out2 = net(x)

                # Label box, back in pixels.
                label_coor = (y[0].cpu().numpy() * 224).tolist()
                x1_l, y1_l, x2_l, y2_l = label_coor[1:5]

                confidence = format(out2[0].item(), ".3f")

                # Predicted box, back in pixels.
                x1_o, y1_o, x2_o, y2_o = (out1[0].cpu().numpy() * 224).tolist()
                # The net may output x2 < x1 or y2 < y1, which recent Pillow
                # versions reject; order the corners first.
                x1_o, x2_o = sorted((x1_o, x2_o))
                y1_o, y2_o = sorted((y1_o, y2_o))

                draw = ImageDraw.Draw(img)
                draw.rectangle((x1_o, y1_o, x2_o, y2_o), outline="red")    # prediction
                draw.rectangle((x1_l, y1_l, x2_l, y2_l), outline="blue")   # label
                draw.text((150, 20), confidence, 'fuchsia', font)
                plt.imshow(img)
                plt.pause(0.3)
                plt.clf()
关系抽取实战是指通过自然语言处理技术,从文本中提取出实体之间的关系。而使用PyTorch这一深度学习框架进行关系抽取实战可以具体分为以下步骤: 1. 数据准备:首先需要准备标注好的数据集,其中包含了实体和它们之间的关系。可以使用标注工具,对文本进行逐句标注,将实体和关系标注出来,并生成相应的训练数据。 2. 数据预处理:对准备好的训练数据进行处理,将文本转化为数字化的表示形式,例如将词映射为对应的索引,将实体和关系标签映射为对应的数字。这一过程可以使用PyTorch提供的数据处理工具来完成。 3. 模型设计:选择合适的深度学习模型来进行关系抽取是很重要的。可以使用卷积神经网络(CNN)、长短时记忆网络(LSTM)或注意力机制等模型,搭建适合任务的模型架构。使用PyTorch框架可以通过定义模型的网络结构、参数等来实现。 4. 模型训练:将准备好的数据输入到模型中,通过反向传播来更新模型参数,以最小化损失函数。可以使用PyTorch提供的优化器和损失函数来实现模型训练。通过迭代多次训练数据集,不断优化模型,提高其关系抽取的准确性。 5. 模型评估与应用:使用训练好的模型对新数据进行预测,并评估模型的性能。可以使用准确率、召回率、F1值等指标对模型进行评估。同时,可以将关系抽取模型应用于实际场景中,例如从新闻文本中提取实体关系,帮助用户了解实体间的联系。 总而言之,通过PyTorch框架进行关系抽取实战可以帮助我们构建和训练深度学习模型,准确地提取出文本中的实体关系,为实际应用提供有价值的信息。
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值