Minion Detection with PyTorch (Hands-On)

The complete code below is available in a GitHub repository: link


1. Batch-compositing Minions onto background images

import os
import numpy as np
from PIL import Image

def gen_datasets(bg_path, minions_path, img_path, label_path):
    count = 0
    with open(label_path, "w") as f:
        for filename in os.listdir(bg_path):
            bg_img = Image.open("{0}/{1}".format(bg_path, filename))
            bg_img = bg_img.convert("RGB")
            bg_img = bg_img.resize((224, 224))
            bg_img.save("{0}/{1}.png".format(img_path, count))  # save the plain background image
            f.write("{}.png {} {} {} {} {}\n".format(count, 0, 0, 0, 0, 0))  # write the background label (no target)

            count += 1
            name = np.random.randint(1, 21)
            minions_img = Image.open("{}/{}.png".format(minions_path, name))

            new_w = np.random.randint(50, 100)
            new_h = np.random.randint(50, 100)
            resize_img = minions_img.resize((new_w, new_h))  # random scaling
            rot_img = resize_img.rotate(np.random.randint(-45, 45))  # random rotation of the Minion sprite

            paste_x1 = np.random.randint(0, 224 - new_w)
            paste_y1 = np.random.randint(0, 224 - new_h)

            r, g, b, a = rot_img.split()  # the Minion PNGs are RGBA; the alpha channel serves as the paste mask
            bg_img.paste(rot_img, (paste_x1, paste_y1), mask=a)  # paste the Minion onto the background
            paste_x2 = paste_x1 + new_w
            paste_y2 = paste_y1 + new_h

            bg_img.save("{}/{}.png".format(img_path, count))  # save the composited image
            f.write("{}.png {} {} {} {} {}\n".format(
                count, 1, paste_x1, paste_y1, paste_x2, paste_y2))  # write the composited-image label

            count += 1

            if count == 1500:
                print(count)
                break

if __name__ == '__main__':
    # background image directories
    bg_img1 = r"D:\Train_Data_bg"
    bg_img2 = r"D:\PycharmProjects\2020-09-08-minions_reg\Dataset\Bg_Image_train"
    bg_img3 = r"./Dataset/Bg_Image_test"

    minions_img = r"./Dataset/Minions_Image"  # Minion sprite directory

    train_img = r"./Dataset/Train_Data"  # composited images
    validate_img = r"./Dataset/Validate_Data"
    test_img = r"./Dataset/Test_Data"

    train_label = r"./Target/train_label.txt"  # training labels
    validate_label = r"./Target/validate_label.txt"
    test_label = r"./Target/test_label.txt"

    # gen_datasets(bg_img1, minions_img, train_img, train_label)
    gen_datasets(bg_img2, minions_img, validate_img, validate_label)
    # gen_datasets(bg_img3, minions_img, test_img, test_label)
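
A quick sanity check is worth running before training: read one label line back and draw its box onto the corresponding image. The following is a minimal sketch of mine (not part of the original script), assuming the Validate_Data folder and validate_label.txt generated above:

from PIL import Image, ImageDraw

# Verify one generated sample: parse "name.png c x1 y1 x2 y2" and draw the box.
with open(r"./Target/validate_label.txt") as f:
    for line in f:
        name, c, x1, y1, x2, y2 = line.split()
        if c == "1":  # only composited images carry a box
            img = Image.open(r"./Dataset/Validate_Data/{}".format(name))
            ImageDraw.Draw(img).rectangle(
                (int(x1), int(y1), int(x2), int(y2)), outline="red")
            img.show()
            break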

2. Defining the training dataset

import numpy as np
import os
import PIL.Image as pimg
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader


class Train_Data(Dataset):

    def __init__(self, root, txt_path):  # set up file paths and parse the label file
        self.files_name = []
        self.labels_data = []

        f = open(txt_path, "r")
        for line in f:  # one line per image: "name.png c x1 y1 x2 y2"
            line = line.strip()
            img_name = line.split()

            img_path = os.path.join(root, img_name[0])
            self.files_name.append(img_path)

            c = float(img_name[1])
            x1 = float(img_name[2]) / 224  # convert to float and normalize coordinates to [0, 1]
            y1 = float(img_name[3]) / 224
            x2 = float(img_name[4]) / 224
            y2 = float(img_name[5]) / 224

            label_data = np.array([x1, y1, x2, y2, c]).astype(np.float32)  # e.g. [0. 0. 0. 0. 0.] for a pure background

            self.labels_data.append(label_data)  # collect the labels in a list

        self.labels_data = np.array(self.labels_data)  # stack the labels into one numpy matrix
        f.close()

    def __len__(self):
        return len(self.files_name)  # dataset size

    def __getitem__(self, index):
        file = self.files_name[index]  # image path for this index
        img_data = self.image_preprocess(pimg.open(file))  # load and normalize the image

        xs = img_data
        ys = self.labels_data[index]  # label for this index
        return xs, ys

    def image_preprocess(self, x):
        return transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
        ])(x)

# build the dataset instance (imported by the training script)

txt_path = r"D:\PycharmProjects\2020-09-08-minions_reg\Target\train_label.txt"
train_path = r"D:\PycharmProjects\2020-09-08-minions_reg\Dataset\Train_Data"
train_data = Train_Data(root=train_path, txt_path=txt_path)

# data = DataLoader(dataset=train_data,batch_size=10, shuffle=True)
# 
# for img,label in data:
#     print(img.dtype)
#     print(label.dtype)
#     print(img.size())
#     print(label.size())
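
The commented check above can be condensed into a one-batch smoke test. Given 224x224 RGB inputs and five-value labels, the expected shapes are [10, 3, 224, 224] and [10, 5], both float32; this sketch assumes the generated dataset exists at the paths above:

# One-batch smoke test (sketch).
loader = DataLoader(dataset=train_data, batch_size=10, shuffle=True)
imgs, labels = next(iter(loader))
print(imgs.shape, imgs.dtype)      # torch.Size([10, 3, 224, 224]) torch.float32
print(labels.shape, labels.dtype)  # torch.Size([10, 5]) torch.float32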

3. Building the network model

import torch
from torch import nn
import torch.nn.functional as F

class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(num_features=64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        )  # batch*64*112*112

        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(num_features=128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        )  # batch*128*56*56

        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(num_features=256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        )  # batch*256*28*28

        self.conv4 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(num_features=512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        )  # batch*512*14*14

        self.conv5 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(num_features=256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        )  # batch*256*7*7

        self.conv56 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(num_features=128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        )  # batch*128*3*3

        self.conv6 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=64, kernel_size=3, stride=1, padding=0),
            nn.BatchNorm2d(num_features=64),
            nn.ReLU()
        )  # batch*64*1*1

        self.fcn = nn.Sequential(
            nn.Linear(in_features=64 * 1 * 1, out_features=5)
        )

    def forward(self, x):
        y1 = self.conv1(x)
        y2 = self.conv2(y1)
        y3 = self.conv3(y2)
        y4 = self.conv4(y3)
        y5 = self.conv5(y4)
        y5 = self.conv56(y5)
        y6 = self.conv6(y5)

        y6 = y6.reshape(y6.size(0), -1)
        output = self.fcn(y6)  # [N, 5]

        output1 = F.relu(output[:, :4])  # [N, 4] box coordinates, kept non-negative

        output2 = torch.sigmoid(output[:, 4:])  # [N, 1] confidence, squashed into (0, 1)

        return output1, output2
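
A dummy forward pass confirms the shape annotations above. This is purely a self-check sketch, not part of the original pipeline:

# Shape self-check: a 224x224 input should yield [N, 4] boxes and [N, 1] confidences.
net = Net()
x = torch.randn(2, 3, 224, 224)
boxes, conf = net(x)
print(boxes.shape, conf.shape)  # torch.Size([2, 4]) torch.Size([2, 1])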

4. Training

import torch
from torch import nn, optim
from torch.utils.data import DataLoader
import numpy as np
import matplotlib.pyplot as plt
import PIL.Image as pimg
import PIL.ImageFont as Font
import PIL.ImageDraw as draw
from Dataset_train2 import train_data
from Dataset_validate2 import validate_data
from Net_Model import Net
from tensorboardX import SummaryWriter
from sklearn.metrics import r2_score, explained_variance_score

plt.rcParams['font.sans-serif'] = ['SimHei']  # allow CJK glyphs in matplotlib titles
params_path = r"D:\PycharmProjects\2020-09-08-minions_reg\Params\s1.pth"

font_path = r"D:\PycharmProjects\2020-09-08-minions_reg\Font\ARIALNB.TTF"
writer = SummaryWriter("./logs")

if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

net = Net().to(device)
net.load_state_dict(torch.load("./Params/273.pth"))  # resume from a saved checkpoint

coord_loss = nn.MSELoss()  # MSE for the four box coordinates
c_loss = nn.BCELoss()  # binary cross-entropy for the confidence score
optimizer = optim.Adam(net.parameters(), lr=1e-3)

plt.ion()
a = []
b = []
d = []
e = []
net.train()
for epoch in range(1000):
    train_loader = DataLoader(dataset=train_data, batch_size=20, shuffle=True)
    validate_loader = DataLoader(dataset=validate_data, batch_size=10, shuffle=True)
    train_loss = 0
    for i, (img, label) in enumerate(train_loader):
        img = img.to(device)  # torch.Size([20, 3, 224, 224])
        label = label.to(device)  # torch.Size([20, 5])

        out1, out2 = net(img)  # torch.Size([20, 4]), torch.Size([20, 1])

        label1 = label[:, :4]  # the four box coordinates
        label2 = label[:, 4:]  # the confidence

        loss1 = coord_loss(out1, label1)
        loss2 = c_loss(out2, label2)
        loss = loss1 + loss2  # combined coordinate + confidence loss
        train_loss += loss.item() * label.size(0)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if i % 100 == 0:
            print("epoch:{}, i:{}, train_loss:{:.6f}".format(epoch, i, loss.item()))
    torch.save(net.state_dict(), "./Params/{}.pth".format(epoch))

    mean_train_loss = train_loss / len(train_data)
    eval_loss = 0
    label_list_coord = []
    output_list_coord = []
    label_list_con = []
    output_list_con = []
    for i, (img, label) in enumerate(validate_loader):

        img = img.to(device)  # torch.Size([10, 3, 224, 224])
        label = label.to(device)  # torch.Size([10, 5])

        _out1, _out2 = net(img)  # shapes torch.Size([10, 4]) and torch.Size([10, 1])

        _label1 = label[:, :4]  # torch.Size([10, 4])
        _label2 = label[:, 4:]  # torch.Size([10, 1])

        _loss1 = coord_loss(_out1, _label1)  # coordinate loss on the validation batch
        _loss2 = c_loss(_out2, _label2)  # confidence loss on the validation batch
        _loss = _loss1 + _loss2  # total loss

        eval_loss += _loss.item() * label.size(0)  # accumulate, weighted by batch size

        label_list_coord.append(_label1.cpu().numpy().reshape(-1))
        output_list_coord.append(_out1.data.cpu().numpy().reshape(-1))

        label_list_con.append(_label2.cpu().numpy().reshape(-1))
        output_list_con.append(_out2.data.cpu().numpy().reshape(-1))

        coord_label = _label1.cpu().data.numpy()
        coord_out = _out1.cpu().data.numpy()

        c_label = _label2.cpu().data.numpy()
        c_out = _out2.cpu().data.numpy()

        # scale the normalized outputs back to the 224x224 image; take the first image of the batch
        out_x1 = coord_out[0][0] * 224
        out_y1 = coord_out[0][1] * 224
        out_x2 = coord_out[0][2] * 224
        out_y2 = coord_out[0][3] * 224
        out_confidence = c_out[0][0]

        # ground-truth box and confidence for the same image
        label_x1 = coord_label[0][0] * 224
        label_y1 = coord_label[0][1] * 224
        label_x2 = coord_label[0][2] * 224
        label_y2 = coord_label[0][3] * 224
        label_confidence = c_label[0][0]


        # print("label_coord:", label_x1, label_y1, label_x2, label_y2)
        # print("output_coord:", out_x1, out_y1, out_x2, out_y2)
        # print("label_confidences:", label_confidence)
        # print("output_confidences:", out_confidence)

        if i % 10 == 0:
            print('epoch: {}, train_loss: {:.3}, validate_loss: {:.3}'.format(
                epoch, mean_train_loss, _loss.item()))  # show losses to 3 significant digits

            arr = (img[0].cpu().numpy() * 0.5 + 0.5) * 255  # undo the normalization, back to 0-255

            array = np.transpose(arr, [1, 2, 0])  # (3, 224, 224) -> (224, 224, 3)
            img = pimg.fromarray(np.uint8(array))
            imgdraw = draw.ImageDraw(img)

            imgdraw.rectangle((label_x1, label_y1, label_x2, label_y2), outline="blue")  # ground truth
            imgdraw.rectangle((out_x1, out_y1, out_x2, out_y2), outline="red")  # prediction

            font = Font.truetype(font_path, size=10)
            imgdraw.text(xy=(label_x1, label_y1), text=str(label_confidence), fill="blue", font=font)
            imgdraw.text(xy=(out_x1, out_y1), text=str("{:.2f}".format(out_confidence)), fill="red", font=font)
            plt.imshow(img)
            plt.pause(0.1)

    mean_eval_loss = eval_loss / len(validate_data)

    # plot the train/validation loss curves to watch for overfitting
    # plt.clf()
    # plt.figure()
    # plt.title("train vs. validation loss")
    # a.append(epoch)
    # b.append(mean_train_loss)
    # plt.plot(a, b, c="r", label="train_loss")

    # d.append(epoch)
    # e.append(mean_eval_loss)
    # plt.plot(d, e, c="b", label="validate_loss")
    # plt.legend()
    #
    # plt.xlabel("epoch")
    # plt.ylabel("loss")
    # plt.pause(1)
    # plt.close()

    # score the coordinate and confidence predictions
    r2 = r2_score(label_list_coord, output_list_coord)
    var = explained_variance_score(label_list_coord, output_list_coord)
    print("r2_score for coordinates:", r2)
    # print("explained variance for coordinates:", var)
    _r2 = r2_score(label_list_con, output_list_con)
    _var = explained_variance_score(label_list_con, output_list_con)
    print("r2_score for confidence:", _r2)
    # print("explained variance for confidence:", _var)

    writer.add_scalars("loss", {"mean_train_loss": mean_train_loss, "validate_loss": mean_eval_loss}, epoch)

plt.ioff()
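
r2_score treats the four coordinates as independent regression targets; for a detector, the IoU between the predicted and ground-truth boxes is usually the more telling number. A minimal sketch that could be dropped into the validation loop (the iou helper is my addition, not part of the original script):

def iou(box_a, box_b):
    # boxes are (x1, y1, x2, y2); returns intersection-over-union in [0, 1]
    ix1, iy1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    ix2, iy2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / (area_a + area_b - inter + 1e-8)

# e.g. iou((out_x1, out_y1, out_x2, out_y2), (label_x1, label_y1, label_x2, label_y2))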

5. Testing on Minion images

import torch
from Net_Model import Net
import os
import PIL.Image as pimg
import PIL.ImageFont as Font
import PIL.ImageDraw as draw
import matplotlib.pyplot as plt
import numpy as np
import glob
import cv2

test_path = r"D:\PycharmProjects\2020-09-08-minions_reg\Dataset\Test_Data"
params_path = r"D:\PycharmProjects\2020-09-08-minions_reg\Params"
font_path = r"D:\PycharmProjects\2020-09-08-minions_reg\Dataset\ARIALNB.TTF"

net = Net()
net.load_state_dict(torch.load("./Params/273.pth"))
net.eval()

for file in os.listdir(test_path):

# for file in glob.glob(r"D:\PycharmProjects\2020-09-08-minions_reg\Dataset\Test_Data\*.png"):

    img = pimg.open("{0}/{1}".format(test_path, file))
    img_array = (np.array(img) / 255 - 0.5) / 0.5  # same normalization as during training
    trans_array = np.transpose(img_array, [2, 0, 1])  # HWC -> CHW

    # convert to a tensor and add a batch dimension for the network
    tensor_array = torch.from_numpy(trans_array)
    input_array = torch.unsqueeze(tensor_array, dim=0)
    out_array = input_array.float()

    out1, out2 = net(out_array)

    coord_out = out1.cpu().data.numpy()
    c_out = out2.cpu().data.numpy()

    out_x1 = coord_out[0][0] * 224
    out_y1 = coord_out[0][1] * 224
    out_x2 = coord_out[0][2] * 224
    out_y2 = coord_out[0][3] * 224
    out_confidence = c_out[0][0]

    print("output_coord:", out_x1, out_y1, out_x2, out_y2)
    print("output_confidence:", out_confidence)

    # display with PIL
    img = pimg.fromarray(np.uint8((img_array * 0.5 + 0.5) * 255))
    imgdraw = draw.ImageDraw(img)

    imgdraw.rectangle((out_x1, out_y1, out_x2, out_y2), outline="red")
    font = Font.truetype(font_path, size=10)

    imgdraw.text(xy=(out_x1, out_y1), text="confidence:{:.2f}".format(out_confidence), fill="red", font=font)
    plt.title("test")
    plt.imshow(img)
    plt.pause(0.1)

    # display with OpenCV instead
    # img = cv2.imread(os.path.join(test_path, file))
    # cv2.rectangle(img, (int(out_x1), int(out_y1)), (int(out_x2), int(out_y2)), [0, 0, 255], 1)
    # cv2.imshow("test", img)
    # cv2.waitKey(1000)
    # cv2.destroyAllWindows()
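
The per-image preprocessing above can be folded into a small helper. This is a sketch of mine (detect is a hypothetical name, not from the original), which also wraps the forward pass in torch.no_grad() so no autograd graph is built at test time:

def detect(net, image_path):
    # Sketch: returns (x1, y1, x2, y2, confidence) in 224x224 pixel coordinates.
    img = pimg.open(image_path).convert("RGB").resize((224, 224))
    x = (np.array(img) / 255 - 0.5) / 0.5
    x = torch.from_numpy(np.transpose(x, [2, 0, 1])).float().unsqueeze(0)
    with torch.no_grad():  # no gradients needed during inference
        boxes, conf = net(x)
    x1, y1, x2, y2 = (boxes[0] * 224).tolist()
    return x1, y1, x2, y2, conf[0][0].item()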


6. Helper tools

6.1 Crawling background images

import urllib.request
import urllib.parse
import re
import os

# Add headers: Referer is required (otherwise Baidu returns a 403), and
# User-Agent makes the request look like it comes from a browser.
header = \
{
     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
     "referer": "https://image.baidu.com"
    }
url = "https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&is=&fp=result&queryWord={word}&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=0&word={word}&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1&fr=&cg=girl&pn={pageNum}&rn=30&gsm=1e00000000001e&1490169411926="
keyword = input("Enter a search keyword: ")
# URL-encode the keyword
keyword = urllib.parse.quote(keyword, 'utf-8')

n = 0
j = 0

while n < 3000:
    error = 0
    n += 30
    # build the page URL
    url1 = url.format(word=keyword, pageNum=str(n))
    # build the request
    rep = urllib.request.Request(url1, headers=header)
    # open the page
    rep = urllib.request.urlopen(rep)
    # read the page content
    try:
        html = rep.read().decode('utf-8')
    except:
        print("Request failed!")
        error = 1
        print("Failed at page: " + str(n))
    if error == 1:
        continue
    # regex-match the thumbnail URLs
    p = re.compile(r"thumbURL.*?\.jpg")
    # findall returns all matches as a list
    s = p.findall(html)
    if not os.path.isdir("D:/pic"):
        os.makedirs("D:/pic")
    with open("testpic.txt", "a") as f:
        # download each image
        for i in s:
            i = i.replace('thumbURL":"', '')
            print(i)
            f.write(i)
            f.write("\n")
            # save the image
            urllib.request.urlretrieve(i, "D:/pic/89pic{num}.jpg".format(num=j))
            j += 1
print("Total images crawled: " + str(j))

7. Results

(Result image omitted.)
