1. Overview
1. The object to track
2. The goal
When the target appears in any scene, the trained network model should be able to recognize it accurately and localize it.
3. Prepared materials
20 cartoon images:
A number of background images (the more, the better)
2. Preparing training samples
1. Store the cartoon images (with an alpha channel) in a local folder
2. Download background images (at least 1000) to a local folder
3. Generate the training samples
There are two kinds of training samples: positive and negative.
Positive sample: a background image with the cartoon character pasted on it. The confidence label is 1, and the label also contains the four coordinate values of the cartoon (the top-left and bottom-right corners). The network learns to predict these four coordinates together with the confidence.
Negative sample: a background image without the cartoon character. The confidence label is 0 and all four coordinate values are 0.
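For illustration, the generator below writes one sample per line in the form "file name, confidence, x1, y1, x2, y2", alternating negative and positive samples. A few hypothetical lines of train_label.txt could therefore look like this (the numbers are made up):

    0.png 0 0 0 0 0
    1.png 1 37 62 120 155
    2.png 0 0 0 0 0
    3.png 1 80 14 166 95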
import os
import numpy as np
from PIL import Image

def gen_datasets(bg_path, minions_path, img_path, label_path):
    count = 0
    with open(label_path, "w") as f:
        for filename in os.listdir(bg_path):                 # iterate over the background images
            bg_img = Image.open("{0}/{1}".format(bg_path, filename))
            bg_img = bg_img.convert("RGB")                   # unify the channels
            bg_img = bg_img.resize((224, 224))               # unify the size
            bg_img.save("{}/{}.png".format(img_path, count)) # save the plain background
            f.write("{}.png {} {} {} {} {}\n".format(count, 0, 0, 0, 0, 0))  # negative sample: confidence 0, box all zeros
            count += 1

            name = np.random.randint(1, 21)                  # pick one of the 20 cartoon images at random
            minions_img = Image.open("{}/{}.png".format(minions_path, name))
            new_w = np.random.randint(50, 100)
            new_h = np.random.randint(50, 100)
            resize_img = minions_img.resize((new_w, new_h))             # random scaling
            rot_img = resize_img.rotate(np.random.randint(-180, 180))   # random rotation
            paste_x1 = np.random.randint(0, 224 - new_w)
            paste_y1 = np.random.randint(0, 224 - new_h)
            r, g, b, a = rot_img.split()                     # split off the alpha channel
            bg_img.paste(rot_img, (paste_x1, paste_y1), mask=a)  # paste the minion onto the background using the alpha mask
            paste_x2 = paste_x1 + new_w
            paste_y2 = paste_y1 + new_h
            bg_img.save("{}/{}.png".format(img_path, count)) # save the composited positive sample
            f.write("{}.png {} {} {} {} {}\n".format(
                count, 1, paste_x1, paste_y1, paste_x2, paste_y2))
            count += 1
            print(count)
            if count > 1000:
                print(count)
                break

if __name__ == '__main__':
    bg_img = r"D:\Desktop\Learnn\Minions\back_ground_dir"
    minions_img = r"D:\Desktop\Learnn\Minions\minions_dir"
    root_dir = r"D:\Desktop\Learnn\Minions"
    train_img = os.path.join(root_dir, "train_img")
    validate_img = os.path.join(root_dir, "validate_img")
    test_img = os.path.join(root_dir, "test_img")
    for i in (train_img, validate_img, test_img):
        if not os.path.isdir(i):
            os.makedirs(i)                                   # bug fix: create each directory, not only train_img
    train_label = r"./train_label.txt"
    validate_label = r"./validate_label.txt"
    test_label = r"./test_label.txt"
    gen_datasets(bg_img, minions_img, train_img, train_label)
    gen_datasets(bg_img, minions_img, validate_img, validate_label)
    gen_datasets(bg_img, minions_img, test_img, test_label)
Generated samples (partial screenshot)
Generated labels (partial screenshot)
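To double-check a generated sample against its label, a minimal sketch like the following redraws the labeled box with PIL. The folder paths follow the generator above, and picking the second label line (the first positive sample in the alternating layout) is just for illustration:

    import os
    from PIL import Image, ImageDraw

    train_img = r"D:\Desktop\Learnn\Minions\train_img"   # same folder as in the generator above
    train_label = r"./train_label.txt"
    with open(train_label) as f:
        line = f.readlines()[1].split()                  # second line: the first positive sample
    img = Image.open(os.path.join(train_img, line[0]))
    conf, x1, y1, x2, y2 = [float(v) for v in line[1:6]]
    if conf == 1:                                        # only positive samples carry a real box
        ImageDraw.Draw(img).rectangle((x1, y1, x2, y2), outline="blue")
    img.show()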
3. Model training
1. Build the dataset
from torch.utils import data
import os
from PIL import Image
from torchvision import transforms
import torch

class Mydata(data.Dataset):
    def __init__(self, img_path, lab_path):
        self.dataset = []
        with open(lab_path, "r") as f:
            filenames = f.readlines()
        for filename in filenames:
            filename = filename.split()
            # store the image path together with its label fields
            self.dataset.append([os.path.join(img_path, filename[0]), filename[1:6]])

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, item):
        data = self.dataset[item]            # one entry: [image path, [confidence, x1, y1, x2, y2]]
        a = self.dataset[item][1][0:5]
        y = [float(a[0])]                    # the confidence stays as 0/1
        for i in a[1:5]:
            b = float(i) / 224               # normalize the coordinates to [0, 1]
            y.append(b)
        x = self.data_scale(Image.open(data[0]))   # to tensor + normalization
        y = torch.tensor(y)
        return x, y

    def data_scale(self, x):
        return transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ])(x)

if __name__ == '__main__':   # quick check that the data pipeline works
    label_path = r"D:\Desktop\Learnn\Minions\train_label.txt"
    data_path = r"D:\Desktop\Learnn\Minions\train_img"
    mydata = Mydata(data_path, label_path)
    loader = data.DataLoader(mydata, 10, shuffle=True)   # load 10 images per batch
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    for i, (x1, y1) in enumerate(loader):
        x = x1.to(device)
        y = y1.to(device)
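Before wiring the dataset into training, it can help to confirm what a single sample looks like. A minimal sketch, assuming the class above is saved as dataset1.py (the module name the training script imports) and the paths point at already-generated data:

    from dataset1 import Mydata

    mydata = Mydata(r"D:\Desktop\Learnn\Minions\train_img",
                    r"D:\Desktop\Learnn\Minions\train_label.txt")
    x, y = mydata[0]
    print(x.shape)   # torch.Size([3, 224, 224]), values roughly in [-1, 1] after Normalize
    print(y)         # tensor([confidence, x1/224, y1/224, x2/224, y2/224])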
2. Build the training script
from torch.utils import data
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import dataset1
import numpy
from PIL import Image, ImageDraw, ImageFont
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, explained_variance_score

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.con1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=128, kernel_size=3, stride=1,    # input 224
                      padding=1, dilation=1, groups=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d((2, 2), 2))    # -> 112
        self.con2 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1,
                      padding=1, dilation=1, groups=4),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d((2, 2), 2))    # -> 56
        self.con3 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1,
                      padding=1, dilation=1, groups=8),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d((2, 2), 2))    # -> 28
        self.con4 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=256, kernel_size=3, stride=1,
                      padding=1, dilation=1, groups=8),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d((2, 2), 2))    # -> 14
        self.con5 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=128, kernel_size=3, stride=1,
                      padding=1, dilation=1, groups=4),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d((2, 2), 2))    # -> 7
        self.con6 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1,
                      padding=1, dilation=1, groups=4),
            nn.BatchNorm2d(128),
            nn.ReLU())                  # -> 7
        self.con7 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=32, kernel_size=3, stride=1,
                      padding=1, dilation=1, groups=4),
            nn.BatchNorm2d(32),
            nn.ReLU())                  # -> 7 (no pooling here)
        self.con8 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=16, kernel_size=3, stride=1,
                      padding=1, dilation=1, groups=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.AvgPool2d((2, 2), 2))    # -> 3
        self.fc = nn.Linear(16 * 3 * 3, 5)

    def forward(self, x):
        y = self.con1(x)
        y = self.con2(y)
        y = self.con3(y)
        y = self.con4(y)
        y = self.con5(y)
        y = self.con6(y)
        y = self.con7(y)
        y = self.con8(y)
        y = torch.reshape(y, [y.size(0), -1])
        y = self.fc(y)
        coordinate = torch.relu(y[:, 1:])    # four box coordinates, kept non-negative
        confident = torch.sigmoid(y[:, 0])   # confidence squashed into (0, 1)
        return coordinate, confident
if __name__ == '__main__':
    batch_size1 = 5                                   # training set
    data_path1 = r"C:\Users\Administrator\Desktop\Learnn\Minions\train_img"
    label_path1 = r"C:\Users\Administrator\Desktop\Learnn\Minions\train_label.txt"
    save_net = "./minionsnet1.pth"
    train_data = dataset1.Mydata(data_path1, label_path1)
    train_loader = data.DataLoader(train_data, batch_size1, shuffle=True)

    batch_size = 10                                   # validation set
    data_path = r"C:\Users\Administrator\Desktop\Learnn\Minions\validate_img"
    label_path = r"C:\Users\Administrator\Desktop\Learnn\Minions\validate_label.txt"
    validation_data = dataset1.Mydata(data_path, label_path)
    validation_loader = data.DataLoader(validation_data, batch_size, shuffle=True)

    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    # net = Net().to(device)                          # first run: start from a freshly initialized network
    net = torch.load(save_net).to(device)             # later runs: resume from the previously saved model

    loss_fn1 = nn.BCELoss()                           # confidence loss
    loss_fn2 = nn.MSELoss()                           # coordinate regression loss
    optim = torch.optim.Adam(net.parameters(), lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0)
    net.train()

    for epoch in range(4000):
        train_conf_loss = 0
        train_coor_loss = 0
        total_loss = 0
        l1 = []        # predicted coordinates
        l2 = []        # predicted confidences
        l3 = []        # confidence labels
        l4 = []        # coordinate labels
        for i, (x, y) in enumerate(train_loader):
            x = x.to(device)
            y = y.to(device)
            coordinate, confident = net(x)
            confidence = confident
            confidence_label = y[:, 0:1]
            coordinate_label = y[:, 1:]
            # pitfall: reshape the confidence to (N, 1) so it matches the label shape expected by BCELoss
            loss1 = loss_fn1(torch.reshape(confidence, (confidence.size(0), -1)), confidence_label)
            loss2 = loss_fn2(coordinate, coordinate_label)
            loss = loss1 * 0.8 + loss2 * 0.2          # weighted sum of the two losses

            optim.zero_grad()
            loss.backward()
            optim.step()

            train_conf_loss += loss1.item()           # .item() moves the scalar to the CPU
            train_coor_loss += loss2.item()
            total_loss += loss.item()

            for m in coordinate.cpu().detach().numpy():
                for k in m:
                    l1.append(k)
            for n in confident.cpu().detach().numpy():
                l2.append(n)
            for m in confidence_label.cpu().numpy():
                l3.append(m[0])
            for n in coordinate_label.cpu().numpy():
                for k in n:
                    l4.append(k)

        print("coor :")
        print("r2       :", r2_score(l4, l1))
        print("explained:", explained_variance_score(l4, l1))
        print("meanabs  :", mean_absolute_error(l4, l1))
        print("meansq   :", mean_squared_error(l4, l1))
        print()
        print("conf :")
        print("r2       :", r2_score(l3, l2))   # pitfall: swapping labels and predictions yields a misleading (often negative) r2
        print("explained:", explained_variance_score(l3, l2))
        print("meanabs  :", mean_absolute_error(l3, l2))
        print("meansq   :", mean_squared_error(l3, l2))
        print()

        train_avg_conf_loss = train_conf_loss / len(train_data)
        train_avg_coor_loss = train_coor_loss / len(train_data)
        train_avg_total_loss = total_loss / len(train_data)
        print("train:")
        print("epoch:{},train_avg_conf_loss:{:.4f}".format(epoch, train_avg_conf_loss))
        print("epoch:{},train_avg_coor_loss:{:.6f}".format(epoch, train_avg_coor_loss))
        print("epoch:{},train_avg_total_loss:{:.4f}".format(epoch, train_avg_total_loss))
        print()
        torch.save(net, save_net)                     # save the whole model so the next run and the test scripts can load it

        # validation
        if epoch % 10 == 0:
            valid_conf_loss = 0
            valid_coor_loss = 0
            total_loss = 0
            for i, (x, y) in enumerate(validation_loader):
                img_or_np = x.cpu().detach().numpy()          # image tensor -> numpy
                img_np = (img_or_np * 0.5 + 0.5) * 255        # undo the normalization back to 0-255 pixel values
                img_np = img_np[0]
                img_np = img_np.swapaxes(0, 1)                # CHW -> HWC
                img_np = img_np.swapaxes(1, 2)
                img = Image.fromarray(numpy.uint8(img_np))    # note: omitting numpy.uint8() raises an error here

                x = x.to(device)                              # forward pass
                y = y.to(device)
                out1, out2 = net(x)
                out2 = torch.reshape(out2, (out2.size(0), -1))

                label_coor = y.cpu().detach().numpy()[0]      # label box, scaled back to pixels
                x1_l = label_coor[1] * 224
                y1_l = label_coor[2] * 224
                x2_l = label_coor[3] * 224
                y2_l = label_coor[4] * 224

                confidence = out2.cpu().detach().numpy()[0]
                print(confidence[0])

                cor = out1.cpu().detach()[0].numpy()          # predicted box, scaled back to pixels
                x1_o = cor[0] * 224
                y1_o = cor[1] * 224
                x2_o = cor[2] * 224
                y2_o = cor[3] * 224

                draw = ImageDraw.Draw(img)                    # visualize both boxes
                draw.rectangle((x1_o, y1_o, x2_o, y2_o), outline="red")    # prediction
                draw.rectangle((x1_l, y1_l, x2_l, y2_l), outline="blue")   # label
                font = ImageFont.truetype("consola.ttf", 25, encoding="unic")
                draw.text((150, 20), str(confidence[0]), 'fuchsia', font)  # pitfall: draw.text() needs a string
                plt.imshow(img)
                plt.pause(0.5)
                plt.clf()

                confidence = out2
                coordinate = out1
                confidence_label = y[:, 0:1]
                coordinate_label = y[:, 1:]
                loss1 = loss_fn1(confidence, confidence_label)
                loss2 = loss_fn2(coordinate, coordinate_label)
                valid_conf_loss += loss1.item()
                valid_coor_loss += loss2.item()
                loss = loss1.item() + loss2.item()
                total_loss += loss
                if i == 10:
                    plt.close()
                    break

            val_avg_conf_loss = valid_conf_loss / 10
            val_avg_coor_loss = valid_coor_loss / 10
            val_avg_total_loss = total_loss / 10
            print("valid:")
            print("epoch:{},val_avg_conf_loss:{:.4f}".format(epoch, val_avg_conf_loss))
            print("epoch:{},val_avg_coor_loss:{:.4f}".format(epoch, val_avg_coor_loss))
            print("epoch:{},val_avg_total_loss:{:.4f}".format(epoch, val_avg_total_loss))
            print()
4. Testing the trained model
Test script 1
from torch.utils import data
import torch
import dataset1
from train_net1 import Net           # needed so torch.load can unpickle the saved network
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, explained_variance_score

if __name__ == '__main__':
    batch_size = 1
    data_path = r"C:\Users\Administrator\Desktop\Learnn\Minions\test_img"
    label_path = r"C:\Users\Administrator\Desktop\Learnn\Minions\test_label.txt"
    save_net = "./minionsnet1.pth"
    test_data = dataset1.Mydata(data_path, label_path)
    test_loader = data.DataLoader(test_data, batch_size, shuffle=True)

    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    net = torch.load(save_net).to(device)
    net.eval()                        # evaluation mode: use the running BatchNorm statistics

    for epoch in range(1, 40001):
        l1 = []        # predicted coordinates
        l2 = []        # predicted confidences
        l3 = []        # confidence labels
        l4 = []        # coordinate labels
        for i, (x, y) in enumerate(test_loader):
            x = x.to(device)
            y = y.to(device)
            with torch.no_grad():
                coordinate, confident = net(x)
            confidence_label = y[:, 0:1]
            coordinate_label = y[:, 1:]
            for m in coordinate.cpu().detach().numpy():
                for k in m:
                    l1.append(k)
            for n in confident.cpu().detach().numpy():
                l2.append(n)
            for m in confidence_label.cpu().numpy():
                l3.append(m[0])
            for n in coordinate_label.cpu().numpy():
                for k in n:
                    l4.append(k)
        print("total :")
        print("r2       :", r2_score(l3 + l4, l2 + l1))
        print("explained:", explained_variance_score(l3 + l4, l2 + l1))
        print("meanabs  :", mean_absolute_error(l3 + l4, l2 + l1))
        print("meansq   :", mean_squared_error(l3 + l4, l2 + l1))
Test script 2
from torch.utils import data
import torch
import matplotlib.pyplot as plt
import dataset1
import numpy
from PIL import Image, ImageDraw, ImageFont
from train_net1 import Net           # needed so torch.load can unpickle the saved network

if __name__ == '__main__':
    batch_size = 1
    data_path = r"C:\Users\Administrator\Desktop\Learnn\Minions\test_img"
    label_path = r"C:\Users\Administrator\Desktop\Learnn\Minions\test_label.txt"
    save_net = "./minionsnet1.pth"
    test_data = dataset1.Mydata(data_path, label_path)
    test_loader = data.DataLoader(test_data, batch_size, shuffle=True)

    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    net = torch.load(save_net).to(device)
    net.eval()                        # evaluation mode for inference

    for epoch in range(1, 11):
        j = 0
        for i, (x, y) in enumerate(test_loader):
            img_or_np = x.cpu().detach().numpy()        # image tensor -> numpy
            img_np = (img_or_np * 0.5 + 0.5) * 255      # undo the normalization back to 0-255 pixel values
            img_np = img_np[0]
            img_np = img_np.swapaxes(0, 1)              # CHW -> HWC
            img_np = img_np.swapaxes(1, 2)
            img = Image.fromarray(numpy.uint8(img_np))  # note: omitting numpy.uint8() raises an error here

            x = x.to(device)
            y = y.to(device)
            with torch.no_grad():
                out1, out2 = net(x)

            label_coor = y.cpu().detach().numpy()[0]    # label box, scaled back to pixels
            x1_l = label_coor[1] * 224
            y1_l = label_coor[2] * 224
            x2_l = label_coor[3] * 224
            y2_l = label_coor[4] * 224

            confidence = out2.cpu().detach().numpy()[0]
            confidence = format(confidence, ".3f")

            cor = out1.cpu().detach()[0].numpy()        # predicted box, scaled back to pixels
            x1_o = cor[0] * 224
            y1_o = cor[1] * 224
            x2_o = cor[2] * 224
            y2_o = cor[3] * 224

            draw = ImageDraw.Draw(img)                  # visualize both boxes
            draw.rectangle((x1_o, y1_o, x2_o, y2_o), outline="red")    # prediction
            draw.rectangle((x1_l, y1_l, x2_l, y2_l), outline="blue")   # label
            font = ImageFont.truetype("consola.ttf", 25, encoding="unic")
            draw.text((150, 20), confidence, 'fuchsia', font)          # the drawn text must be a string
            plt.imshow(img)
            plt.pause(0.3)
            plt.clf()
            j += 1
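Finally, to run the trained network on an arbitrary scene image rather than the generated test set (the goal stated in the overview), a minimal hedged sketch: some_scene.jpg is a placeholder path, the 0.5 decision threshold is an assumption, and the preprocessing mirrors the dataset code (resize to 224x224, normalize with mean/std 0.5).

    import torch
    from PIL import Image, ImageDraw
    from torchvision import transforms
    from train_net1 import Net                      # needed to unpickle the saved model

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net = torch.load("./minionsnet1.pth").to(device)
    net.eval()

    img = Image.open(r"some_scene.jpg").convert("RGB").resize((224, 224))   # placeholder file name
    tf = transforms.Compose([transforms.ToTensor(),
                             transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])
    x = tf(img).unsqueeze(0).to(device)             # add the batch dimension

    with torch.no_grad():
        coordinate, confident = net(x)
    conf = confident.item()
    x1, y1, x2, y2 = (coordinate[0] * 224).cpu().numpy()   # scale the box back to pixels
    print("confidence:", conf)
    if conf > 0.5:                                  # assumed decision threshold
        ImageDraw.Draw(img).rectangle((x1, y1, x2, y2), outline="red")
    img.show()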