yolov3 python代码_yolov3代码详解（三）

最新推荐文章于 2024-04-14 08:45:00 发布

weixin_39564831

最新推荐文章于 2024-04-14 08:45:00 发布

阅读量639

点赞数

文章标签： yolov3 python代码

import glob

import random

import os

import sys

import numpy as np

from PIL import Image

import torch

import torch.nn.functional as F

from utils.augmentations import horisontal_flip

from torch.utils.data import Dataset

import torchvision.transforms as transforms

##########################################################################

#图像预处理，形成tensor

##########################################################################

"""

本文件的主要作用 pad_to_square把图像调整为方形，resize调整图像大小，random_resize随机调整图像大小

ImageFolder读取data/samples下的所有图像，调整为方形，调整大小，

生成张量，为detect.py提供输入

"""

#调整为方形

def pad_to_square(img, pad_value):
    """Pad the shorter spatial side of an image tensor so that height == width.

    The padding is split as evenly as possible between the two sides of the
    shorter dimension; when the difference is odd, the extra pixel goes to the
    second side (bottom or right).

    Args:
        img: Tensor whose last two dimensions are (height, width), e.g. a
            (C, H, W) image or a (N, C, H, W) batch.
        pad_value: Constant value written into the padded area.

    Returns:
        Tuple ``(padded_img, pad)`` where ``pad`` is the 4-tuple of plain ints
        handed to ``F.pad``: ``(left, right, top, bottom)``.
    """
    # Only the spatial dimensions matter; leading dimensions are left untouched.
    h, w = img.shape[-2:]
    # Builtin abs keeps the pad amounts as Python ints instead of numpy scalars.
    dim_diff = abs(h - w)
    # (upper / left) padding and (lower / right) padding
    pad1 = dim_diff // 2
    pad2 = dim_diff - pad1
    # F.pad pads the last dimension first: (left, right, top, bottom).
    # A wide image gets rows added, a tall image gets columns added.
    pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0)
    img = F.pad(img, pad, "constant", value=pad_value)
    return img, pad

#改变图片大小

def resize(image, size):
    """Resize one image tensor to ``size`` using nearest-neighbour sampling.

    Args:
        image: A single un-batched image tensor (the callers in this file
            pass (C, H, W) tensors).
        size: Target spatial size forwarded to ``F.interpolate``.

    Returns:
        The resized tensor, without the temporary batch dimension.
    """
    # F.interpolate works on batches, so wrap the image in a batch of one.
    batch_of_one = image.unsqueeze(0)
    scaled = F.interpolate(batch_of_one, size=size, mode="nearest")
    # Drop the temporary batch dimension again.
    return scaled.squeeze(0)

#多尺度训练时使用，没有被引用

def random_resize(images, min_size=288, max_size=448):
    """Resize a batch of images to a randomly chosen square size.

    The size is drawn uniformly from ``min_size, min_size + 32, ...`` up to and
    including ``max_size`` when it lies on that grid. With the defaults the
    candidates are 288, 320, 352, 384, 416 and 448. Intended for multi-scale
    training.

    Args:
        images: Batched image tensor accepted by ``F.interpolate``
            (e.g. (N, C, H, W)).
        min_size: Smallest candidate size.
        max_size: Upper bound (inclusive) for the candidate sizes.

    Returns:
        The batch resized with nearest-neighbour interpolation.

    Raises:
        ValueError: If ``min_size`` is greater than ``max_size``, so that there
            is no candidate size to choose from.
    """
    candidate_sizes = range(min_size, max_size + 1, 32)
    if not candidate_sizes:
        raise ValueError(
            "no candidate size: min_size=%s is greater than max_size=%s" % (min_size, max_size)
        )
    new_size = random.choice(candidate_sizes)
    return F.interpolate(images, size=new_size, mode="nearest")

#遍历文件夹下的测试图片

class ImageFolder(Dataset):
    """Dataset over the files of one folder, producing inference-ready tensors.

    According to the original notes this is what detect.py iterates over
    (by default the images under data/samples). Each item is the image
    converted to RGB, zero-padded to a square and resized to
    ``img_size`` x ``img_size``.
    """

    def __init__(self, folder_path, img_size=416):
        """Collect the file list.

        Args:
            folder_path: Directory to scan (not recursive).
            img_size: Side length every image is resized to.
        """
        # The "*.*" pattern matches every entry with a dot in its name; sorting
        # gives a stable, reproducible order.
        self.files = sorted(glob.glob("%s/*.*" % folder_path))
        self.img_size = img_size

    def __getitem__(self, index):
        """Return ``(img_path, img)`` for the file at ``index`` (wraps around)."""
        img_path = self.files[index % len(self.files)]
        # Convert to RGB so grayscale / RGBA / palette files also yield a
        # 3-channel tensor, as ListDataset does; the context manager closes
        # the underlying file handle once the pixels are decoded.
        with Image.open(img_path) as pil_img:
            img = transforms.ToTensor()(pil_img.convert("RGB"))
        # Pad to square resolution with zeros; the pad amounts are not needed here.
        img, _ = pad_to_square(img, 0)
        # Resize to the network input size.
        img = resize(img, self.img_size)
        return img_path, img

    def __len__(self):
        """Number of files found in the folder."""
        return len(self.files)

#生成图像和其标签组成的列表，在train.py和test.py中被引用

class ListDataset(Dataset):
    """Dataset of images and box labels listed in a text file.

    ``list_path`` names a text file with one image path per line (the original
    notes mention e.g. data/coco/trainvalno5k.txt with absolute paths). The
    label file of an image is found by replacing ``images`` with ``labels`` in
    its path and the ``.png`` / ``.jpg`` extension with ``.txt``, e.g.
    data/custom/images/train.jpg -> data/custom/labels/train.txt.

    Each label row is ``class x_center y_center width height``. According to
    the original notes the class is used by train.py and test.py, with
    ``collate_fn`` passed to ``torch.utils.data.DataLoader``.
    """

    def __init__(self, list_path, img_size=416, augment=True, multiscale=True, normalized_labels=True):
        """Read the image list and derive the label paths.

        Args:
            list_path: Text file with one image path per line.
            img_size: Side length images are resized to in ``collate_fn``.
            augment: Randomly flip samples horizontally when True.
            multiscale: Pick a new ``img_size`` every tenth batch when True.
            normalized_labels: True when label coordinates are fractions of the
                image size, False when they are already in pixels.
        """
        with open(list_path, "r") as file:
            # Skip blank lines so a trailing newline in the list file does not
            # turn into an empty image path. Entries keep their line ending;
            # it is stripped when the path is used.
            self.img_files = [line for line in file if line.strip()]

        self.label_files = [
            path.replace("images", "labels").replace(".png", ".txt").replace(".jpg", ".txt")
            for path in self.img_files
        ]
        self.img_size = img_size
        self.max_objects = 100  # stored but not used in this class; per the original notes, the max labels per image
        self.augment = augment
        self.multiscale = multiscale
        self.normalized_labels = normalized_labels
        # Multi-scale range: three 32-pixel steps either side of img_size
        # (320 and 512 for the default 416).
        self.min_size = self.img_size - 3 * 32
        self.max_size = self.img_size + 3 * 32
        self.batch_count = 0  # number of batches collated so far

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            Tuple ``(img_path, img, targets)``. ``img`` is the RGB image
            zero-padded to a square (resizing happens in ``collate_fn``).
            ``targets`` is an (n, 6) tensor ``(0, class, x, y, w, h)`` with
            box values expressed as fractions of the padded image, or None
            when the label file does not exist.
        """
        index = index % len(self.img_files)

        # ---------
        #  Image
        # ---------
        img_path = self.img_files[index].rstrip()

        # convert("RGB") guarantees three channels, and ToTensor always returns
        # a (C, H, W) tensor, so no extra handling of images with fewer
        # channels is needed. The context manager closes the file handle.
        with Image.open(img_path) as pil_img:
            img = transforms.ToTensor()(pil_img.convert('RGB'))

        _, h, w = img.shape
        # Factors that turn label coordinates into pixels of the original image.
        factors = (h, w) if self.normalized_labels else (1, 1)
        # Pad to square resolution; pad is (left, right, top, bottom).
        img, pad = pad_to_square(img, 0)
        _, padded_h, padded_w = img.shape

        # ---------
        #  Label
        # ---------
        label_path = self.label_files[index].rstrip()

        targets = None
        if os.path.exists(label_path):
            boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))
            targets = self._boxes_to_targets(boxes, factors, pad, (padded_h, padded_w))

        # Apply augmentations: random horizontal mirror.
        # NOTE(review): horisontal_flip lives in utils.augmentations and is not
        # visible here; it is only called when targets exist, on the assumption
        # that it indexes into targets - confirm.
        if self.augment and targets is not None and np.random.random() < 0.5:
            img, targets = horisontal_flip(img, targets)

        # Column 0 of targets is a placeholder here; collate_fn overwrites it
        # with the position of the image inside its batch, which is the only
        # way to tell which image a row belongs to after all targets of a
        # batch have been concatenated.
        return img_path, img, targets

    @staticmethod
    def _boxes_to_targets(boxes, factors, pad, padded_size):
        """Convert label rows to target rows relative to the padded image.

        Args:
            boxes: (n, 5) tensor of ``class, x_center, y_center, width, height``.
            factors: ``(h_factor, w_factor)`` that scale the label values to
                pixels of the unpadded image.
            pad: ``(left, right, top, bottom)`` padding added to the image.
            padded_size: ``(padded_h, padded_w)`` of the padded image.

        Returns:
            (n, 6) float tensor ``(0, class, x, y, w, h)`` with x, y, w, h as
            fractions of the padded image.
        """
        h_factor, w_factor = factors
        padded_h, padded_w = padded_size

        # Centre in pixels of the unpadded image, then shifted by the padding
        # added before it (left for x, top for y). Padding added after the box
        # (right / bottom) does not move it.
        center_x = w_factor * boxes[:, 1] + pad[0]
        center_y = h_factor * boxes[:, 2] + pad[2]

        targets = torch.zeros((len(boxes), 6))
        targets[:, 1] = boxes[:, 0]
        targets[:, 2] = center_x / padded_w
        targets[:, 3] = center_y / padded_h
        # Padding does not change the box size, only the size it is relative to.
        targets[:, 4] = boxes[:, 3] * w_factor / padded_w
        targets[:, 5] = boxes[:, 4] * h_factor / padded_h
        return targets

    @staticmethod
    def _merge_targets(targets):
        """Tag every target row with its batch position and concatenate them.

        Args:
            targets: Sequence with one (n, 6) tensor or None per batch sample.

        Returns:
            (total, 6) tensor; column 0 holds the position of the owning image
            in the batch. An empty (0, 6) tensor when no sample has targets.
        """
        tagged = []
        # Enumerate before dropping the None placeholders so the index stays
        # aligned with the image position in the batch.
        for sample_idx, boxes in enumerate(targets):
            if boxes is None:
                continue
            boxes[:, 0] = sample_idx
            tagged.append(boxes)
        if not tagged:
            return torch.zeros((0, 6))
        return torch.cat(tagged, 0)

    def collate_fn(self, batch):
        """Build one batch from a list of ``__getitem__`` results.

        Every tenth batch (when ``multiscale`` is on) a new ``img_size`` is
        drawn from ``min_size..max_size`` in steps of 32 for multi-scale
        training. Box coordinates need no adjustment for this, because
        ``__getitem__`` stores them as fractions of the padded image.

        Returns:
            Tuple ``(paths, imgs, targets)``: the image paths, the images
            resized and stacked into one tensor, and all targets concatenated
            with column 0 set to the image position in the batch.
        """
        paths, imgs, targets = list(zip(*batch))
        targets = self._merge_targets(targets)
        # Selects new image size every tenth batch
        if self.multiscale and self.batch_count % 10 == 0:
            self.img_size = random.choice(range(self.min_size, self.max_size + 1, 32))
        # Resize images to input shape and stack them into one tensor
        imgs = torch.stack([resize(img, self.img_size) for img in imgs])
        self.batch_count += 1
        return paths, imgs, targets

    def __len__(self):
        """Number of images listed in the list file."""
        return len(self.img_files)

weixin_39564831

关注

0
点赞
踩
6

收藏

觉得还不错? 一键收藏
0
评论
yolov3 python代码_yolov3代码详解（三）

import glob; import random; import os; import sys; import numpy as np; from PIL import Image; import torch; import torch.nn.functional as F; from utils.augmentations import horisontal_flip; from torch.utils.data import...
复制链接

扫一扫