【笔记】yolo v3 : dataloader.py

最新推荐文章于 2023-07-21 07:51:46 发布

程序猿的探索之路

最新推荐文章于 2023-07-21 07:51:46 发布

阅读量387

点赞数 2

分类专栏：小菜鸡加油　文章标签：python

本文链接：https://blog.csdn.net/nyist_yangguang/article/details/120769643

版权

小菜鸡加油专栏收录该内容

399 篇文章 31 订阅

订阅专栏

注1：

注2：

注3：

注4：

x.shape (416, 416, 3)

注5：

图像显示不出来：imshow 只能正常显示取值在 [0, 1] 范围内的 float 或取值在 [0, 255] 范围内的 int RGB 数据，超出范围时会出现下面的警告：

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

注6：

python 切片索引：

eg1：

aaa=[[[1,2]],[[3,4]],[[5,6]]]
aaa=np.array(aaa)
aaa.shape
Out[194]: (3, 1, 2)
aaa[...,0]
Out[195]: 
array([[1],
       [3],
       [5]])
aaa
Out[196]: 
array([[[1, 2]],
       [[3, 4]],
       [[5, 6]]])
aaa[:,0]
Out[197]: 
array([[1, 2],
       [3, 4],
       [5, 6]])
aaa[0,:]
Out[198]: array([[1, 2]])
aaa[0,0,:]
Out[199]: array([1, 2])
aaa[:,0,0]
Out[200]: array([1, 3, 5])
aaa[:,0,1]
Out[201]: array([2, 4, 6])
aaa[0][0][1]
Out[202]: 2
aaa[0][0][:]
Out[203]: array([1, 2])
aaa
Out[204]: 
array([[[1, 2]],
       [[3, 4]],
       [[5, 6]]])

eg2：

aaa=[[1,2],[3,4]]
aaa=np.array(aaa)
aaa[...,0]
Out[165]: array([1, 3])
aaa
Out[166]: 
array([[1, 2],
       [3, 4]])
aaa[...,0][:]
Out[167]: array([1, 3])
aaa[...,0][0]
Out[168]: 1
aaa[...,0][1]
Out[169]: 3
aaa[...,0][aaa[...,0]==1]
Out[170]: array([1])
aaa[...,0][aaa[...,0]==3]
Out[171]: array([3])
[aaa[...,0]]
Out[172]: [array([1, 3])]
[aaa[...,0]==1]
Out[173]: [array([ True, False])]

eg3：

注7:

注8：

a.shape
Out[9]: (2, 2)
a
Out[10]: 
array([[1, 2],
       [3, 4]])
a[0][0]
Out[11]: 1
a[[False,True]]
Out[12]: array([[3, 4]])
a
Out[13]: 
array([[1, 2],
       [3, 4]])
a[np.array([False,True])]
Out[14]: array([[3, 4]])

Code:

from random import shuffle
import numpy as np
import torch
import torch.nn as nn
import math
import torch.nn.functional as F
from PIL import Image
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
from nets.yolo_training import Generator
import cv2


class YoloDataset(Dataset):
    """Dataset yielding randomly augmented images with normalised YOLO targets.

    Each entry of ``train_lines`` is one annotation line: an image path
    followed by whitespace-separated boxes, each written as five
    comma-separated integers (four corner coordinates and a class id).
    """

    def __init__(self, train_lines, image_size):
        """Store the annotation lines and the network input size.

        Args:
            train_lines: Sequence of annotation lines (image path plus boxes).
            image_size: Network input size; the first two entries are used as
                ``(height, width)``, e.g. ``(416, 416)``.
        """
        super(YoloDataset, self).__init__()

        self.train_lines = train_lines
        self.train_batches = len(train_lines)
        self.image_size = image_size

    def __len__(self):
        """Return the number of annotation lines."""
        return self.train_batches

    def rand(self, a=0, b=1):
        """Return a uniform random float in ``[a, b)`` drawn from ``np.random``."""
        return np.random.rand() * (b - a) + a

    @staticmethod
    def _wrap_hue(hue_channel):
        """Wrap hue angles given in degrees back into ``[0, 360)``."""
        return np.mod(hue_channel, 360.0)

    def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5):
        """Load one annotated image and apply random augmentation.

        The image is rescaled with a jittered aspect ratio, pasted at a random
        offset onto a canvas of the network input size, optionally mirrored,
        and distorted in HSV space. The boxes follow the same geometric
        transform.

        Args:
            annotation_line: One annotation line (image path plus boxes).
            input_shape: ``(height, width)`` of the network input.
            jitter: Aspect-ratio jitter; the ratio is multiplied and divided by
                factors drawn from ``[1 - jitter, 1 + jitter)``.
            hue: Maximum hue shift as a fraction of the 360 degree hue circle.
            sat: Upper bound of the saturation factor; the factor is drawn from
                ``[1, sat)`` and inverted half of the time.
            val: Upper bound of the value (brightness) factor, drawn like
                ``sat``.

        Returns:
            A tuple ``(image_data, boxes)``. ``image_data`` is the augmented
            RGB image as an ``(h, w, 3)`` float array scaled to 0-255.
            ``boxes`` is an ``(n, 5)`` array of corner coordinates in pixels
            plus the class id, or an empty list when no box survives.
        """
        line = annotation_line.split()  # splits on any whitespace
        # convert() returns a loaded copy, so the file handle can be released
        # immediately instead of staying open until garbage collection.
        with Image.open(line[0]) as source:
            image = source.convert('RGB')

        iw, ih = image.size
        h, w = input_shape
        # One row of integers per box listed after the image path.
        box = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]])

        # Resize with a randomly jittered aspect ratio and a random scale.
        # The resized image may be larger than the canvas; pasting crops it.
        new_ar = w / h * self.rand(1 - jitter, 1 + jitter) / self.rand(1 - jitter, 1 + jitter)
        scale = self.rand(.25, 2)
        if new_ar < 1:
            nh = int(scale * h)
            nw = int(nh * new_ar)
        else:
            nw = int(scale * w)
            nh = int(nw / new_ar)
        image = image.resize((nw, nh), Image.BICUBIC)

        # Paste at a random offset onto a (w, h) canvas filled with one random
        # colour, so the result always has the network input size.
        dx = int(self.rand(0, w - nw))
        dy = int(self.rand(0, h - nh))
        new_image = Image.new('RGB', (w, h),
                              (np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)))
        new_image.paste(image, (dx, dy))
        image = new_image

        # Mirror horizontally half of the time.
        flip = self.rand() < .5
        if flip:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)

        # Random HSV distortion factors.
        hue = self.rand(-hue, hue)
        sat = self.rand(1, sat) if self.rand() < .5 else 1 / self.rand(1, sat)
        val = self.rand(1, val) if self.rand() < .5 else 1 / self.rand(1, val)

        # For float32 input OpenCV reports hue in degrees (0-360) and
        # saturation/value in 0-1, so the hue shift has to wrap around 360.
        x = cv2.cvtColor(np.array(image, np.float32) / 255, cv2.COLOR_RGB2HSV)
        x[..., 0] = self._wrap_hue(x[..., 0] + hue * 360)
        x[..., 1] *= sat
        x[..., 2] *= val
        x[x[:, :, 0] > 360, 0] = 360
        x[:, :, 1:][x[:, :, 1:] > 1] = 1
        x[x < 0] = 0
        image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB) * 255

        if len(box) == 0:
            return image_data, []

        # Apply the same resize, offset and flip to the boxes.
        np.random.shuffle(box)
        box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
        box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
        if flip:
            box[:, [0, 2]] = w - box[:, [2, 0]]
        # Clip to the canvas.
        box[:, 0:2][box[:, 0:2] < 0] = 0
        box[:, 2][box[:, 2] > w] = w
        box[:, 3][box[:, 3] > h] = h
        box_w = box[:, 2] - box[:, 0]
        box_h = box[:, 3] - box[:, 1]
        # Keep only boxes that are still wider and taller than one pixel.
        box = box[np.logical_and(box_w > 1, box_h > 1)]
        if len(box) == 0:
            return image_data, []

        box_data = np.zeros((len(box), 5))
        box_data[:len(box)] = box
        if (box_data[:, :4] > 0).any():
            return image_data, box_data
        return image_data, []

    def __getitem__(self, index):
        """Return one augmented sample.

        Returns:
            A tuple ``(image, targets)``. ``image`` is a float32 array in
            channel-first layout with values divided by 255. ``targets`` is a
            float32 ``(n, 5)`` array of ``centre_x, centre_y, width, height``
            (each as a fraction of the input size) plus the class id, or an
            empty array when the sample has no valid box.
        """
        lines = self.train_lines

        img, y = self.get_random_data(lines[index], self.image_size[0:2])
        if len(y) != 0:
            # Convert pixel corners to fractions of the input width/height.
            boxes = np.array(y[:, :4], dtype=np.float32)
            boxes[:, 0] = boxes[:, 0] / self.image_size[1]
            boxes[:, 1] = boxes[:, 1] / self.image_size[0]
            boxes[:, 2] = boxes[:, 2] / self.image_size[1]
            boxes[:, 3] = boxes[:, 3] / self.image_size[0]

            boxes = np.maximum(np.minimum(boxes, 1), 0)
            # Corners -> width/height.
            boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
            boxes[:, 3] = boxes[:, 3] - boxes[:, 1]

            # Top-left corner -> box centre.
            boxes[:, 0] = boxes[:, 0] + boxes[:, 2] / 2
            boxes[:, 1] = boxes[:, 1] + boxes[:, 3] / 2
            # y[:, -1:] keeps the class id as a column so it can be appended.
            y = np.concatenate([boxes, y[:, -1:]], axis=-1)

        img = np.array(img, dtype=np.float32)

        tmp_inp = np.transpose(img / 255.0, (2, 0, 1))
        tmp_targets = np.array(y, dtype=np.float32)
        return tmp_inp, tmp_targets


# Used as the collate_fn of a DataLoader
def yolo_dataset_collate(batch):
    """Collect a batch of ``(image, boxes)`` samples into two NumPy arrays.

    Args:
        batch: Iterable of ``(image, boxes)`` pairs.

    Returns:
        A tuple ``(images, bboxes)``. ``images`` is the stacked image array.
        ``bboxes`` is a stacked array when every sample has the same box
        shape; otherwise it is a 1-D object array holding one box array per
        sample.
    """
    images = []
    bboxes = []
    for img, box in batch:
        images.append(img)
        bboxes.append(box)
    images = np.array(images)

    if len({np.shape(box) for box in bboxes}) <= 1:
        bboxes = np.array(bboxes)
    else:
        # Samples usually hold different numbers of boxes. np.array() on such
        # a ragged list raises ValueError on NumPy >= 1.24, so the object
        # array is built explicitly.
        ragged = np.empty(len(bboxes), dtype=object)
        for i, box in enumerate(bboxes):
            ragged[i] = box
        bboxes = ragged
    return images, bboxes

from random import shuffle
import numpy as np
import torch
import torch.nn as nn
import math
import torch.nn.functional as F
from PIL import Image
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
from nets.yolo_training import Generator
import cv2


class YoloDataset(Dataset):
    """Dataset yielding randomly augmented images with normalised YOLO targets.

    Each entry of ``train_lines`` is one annotation line: an image path
    followed by whitespace-separated boxes, each written as five
    comma-separated integers (four corner coordinates and a class id).
    """

    def __init__(self, train_lines, image_size):
        """Store the annotation lines and the network input size.

        Args:
            train_lines: Sequence of annotation lines (image path plus boxes).
            image_size: Network input size; the first two entries are used as
                ``(height, width)``, e.g. ``(416, 416)``.
        """
        super(YoloDataset, self).__init__()

        self.train_lines = train_lines
        self.train_batches = len(train_lines)
        self.image_size = image_size

    def __len__(self):
        """Return the number of annotation lines."""
        return self.train_batches

    def rand(self, a=0, b=1):
        """Return a uniform random float in ``[a, b)`` drawn from ``np.random``."""
        return np.random.rand() * (b - a) + a

    @staticmethod
    def _wrap_hue(hue_channel):
        """Wrap hue angles given in degrees back into ``[0, 360)``."""
        return np.mod(hue_channel, 360.0)

    def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5):
        """Load one annotated image and apply random augmentation.

        The image is rescaled with a jittered aspect ratio, pasted at a random
        offset onto a canvas of the network input size, optionally mirrored,
        and distorted in HSV space. The boxes follow the same geometric
        transform.

        Args:
            annotation_line: One annotation line (image path plus boxes).
            input_shape: ``(height, width)`` of the network input.
            jitter: Aspect-ratio jitter; the ratio is multiplied and divided by
                factors drawn from ``[1 - jitter, 1 + jitter)``.
            hue: Maximum hue shift as a fraction of the 360 degree hue circle.
            sat: Upper bound of the saturation factor; the factor is drawn from
                ``[1, sat)`` and inverted half of the time.
            val: Upper bound of the value (brightness) factor, drawn like
                ``sat``.

        Returns:
            A tuple ``(image_data, boxes)``. ``image_data`` is the augmented
            RGB image as an ``(h, w, 3)`` float array scaled to 0-255.
            ``boxes`` is an ``(n, 5)`` array of corner coordinates in pixels
            plus the class id, or an empty list when no box survives.
        """
        line = annotation_line.split()  # splits on any whitespace
        # convert() returns a loaded copy, so the file handle can be released
        # immediately instead of staying open until garbage collection.
        with Image.open(line[0]) as source:
            image = source.convert('RGB')

        iw, ih = image.size
        h, w = input_shape
        # One row of integers per box listed after the image path.
        box = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]])

        # Resize with a randomly jittered aspect ratio and a random scale.
        # The resized image may be larger than the canvas; pasting crops it.
        new_ar = w / h * self.rand(1 - jitter, 1 + jitter) / self.rand(1 - jitter, 1 + jitter)
        scale = self.rand(.25, 2)
        if new_ar < 1:
            nh = int(scale * h)
            nw = int(nh * new_ar)
        else:
            nw = int(scale * w)
            nh = int(nw / new_ar)
        image = image.resize((nw, nh), Image.BICUBIC)

        # Paste at a random offset onto a (w, h) canvas filled with one random
        # colour, so the result always has the network input size.
        dx = int(self.rand(0, w - nw))
        dy = int(self.rand(0, h - nh))
        new_image = Image.new('RGB', (w, h),
                              (np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)))
        new_image.paste(image, (dx, dy))
        image = new_image

        # Mirror horizontally half of the time.
        flip = self.rand() < .5
        if flip:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)

        # Random HSV distortion factors.
        hue = self.rand(-hue, hue)
        sat = self.rand(1, sat) if self.rand() < .5 else 1 / self.rand(1, sat)
        val = self.rand(1, val) if self.rand() < .5 else 1 / self.rand(1, val)

        # For float32 input OpenCV reports hue in degrees (0-360) and
        # saturation/value in 0-1, so the hue shift has to wrap around 360.
        x = cv2.cvtColor(np.array(image, np.float32) / 255, cv2.COLOR_RGB2HSV)
        x[..., 0] = self._wrap_hue(x[..., 0] + hue * 360)
        x[..., 1] *= sat
        x[..., 2] *= val
        x[x[:, :, 0] > 360, 0] = 360
        x[:, :, 1:][x[:, :, 1:] > 1] = 1
        x[x < 0] = 0
        image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB) * 255

        # Example of a parsed box array:
        # array([[275, 223, 283, 236, 2],
        #        [179, 213, 204, 243, 2],
        #        [224, 212, 262, 259, 2],
        #        [283, 223, 297, 234, 2]])
        if len(box) == 0:
            return image_data, []

        # Apply the same resize, offset and flip to the boxes.
        np.random.shuffle(box)
        box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
        box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
        if flip:
            box[:, [0, 2]] = w - box[:, [2, 0]]
        # Clip to the canvas.
        box[:, 0:2][box[:, 0:2] < 0] = 0
        box[:, 2][box[:, 2] > w] = w
        box[:, 3][box[:, 3] > h] = h
        box_w = box[:, 2] - box[:, 0]   # e.g. array([ 8, 25, 38, 14])
        box_h = box[:, 3] - box[:, 1]   # e.g. array([13, 30, 47, 11])
        # Boolean-mask indexing keeps only the rows whose mask entry is True,
        # i.e. boxes that are still wider and taller than one pixel.
        box = box[np.logical_and(box_w > 1, box_h > 1)]
        if len(box) == 0:
            return image_data, []

        box_data = np.zeros((len(box), 5))
        box_data[:len(box)] = box
        if (box_data[:, :4] > 0).any():
            return image_data, box_data
        return image_data, []

    def __getitem__(self, index):
        """Return one augmented sample.

        Returns:
            A tuple ``(image, targets)``. ``image`` is a float32 array in
            channel-first layout with values divided by 255. ``targets`` is a
            float32 ``(n, 5)`` array of ``centre_x, centre_y, width, height``
            (each as a fraction of the input size) plus the class id, or an
            empty array when the sample has no valid box.
        """
        lines = self.train_lines

        img, y = self.get_random_data(lines[index], self.image_size[0:2])
        if len(y) != 0:
            # Convert pixel corners to fractions of the input width/height.
            boxes = np.array(y[:, :4], dtype=np.float32)
            boxes[:, 0] = boxes[:, 0] / self.image_size[1]
            boxes[:, 1] = boxes[:, 1] / self.image_size[0]
            boxes[:, 2] = boxes[:, 2] / self.image_size[1]
            boxes[:, 3] = boxes[:, 3] / self.image_size[0]

            boxes = np.maximum(np.minimum(boxes, 1), 0)
            boxes[:, 2] = boxes[:, 2] - boxes[:, 0]   # normalised width
            boxes[:, 3] = boxes[:, 3] - boxes[:, 1]   # normalised height

            boxes[:, 0] = boxes[:, 0] + boxes[:, 2] / 2   # centre x
            boxes[:, 1] = boxes[:, 1] + boxes[:, 3] / 2   # centre y
            # y[:, -1] would drop the last axis (e.g. array([2., 2., 2., 2.])),
            # whereas y[:, -1:] keeps it as a column of shape (n, 1), which is
            # what concatenation along the last axis needs.
            y = np.concatenate([boxes, y[:, -1:]], axis=-1)

        img = np.array(img, dtype=np.float32)

        tmp_inp = np.transpose(img / 255.0, (2, 0, 1))
        tmp_targets = np.array(y, dtype=np.float32)
        return tmp_inp, tmp_targets


# Used as the collate_fn of a DataLoader
def yolo_dataset_collate(batch):
    """Collect a batch of ``(image, boxes)`` samples into two NumPy arrays.

    Args:
        batch: Iterable of ``(image, boxes)`` pairs.

    Returns:
        A tuple ``(images, bboxes)``. ``images`` is the stacked image array.
        ``bboxes`` is a stacked array when every sample has the same box
        shape; otherwise it is a 1-D object array holding one box array per
        sample.
    """
    images = []
    bboxes = []
    for img, box in batch:
        images.append(img)
        bboxes.append(box)
    images = np.array(images)

    if len({np.shape(box) for box in bboxes}) <= 1:
        bboxes = np.array(bboxes)
    else:
        # Samples usually hold different numbers of boxes. np.array() on such
        # a ragged list raises ValueError on NumPy >= 1.24, so the object
        # array is built explicitly.
        ragged = np.empty(len(bboxes), dtype=object)
        for i, box in enumerate(bboxes):
            ragged[i] = box
        bboxes = ragged
    return images, bboxes

程序猿的探索之路

关注

2
点赞
踩
2

收藏

觉得还不错? 一键收藏
打赏
0
评论
【笔记】yolo v3 : dataloader.py

x.shape (416, 416, 3)无图，float [0,1] , int [0，255]才可以显示Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).python 切片索引：eg1：aaa=[...
复制链接

扫一扫