# VOC目标检测数据加载器模板 (VOC object-detection data loader template)
import json
from bs4 import BeautifulSoup
import os
import matplotlib.pyplot as plt
from PIL import Image
import time
import numpy as np
import torch
from torch import nn
import torch.nn.functional as f
import torchvision.transforms as tfs
from torch.utils.data import dataloader
import random
# Root directory of the VOC dataset. VOCDetection falls back to this value
# when it is constructed without an explicit `prefix`.
# The trailing "/" is required: sub-directory paths are built from it by
# plain string concatenation (e.g. prefix + "Annotations/").
voc_root = "G:/data/VOC2012/"
class VOCDetection(object):
# Maps each class name to its integer label; there are 20 classes (labels 0-19).
# The grouping comments below are only for readability and do not affect the labels.
classes_dict = {
# vehicles
## four-wheeled
"car": 0, "bus": 1,
## two-wheeled, followed by the remaining vehicle classes
"bicycle": 2, "motorbike": 3, "aeroplane": 4, "boat": 5, "train": 6,
# household
## furniture
### seating
"chair": 7, "sofa": 8,
## other household items
"diningtable": 9, "tvmonitor": 10, "bottle": 11, "pottedplant": 12,
# animals
## domestic
"cat": 13, "dog": 14,
## farmyard
"cow": 15, "horse": 16, "sheep": 17,
# remaining classes
"bird": 18, "person": 19
}
# Look up a class name by its label index: className[i] is the name whose
# label in classes_dict is i, so the order of this list must stay in sync
# with the values in classes_dict.
className = ["car", "bus","bicycle", "motorbike", "aeroplane",
"boat", "train","chair", "sofa","diningtable", "tvmonitor",
"bottle", "pottedplant","cat", "dog","cow", "horse", "sheep","bird", "person"]
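# Constructor arguments:
#   prefix   - dataset root directory; when None, the module-level voc_root is used.
#   readInfo - False: if info/allFile.json (and info/classFile.json) already exist,
#              load them instead of regenerating the data;
#              True: always regenerate the data, even if those files exist.
#   radio    - proportion of the data used for the training set (the train ratio); default 0.8.
#   mode     - which split to use: "train" for the training set, "val" for the test set.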
def __init__(self, prefix=None, readInfo=False, radio=0.8, mode="train"):
if(prefix == None):
self.prefix = voc_root
else:
self.prefix = prefix
# 标记路径
self.anno_path = self.prefix+"Annotations/"
# 图片路径
self.image_path = self.prefix+"JPEGImages/"
# 文本信息路径
self.info_path = self.prefix+"info/"
# 训练集的比例
self.radio = radio
# 取训练集还是测试集
self.mode = mode
# 图像正则化参数
self.mean = [0.40789654, 0.44719302, 0.47026115]
self.std = [0.28863828, 0.27408164, 0.27809835]
# 用于记录json信息
self.all_file = {
"dataset":[]
}
if(os.path.exists(self.info_path+"allFile.json") and readInfo==False):
with open(self.info_path+"allFile.json", "r") as f:
self.all_file = json.load(f)
print("allFile.json文件已存在,直接加载信息。。。")
else:
self.getAllInfo()
# 对每个类别进行细分
self.class_file = {
}
if(os.path.exists(self.info_path+"classFile.json") and readInfo==False):
with open(self.info_path+"classFile.json", "r") as f:
self.class_file = json.load(f)
print("classFile.json文件已存在,直接加载信息。。。")
else:
# 将所有类别信息注册到字典中
for i in range(len(self.className)):
self.class_file[self.className[i]] = []
self.getClassInfo()
# 进行测试集和训练集的划分
self.dataset = {
}
self.dataset["train"]