注:需要下面打包好的代码文件,可私聊博主(免费)
目录
1.统计数据集中包含的类别数量
这一步是为了后续改写数据集格式yaml文件所需要,为必要步骤,具体代码如下。
整个文件唯一需要改动的地方为xml_path变量,填写xml标签所在的文件夹即可。运行后即可得到数据集的所有类别及各自目标数量。
import os
import random
import xml.etree.ElementTree as ET
def _parse_xml_root(path):
    """Parse one annotation file and return its root element.

    The file is first read as UTF-8. If it cannot be decoded or parsed that
    way, it is read again with the platform's default text encoding.
    """
    try:
        with open(path, encoding='utf-8') as in_file:
            return ET.parse(in_file).getroot()
    except (UnicodeDecodeError, ET.ParseError):
        with open(path) as in_file:
            return ET.parse(in_file).getroot()


def count_classes(xml_dir):
    """Count annotated objects per class in a folder of VOC XML files.

    Args:
        xml_dir: Folder containing the ``.xml`` annotation files. Entries
            without the ``.xml`` suffix are ignored.

    Returns:
        A dict mapping class name to the number of ``<object>`` elements
        carrying that name. Keys appear in first-seen order, with files
        visited in sorted name order, so the result is the same on every run.
    """
    cls_num = {}
    # Sorted so the class order does not depend on the OS directory order.
    for name in sorted(os.listdir(xml_dir)):
        if not name.endswith(".xml"):
            continue
        root = _parse_xml_root(os.path.join(xml_dir, name))
        for obj in root.iter('object'):
            cls = obj.findtext('name')
            if cls is None:
                # An <object> without a <name> cannot be assigned to a class.
                continue
            cls_num[cls] = cls_num.get(cls, 0) + 1
    return cls_num


# The only value that needs editing: the folder that holds the XML labels.
xml_path = r'Annotations'

if __name__ == '__main__':
    cls_num = count_classes(xml_path)
    result = list(cls_num)
    print(result)
    print(cls_num)
    # One class name per line.
    with open('cls_classes.txt', "w") as f:
        for i in result:
            f.write(i)
            f.write('\n')
2.划分数据集并转换为YOLO格式(txt格式)
整个代码块需要改的地方共有四处:classes、train_k、val_k 和 dirs,下面分三点说明。
1.classes = [ ],需要改为第1节中所生成的类别,或者是自己想要的类别。如果数据集中存在某些不需要的类别,只要不把它们写进classes,转换时就会被自动跳过,仅保留自己需要的类别。
2.train_k和val_k 这两个用来控制数据集的划分比例,计算公式为:
训练集(train): train_k * val_k
验证集(val): train_k * (1 - val_k)
测试集(test): 1 - train_k
例如默认的 train_k = 0.8、val_k = 0.75,对应 训练集 : 验证集 : 测试集 = 0.6 : 0.2 : 0.2。
3.VOC数据集格式的存放路径, dirs
需要改为自己VOC数据集的存放地址,如你的VOC文件夹目录为
VOC
JPEGImages
Annotations
则需将dirs改为‘VOC’,运行下列代码后,VOC文件夹目录会成为
VOC
JPEGImages
Annotations
images
labels
其中images文件夹包含划分好的train,val,test图片;labels文件夹包含着划分好的yolo格式数据集标签,与images中的图片一一对应。
import xml.etree.ElementTree as ET
import os
import random
from shutil import copy
from tqdm import tqdm
import shutil
import cv2
# Class names to keep. A class's position in this list is used as its
# numeric class id in the generated labels. Empty by default: fill it in.
classes = []
# Split ratios for the train / val / test sets:
#   train = train_k * val_k, val = train_k * (1 - val_k), test = 1 - train_k
train_k = 0.8
val_k = 0.75
# Root folder of the VOC-format dataset (holds JPEGImages and Annotations)
dirs = r'VOC'
# Seed the random module so the shuffle below starts from a fixed state.
random.seed(0)
def clear_hidden_files(path):
    """Recursively delete files whose names start with ``._`` under *path*.

    Args:
        path: Directory to clean. Sub-directories are cleaned as well.

    Regular files with any other name are left untouched. Entries that are
    neither a regular file nor a directory (for example a broken symlink)
    are skipped.
    """
    base = os.path.abspath(path)  # loop-invariant, resolved once
    for entry in os.listdir(path):
        abspath = os.path.join(base, entry)
        if os.path.isfile(abspath):
            if entry.startswith("._"):
                os.remove(abspath)
        elif os.path.isdir(abspath):
            clear_hidden_files(abspath)
def convert(size, box):
    """Convert a corner-style box to a normalised centre/size box.

    Args:
        size: ``(width, height)`` of the image.
        box: ``(xmin, xmax, ymin, ymax)`` -- note the order: both x values
            come first, then both y values.

    Returns:
        ``(x, y, w, h)``: the box centre and the box width/height, each
        divided by the image width (x, w) or image height (y, h).
    """
    dw = 1. / size[0]
    dh = 1. / size[1]
    x = (box[0] + box[1]) / 2.0
    y = (box[2] + box[3]) / 2.0
    w = box[1] - box[0]
    h = box[3] - box[2]
    return (x * dw, y * dh, w * dw, h * dh)
def _load_xml_root(xml_path):
    """Parse *xml_path* as UTF-8, retrying with the platform default encoding."""
    try:
        with open(xml_path, 'r', encoding='utf-8') as in_file:
            return ET.parse(in_file).getroot()
    except (UnicodeDecodeError, ET.ParseError):
        with open(xml_path, 'r') as in_file:
            return ET.parse(in_file).getroot()


def convert_annotation(label_path, xml_path):
    """Write the label file for one VOC XML annotation.

    Each kept object becomes one line ``"<class id> <x> <y> <w> <h>"``, where
    the class id is the object's index in the module-level ``classes`` list
    and the four numbers come from ``convert``. Objects whose class is not in
    ``classes`` and objects marked ``<difficult>1</difficult>`` are skipped.
    Identical lines are written once, in the order they first appear.

    Args:
        label_path: Output ``.txt`` path. It is written only after the XML
            has been parsed, so a bad XML leaves no empty label behind.
        xml_path: Input VOC annotation file.

    Raises:
        ValueError: The XML has no usable image size and the matching
            ``.jpg`` under ``<dirs>/JPEGImages`` could not be read.
    """
    root = _load_xml_root(xml_path)

    size = root.find('size')
    if size is None:
        w = h = 0
    else:
        w = int(size.find('width').text)
        h = int(size.find('height').text)
    if w <= 0 or h <= 0:
        # The annotation carries no usable size: read it from the image.
        # basename/splitext copes with dots and backslashes in the path.
        stem = os.path.splitext(os.path.basename(xml_path))[0]
        img_path = os.path.join(dirs, 'JPEGImages', stem + '.jpg')
        tmp_img = cv2.imread(img_path)
        if tmp_img is None:
            raise ValueError('cannot read image size from ' + img_path)
        h, w = tmp_img.shape[:2]

    # dict keys keep insertion order and drop duplicate lines.
    out_lines = {}
    for obj in root.iter('object'):
        cls = obj.findtext('name')
        difficult = obj.findtext('difficult')
        # A missing or empty <difficult> tag counts as "not difficult".
        is_difficult = bool(difficult) and int(difficult) == 1
        if cls not in classes or is_difficult:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
             float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
        bb = convert((w, h), b)
        out_lines[str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n'] = None

    with open(label_path, 'w') as out_file:
        out_file.writelines(out_lines)
def _reset_dir(path):
    """Remove *path* if it exists (errors ignored), then make sure it exists."""
    if os.path.isdir(path):
        shutil.rmtree(path, True)
    os.makedirs(path, exist_ok=True)


def _export_split(file_names, image_src_dir, annotation_dir,
                  image_out_dir, label_out_dir, list_file):
    """Copy one split's images and write their converted label files.

    For every image name, the annotation with the same stem is converted
    with ``convert_annotation`` into ``label_out_dir``, the image is copied
    into ``image_out_dir``, and the source image path is appended to
    ``list_file``. Images without an annotation file are reported and skipped.
    """
    for file_name in tqdm(file_names):
        image_path = os.path.join(image_src_dir, file_name)
        stem = os.path.splitext(file_name)[0]
        xml_path = os.path.join(annotation_dir, stem + '.xml')
        if not os.path.isfile(xml_path):
            # tqdm.write prints without breaking the progress bar.
            tqdm.write('skipped ' + image_path + ': missing ' + xml_path)
            continue
        # Convert the label instead of copying the raw XML.
        convert_annotation(os.path.join(label_out_dir, stem + '.txt'), xml_path)
        copy(image_path, os.path.join(image_out_dir, file_name))
        list_file.write(image_path + '\n')


wd = os.getcwd()

# Start from empty labels/ and images/ trees, each with train/val/test.
label_dir = os.path.join(dirs, "labels")
jpg_dirs = os.path.join(dirs, 'images')
for out_root in (label_dir, jpg_dirs):
    _reset_dir(out_root)
    for split_name in ('train', 'val', 'test'):
        os.makedirs(os.path.join(out_root, split_name), exist_ok=True)
clear_hidden_files(label_dir)

jpg_dir = os.path.join(dirs, 'JPEGImages')
clear_hidden_files(jpg_dir)
# os.listdir order depends on the OS and file system. Sorting first makes
# the seeded shuffle give the same split everywhere.
jpg_dir_total = sorted(
    name for name in os.listdir(jpg_dir)
    if os.path.isfile(os.path.join(jpg_dir, name))
)
random.shuffle(jpg_dir_total)

train_end = int(len(jpg_dir_total) * train_k * val_k)
val_end = int(len(jpg_dir_total) * train_k)
list_train = jpg_dir_total[:train_end]
list_val = jpg_dir_total[train_end:val_end]
list_test = jpg_dir_total[val_end:]

for split_name, split_files in (('train', list_train),
                                ('val', list_val),
                                ('test', list_test)):
    # Produces yolov5_train.txt / yolov5_val.txt / yolov5_test.txt in the
    # working directory, one source image path per line.
    list_path = os.path.join(wd, "yolov5_" + split_name + ".txt")
    with open(list_path, 'w') as list_file:
        _export_split(
            split_files,
            jpg_dir,
            os.path.join(dirs, 'Annotations'),
            os.path.join(jpg_dirs, split_name),
            os.path.join(label_dir, split_name),
            list_file,
        )
3.修改数据集yaml
首先我们在data文件夹下找到VOC.yaml,如下所示
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford
# Example usage: python train.py --data VOC.yaml
# parent
# ├── yolov5
# └── datasets
#     └── VOC  ← downloads here (2.8 GB)


# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/VOC
train: # train images (relative to 'path') 16551 images
  - images/train2012
  - images/train2007
  - images/val2012
  - images/val2007
val: # val images (relative to 'path') 4952 images
  - images/test2007
test: # test images (optional)
  - images/test2007

# Classes
nc: 20 # number of classes
names: ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
        'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'] # class names


# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  import xml.etree.ElementTree as ET

  from tqdm import tqdm
  from utils.general import download, Path


  def convert_label(path, lb_path, year, image_id):
      def convert_box(size, box):
          dw, dh = 1. / size[0], 1. / size[1]
          x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
          return x * dw, y * dh, w * dw, h * dh

      in_file = open(path / f'VOC{year}/Annotations/{image_id}.xml')
      out_file = open(lb_path, 'w')
      tree = ET.parse(in_file)
      root = tree.getroot()
      size = root.find('size')
      w = int(size.find('width').text)
      h = int(size.find('height').text)

      for obj in root.iter('object'):
          cls = obj.find('name').text
          if cls in yaml['names'] and not int(obj.find('difficult').text) == 1:
              xmlbox = obj.find('bndbox')
              bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
              cls_id = yaml['names'].index(cls) # class id
              out_file.write(" ".join([str(a) for a in (cls_id, *bb)]) + '\n')


  # Download
  dir = Path(yaml['path']) # dataset root dir
  url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
  urls = [url + 'VOCtrainval_06-Nov-2007.zip', # 446MB, 5012 images
          url + 'VOCtest_06-Nov-2007.zip', # 438MB, 4953 images
          url + 'VOCtrainval_11-May-2012.zip'] # 1.95GB, 17126 images
  download(urls, dir=dir / 'images', delete=False, curl=True, threads=3)

  # Convert
  path = dir / f'images/VOCdevkit'
  for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'):
      imgs_path = dir / 'images' / f'{image_set}{year}'
      lbs_path = dir / 'labels' / f'{image_set}{year}'
      imgs_path.mkdir(exist_ok=True, parents=True)
      lbs_path.mkdir(exist_ok=True, parents=True)

      with open(path / f'VOC{year}/ImageSets/Main/{image_set}.txt') as f:
          image_ids = f.read().strip().split()
      for id in tqdm(image_ids, desc=f'{image_set}{year}'):
          f = path / f'VOC{year}/JPEGImages/{id}.jpg' # old img path
          lb_path = (lbs_path / f.name).with_suffix('.txt') # new label path
          f.rename(imgs_path / f.name) # move image
          convert_label(path, lb_path, year, id) # convert labels to YOLO format
然后我们将其复制一份,改名为test.yaml(个人习惯,可自行随意选择),并将下列代码粘贴进去
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Custom dataset config derived from VOC.yaml (saved as test.yaml in this tutorial)
# Example usage: python train.py --data test.yaml
# Dataset folder referenced below:
# VOC
# ├── images   (train / val / test)
# └── labels   (train / val / test)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
# path: ./VOC
# NOTE(review): with `path` commented out these are plain relative paths — presumably resolved from the directory train.py is run in; confirm
train: ./VOC/images/train
val: ./VOC/images/val
test: ./VOC/images/test
# number of classes — placeholder: set it to the number of entries in `names`
nc: 0
# class names — placeholder: list your classes in the same order as `classes` in the split script
names: ['']
注意,我们需要改动几个参数。
如你的数据集命名为VOC,符合2中的目录结构,则只需要改动nc和names,
nc改为你所需要的类别数,比如names: ['apple'],则nc只需要填写1
names改为2中classes = []中的内容即可,注意!!括号中的顺序必须两者保持一致,否则会出现数据类别对应不上的问题!!
如你的数据集命名不为VOC,则建议你改为VOC(皮一下),可以省去很多麻烦
4.修改train.py中的数据集格式yaml路径
终于最后一步了,胜利就在眼前,不要放弃!!
首先我们将train.py拉到最后,你会看到一排这样的代码
def parse_opt(known=False):
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path')
    parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
    parser.add_argument('--data', type=str, default=ROOT / 'data/test.yaml', help='dataset.yaml path')  # dataset config used for training
    parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path')
    parser.add_argument('--epochs', type=int, default=150)
    parser.add_argument('--batch-size', type=int, default=32, help='total batch size for all GPUs, -1 for autobatch')
    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
    parser.add_argument('--rect', action='store_true', help='rectangular training')
    parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
    parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
    parser.add_argument('--noval', action='store_true', help='only validate final epoch')
    parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor')
    parser.add_argument('--noplots', action='store_true', help='save no plot files')
    parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
    parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
    parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
    parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
    parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
    parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer')
    parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
    parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
    parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name')
    parser.add_argument('--name', default='exp', help='save to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--quad', action='store_true', help='quad dataloader')
    parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler')
    parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
    parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
    parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2')
    parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
    parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
    # Weights & Biases arguments
    parser.add_argument('--entity', default=None, help='W&B: Entity')
    parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option')
    parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval')
    parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use')
    # known=True keeps only the recognised options (parse_known_args) and
    # ignores the rest; otherwise an unrecognised option is an error.
    opt = parser.parse_known_args()[0] if known else parser.parse_args()
    return opt
真的是又多又长,看的都头疼,不过没关系,我们仅需要改动一行即可。
看到第五行,就是这一行
parser.add_argument('--data', type=str, default=ROOT / 'data/test.yaml', help='dataset.yaml path')
仅需要改动default=ROOT / 'data/test.yaml' 将最后的test.yaml改为xxxx.yaml即可,xxxx为刚刚3中自己改好的名字。
大功告成,请开始你的表演吧哈哈哈