How to train YOLOv5 using a custom dataset in Colab
Use labelimg to make your own data set
1.Introduction to labelimg
LabelImg is an open-source image annotation tool that can save labels in three formats:
1 VOC format, saved as an XML file.
2 YOLO format, saved as a TXT file.
3 CreateML format, saved as a JSON file.
2.Installation of labelimg
This section covers installation on Windows. First open the command prompt (shortcut: press Win+R, type cmd, and press Enter). In the command prompt, enter the following command:
pip install labelimg -i https://pypi.tuna.tsinghua.edu.cn/simple
After running the above command, the system automatically downloads the dependencies of labelimg. Since this is a very lightweight tool, the download is very quick. When the message shown in the red box below appears, labelimg has been installed successfully.
3.Use labelimg
The format of the self-made dataset is as follows:
JPEGImages stores the image files that need to be labeled
Annotations stores the annotation (label) files
Other folders can be ignored. Our dataset is now in VOC format, so the annotation files have the .xml suffix. The YOLOv5 network model requires labels in YOLO format, so the following script converts the VOC annotations to YOLO format and divides the pictures in the dataset into a training set and a validation set:
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join
import random
from shutil import copyfile
# Class names in class-id order: the position of a name in this list is the
# id written to the YOLO label files. Replace with your own classes,
# for example ["ball"].
classes = [
    "mask",
    "no-mask",
]

# Threshold compared against a random draw from 1..100 for every image to
# decide whether the image goes to the training set or the validation set.
TRAIN_RATIO = 80
def clear_hidden_files(path):
    """Recursively delete every file under ``path`` whose name starts with "._".

    Args:
        path: Directory to clean. Sub-directories are visited recursively.

    Raises:
        OSError: If ``path`` cannot be listed or a matching file cannot be
            removed.
    """
    root = os.path.abspath(path)
    for name in os.listdir(root):
        entry = os.path.join(root, name)
        if os.path.isfile(entry):
            if name.startswith("._"):
                os.remove(entry)
        elif os.path.isdir(entry):
            # Recurse only into real directories. Anything else (for example
            # a dangling symlink) is skipped instead of crashing os.listdir.
            clear_hidden_files(entry)
def convert(size, box):
    """Turn a pixel-space box into a box normalised by the image size.

    Args:
        size: ``(width, height)`` of the image.
        box: Box edges ordered as ``(xmin, xmax, ymin, ymax)``.

    Returns:
        Tuple ``(x, y, w, h)``: the box centre and the box extent, each
        multiplied by the reciprocal of the matching image dimension.
    """
    scale_x = 1. / size[0]
    scale_y = 1. / size[1]
    left, right, top, bottom = box[0], box[1], box[2], box[3]
    centre_x = (left + right) / 2.0
    centre_y = (top + bottom) / 2.0
    return (
        centre_x * scale_x,
        centre_y * scale_y,
        (right - left) * scale_x,
        (bottom - top) * scale_y,
    )
def convert_annotation(image_id):
    """Convert one VOC XML annotation into a YOLO label file.

    Reads ``VOCdevkit/VOC2007/Annotations/<image_id>.xml`` and writes
    ``VOCdevkit/VOC2007/YOLOLabels/<image_id>.txt``; both paths are relative
    to the current working directory. Every kept object becomes one line
    ``<class id> <x> <y> <w> <h>`` built from the tuple returned by
    ``convert``. Objects whose name is not listed in ``classes``, or whose
    ``difficult`` flag is 1, are skipped.

    Args:
        image_id: Annotation file name without its extension.
    """
    xml_path = 'VOCdevkit/VOC2007/Annotations/%s.xml' % image_id
    txt_path = 'VOCdevkit/VOC2007/YOLOLabels/%s.txt' % image_id

    # Give ElementTree the path so the parser reads the raw bytes and applies
    # the encoding declared by the XML itself instead of the locale default.
    root = ET.parse(xml_path).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    lines = []
    for obj in root.iter('object'):
        cls = obj.find('name').text
        # Tolerate a missing or empty <difficult> tag by treating the object
        # as not difficult.
        difficult = obj.findtext('difficult') or '0'
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (
            float(xmlbox.find('xmin').text),
            float(xmlbox.find('xmax').text),
            float(xmlbox.find('ymin').text),
            float(xmlbox.find('ymax').text),
        )
        bb = convert((w, h), b)
        lines.append(str(cls_id) + " " + " ".join(str(a) for a in bb) + '\n')

    # Write only after parsing succeeded, so a broken XML file does not leave
    # an empty label file behind; the with-block closes the file on any error.
    with open(txt_path, 'w') as out_file:
        out_file.writelines(lines)
def _prepare_dir(parent, name):
    """Return ``parent/name/``, creating it if needed and purging "._" files."""
    # The trailing slash is kept so the resulting path strings match the
    # layout the rest of the script (and the list files) have always used.
    directory = os.path.join(parent, name + "/")
    os.makedirs(directory, exist_ok=True)
    clear_hidden_files(directory)
    return directory


# Everything is laid out below the current working directory; the relative
# paths used by convert_annotation resolve against the same directory.
wd = os.getcwd()
data_base_dir = os.path.join(wd, "VOCdevkit/")
work_space_dir = os.path.join(data_base_dir, "VOC2007/")
os.makedirs(work_space_dir, exist_ok=True)

# Input folders (VOC layout) and the folder for the converted label files.
annotation_dir = _prepare_dir(work_space_dir, "Annotations")
image_dir = _prepare_dir(work_space_dir, "JPEGImages")
yolo_labels_dir = _prepare_dir(work_space_dir, "YOLOLabels")

# Output folders: images/{train,val} and labels/{train,val}.
yolov5_images_dir = _prepare_dir(data_base_dir, "images")
yolov5_labels_dir = _prepare_dir(data_base_dir, "labels")
yolov5_images_train_dir = _prepare_dir(yolov5_images_dir, "train")
yolov5_images_test_dir = _prepare_dir(yolov5_images_dir, "val")
yolov5_labels_train_dir = _prepare_dir(yolov5_labels_dir, "train")
yolov5_labels_test_dir = _prepare_dir(yolov5_labels_dir, "val")

# Mode 'w' truncates list files left over from a previous run; the with-block
# closes both files even if a conversion or copy fails.
with open(os.path.join(wd, "yolov5_train.txt"), 'w') as train_file, \
        open(os.path.join(wd, "yolov5_val.txt"), 'w') as test_file:
    for image_name in os.listdir(image_dir):
        image_path = os.path.join(image_dir, image_name)
        if not os.path.isfile(image_path):
            continue

        stem = os.path.splitext(image_name)[0]
        annotation_path = os.path.join(annotation_dir, stem + '.xml')
        label_name = stem + '.txt'
        label_path = os.path.join(yolo_labels_dir, label_name)

        prob = random.randint(1, 100)
        print("Probability: %d" % prob)

        # Images without an annotation file are left out of both sets.
        if not os.path.exists(annotation_path):
            continue

        # randint(1, 100) includes both ends, so "<=" sends exactly
        # TRAIN_RATIO out of 100 possible draws to the training set.
        if prob <= TRAIN_RATIO:
            list_file = train_file
            images_out = yolov5_images_train_dir
            labels_out = yolov5_labels_train_dir
        else:
            list_file = test_file
            images_out = yolov5_images_test_dir
            labels_out = yolov5_labels_test_dir

        list_file.write(image_path + '\n')
        convert_annotation(stem)  # writes label_path
        copyfile(image_path, os.path.join(images_out, image_name))
        copyfile(label_path, os.path.join(labels_out, label_name))
The code sets the ratio of the training set to the validation set to about 8:2 with TRAIN_RATIO = 80 (each image is assigned to one of the two sets at random). If you need a different ratio, just change the number and run the script again.
Colab training
1.Upload dataset to google-drive
2.Colab configuration
Open Colab, change the runtime type, and select GPU as the hardware accelerator
!nvidia-smi #Run this command to check that the GPU is available
3.Colab connects to google-drive
import os
from google.colab import drive
# Mount Google Drive so that its files are reachable under /content/drive.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)
4.Adjust the file path
Run the following commands to change the working directory to your Google Drive folder:
pip install ultralytics #Download required dependencies
%cd /content/drive/
%ls
%cd MyDrive/Colab Notebooks/
%ls
%cd test1/
%ls
#Note: The test1 folder used here must be created in Google Drive in advance; change the path to match your own folder
5.Clone the YOLOv5 code with git, and adapt it to our task
!git clone https://github.com/ultralytics/yolov5.git
%ls
%cd yolov5/
Since there is a problem with the version of the PyYAML library in the Python environment provided by Google Colab, we need to upgrade this library first to ensure that the program can run.
!pip install -U pyyaml
6.Adjust configuration files
Since the training has changed from the original 80 categories of yolov5 to only two categories (with and without masks), the configuration file of yolov5 needs to be changed.
Go back to My Drive; you will find that there is now an additional yolov5 folder under the Colab folder
Open yolov5 -> models -> yolov5s.yaml -> right-click and open it with a text editor -> change nc from 80 to 2 -> save the file
We also need to modify the dataset configuration file of the mask dataset (here test1 -> dataset_mask -> data.yaml) so that it points to the directories of the training set and the validation set
7.Run training file
!python train.py --data ../dataset_mask/data.yaml --cfg models/yolov5s.yaml --weights '' --batch-size 64
After the training, the curves and sample pictures from the training process will be saved in test1/yolov5/runs/train