数据集结构
data
|-- phone
|   |-- images
|   |   |-- train
|   |   |-- val
|   |-- labels
|   |   |-- train
|   |   |-- val
划分数据集
# 划分数据集
import os
import random
import shutil
def split_data(source_image_folder, source_label_folder, dest_train_image_folder, dest_train_label_folder, dest_val_image_folder, dest_val_label_folder, split_ratio=0.8):
    """Copy matching image/label pairs into train and validation folders.

    Images and labels are paired by file stem (the file name without its
    extension) rather than by their position in the sorted directory
    listings, so a stray file in either source folder (for example a
    thumbnail ``.db`` file) cannot shift the pairing. Files without a
    partner in the other folder, and sub-directories, are skipped.

    Args:
        source_image_folder: Folder containing the source images.
        source_label_folder: Folder containing the source label files.
        dest_train_image_folder: Destination for training images.
        dest_train_label_folder: Destination for training labels.
        dest_val_image_folder: Destination for validation images.
        dest_val_label_folder: Destination for validation labels.
        split_ratio: Fraction of the matched pairs (in sorted image-name
            order) that goes to the training set; the rest goes to the
            validation set.
    """
    # Index label files by stem. Sorting first makes the choice deterministic
    # if two label files happen to share a stem.
    labels_by_stem = {}
    for label_file in sorted(os.listdir(source_label_folder)):
        if os.path.isfile(os.path.join(source_label_folder, label_file)):
            labels_by_stem.setdefault(os.path.splitext(label_file)[0], label_file)

    # Keep only the images that have a label with the same stem.
    pairs = []
    for image_file in sorted(os.listdir(source_image_folder)):
        if not os.path.isfile(os.path.join(source_image_folder, image_file)):
            continue
        label_file = labels_by_stem.get(os.path.splitext(image_file)[0])
        if label_file is not None:
            pairs.append((image_file, label_file))

    # The split point is computed on the matched pairs, not on the raw listing.
    split_index = int(len(pairs) * split_ratio)

    # Create the train and validation folders.
    os.makedirs(dest_train_image_folder, exist_ok=True)
    os.makedirs(dest_val_image_folder, exist_ok=True)
    os.makedirs(dest_train_label_folder, exist_ok=True)
    os.makedirs(dest_val_label_folder, exist_ok=True)

    # Copy each pair into the train or validation folders.
    for i, (image_file, label_file) in enumerate(pairs):
        if i < split_index:
            dest_image_folder = dest_train_image_folder
            dest_label_folder = dest_train_label_folder
        else:
            dest_image_folder = dest_val_image_folder
            dest_label_folder = dest_val_label_folder
        shutil.copyfile(
            os.path.join(source_image_folder, image_file),
            os.path.join(dest_image_folder, image_file),
        )
        shutil.copyfile(
            os.path.join(source_label_folder, label_file),
            os.path.join(dest_label_folder, label_file),
        )
# Split the raw JPEGImages/labels folders into the phone/ train and val
# folders, relying on split_data's default split ratio.
split_data(
    source_image_folder="/home/aistudio/work/data/JPEGImages",
    source_label_folder="/home/aistudio/work/data/labels",
    dest_train_image_folder="/home/aistudio/work/data/phone/images/train",
    dest_train_label_folder="/home/aistudio/work/data/phone/labels/train",
    dest_val_image_folder="/home/aistudio/work/data/phone/images/val",
    dest_val_label_folder="/home/aistudio/work/data/phone/labels/val",
)
print("Finish!")
检测是否一一对应
# 很奇怪,划分完数据集在train文件夹下有个.db文件,导致images和labels的文件没有对应上,所以还是检测一下,如果对应上了就不用管
# 检测 images/train、images/val 中的文件名与 labels/train、labels/val 中的文件名是否一致
import os
def find_mismatched_files(folder1, folder2):
    """Return the file stems that appear in only one of the two folders.

    A stem is the file name without its extension, so ``a.jpg`` and
    ``a.txt`` count as the same entry.

    Args:
        folder1: Path of the first folder to list.
        folder2: Path of the second folder to list.

    Returns:
        A set of stems present in exactly one of the folders; empty when
        both folders contain the same stems.
    """
    stems1 = {os.path.splitext(file)[0] for file in os.listdir(folder1)}
    stems2 = {os.path.splitext(file)[0] for file in os.listdir(folder2)}
    return stems1.symmetric_difference(stems2)
def identify_mismatched_folders(mismatched_files, folder1, folder2):
    """Split mismatched stems by the folder in which each one is found.

    Args:
        mismatched_files: Iterable of file stems (names without extension).
        folder1: Path of the first folder.
        folder2: Path of the second folder.

    Returns:
        A tuple ``(in_folder1, in_folder2)`` of sorted lists: the stems
        from ``mismatched_files`` found in ``folder1`` and in ``folder2``
        respectively.
    """
    # List each folder once; the original rebuilt these sets for every
    # candidate stem.
    stems_in_folder1 = {os.path.splitext(f)[0] for f in os.listdir(folder1)}
    stems_in_folder2 = {os.path.splitext(f)[0] for f in os.listdir(folder2)}
    # Sort so the report order does not depend on set iteration order.
    mismatched_in_folder1 = sorted(file for file in mismatched_files if file in stems_in_folder1)
    mismatched_in_folder2 = sorted(file for file in mismatched_files if file in stems_in_folder2)
    return mismatched_in_folder1, mismatched_in_folder2
# Folders to compare. To check the validation split instead, point these at
# /home/aistudio/work/data/phone/images/val and
# /home/aistudio/work/data/phone/labels/val.
folder1 = "/home/aistudio/work/data/phone/images/train"
folder2 = "/home/aistudio/work/data/phone/labels/train"

mismatched_files = find_mismatched_files(folder1, folder2)

if mismatched_files:
    # Report each mismatched name together with the folder it was found in.
    mismatched_in_folder1, mismatched_in_folder2 = identify_mismatched_folders(
        mismatched_files, folder1, folder2
    )
    print("不同的文件名:")
    for file in mismatched_in_folder1:
        print(f"文件 '{file}' 在 {folder1} 中。")
    for file in mismatched_in_folder2:
        print(f"文件 '{file}' 在 {folder2} 中。")
else:
    print("文件名一致,没有不同的文件。")
下载Yolov5
训练前的几个小处理
①下载yolov5x.pt放在根目录下weights中,以防服务器连接GitHub下载该模型始终timeout
②手动下载 Arial.ttf 并放在根目录下(同样是因为服务器连接不上 GitHub);然后在 yolov5-master/utils/general.py 中搜索(Ctrl+F)Font,将其中的字体路径改为根目录下 Arial.ttf 的路径
开始训练
# Training
# epochs: how many times the whole dataset is iterated over during training; lower it if your GPU is weak.
# batch-size (passed as --batch below): how many images are processed before each weight update, i.e. the gradient-descent mini-batch; lower it if your GPU is weak.
# cfg: configuration file that stores the model structure
# data: file that stores the training and test data
# img: input image width/height; lower it if your GPU is weak
python train.py --img 640 --batch 16 --epochs 100 --data ./data/phone.yaml --cfg ./models/yolov5x.yaml --weights ./weights/yolov5x.pt --device '0,1'
开始检测
# Run detection on one test image using the best.pt weights saved under ./runs/train/exp/weights.
python detect.py --weights ./runs/train/exp/weights/best.pt --cfg models/yolov5x.yaml --data ./data/phone.yaml --source ./test_image/play_phone.jpg