目录
下载yolov7源码
在yolov7项目根路径下的data文件夹内创建dataset文件夹
data/dataset/images
存放jpg图片
data/dataset/labels
存放将要制作的YOLO格式数据集的标签
LabelImg制作YOLO格式的数据集
设置保存格式为YOLO,标签保存文件夹为data/dataset/labels
划分数据集
将data/dataset/labels
内的classes.txt剪切粘贴到data/dataset
将下面这份代码内的路径参数和数据集划分比例按需进行修改:
比如:
train_percent = 0.75
val_percent = 0.25
test_percent = 0.0
# 将图片和标注数据按比例切分为训练集、验证集和测试集
import shutil
import random
import os
# Source folders holding the original images and their YOLO label files.
image_original_path = "./data/dataset/images/"
label_original_path = "./data/dataset/labels/"

# All generated output is anchored at the directory the script is run from.
cur_path = os.getcwd()


def _in_cwd(relative):
    """Return ``relative`` joined onto the working directory captured above."""
    return os.path.join(cur_path, relative)


# Training split: images and labels.
train_image_path = _in_cwd("data/dataset/processed/images/train/")
train_label_path = _in_cwd("data/dataset/processed/labels/train/")

# Validation split: images and labels.
val_image_path = _in_cwd("data/dataset/processed/images/val/")
val_label_path = _in_cwd("data/dataset/processed/labels/val/")

# Test split: images and labels.
test_image_path = _in_cwd("data/dataset/processed/images/test/")
test_label_path = _in_cwd("data/dataset/processed/labels/test/")

# Text files that list the copied image paths of each split, one per line.
list_train = _in_cwd("data/dataset/processed/train.txt")
list_val = _in_cwd("data/dataset/processed/val.txt")
list_test = _in_cwd("data/dataset/processed/test.txt")

# Fraction of the label files assigned to each split.
train_percent = 0.75
val_percent = 0.25
test_percent = 0.0
def del_file(path):
    """Delete every entry found directly inside the directory ``path``.

    Each entry is removed with ``os.remove``, so the directory is expected to
    contain only files; a sub-directory makes ``os.remove`` raise ``OSError``.

    Args:
        path: Directory whose contents are deleted. A trailing separator is
            accepted.
    """
    for entry in os.listdir(path):
        # os.path.join picks the right separator for the current platform; a
        # hard-coded backslash only resolves to the intended file on Windows.
        file_data = os.path.join(path, entry)
        os.remove(file_data)
def mkdir():
    """Prepare the six output directories (images and labels per split).

    A directory that does not exist yet is created; one that already exists
    has its contents removed through ``del_file`` so that files from an
    earlier run do not leak into the new split.
    """
    output_dirs = (
        train_image_path,
        train_label_path,
        val_image_path,
        val_label_path,
        test_image_path,
        test_label_path,
    )
    for output_dir in output_dirs:
        if os.path.exists(output_dir):
            del_file(output_dir)
        else:
            os.makedirs(output_dir)
def clearfile():
    """Delete the train/val/test list files if they exist from an earlier run."""
    for list_file in (list_train, list_val, list_test):
        if os.path.exists(list_file):
            os.remove(list_file)
def main():
    """Randomly split the dataset and copy it into the per-split folders.

    Every entry of ``label_original_path`` is treated as one sample. The
    sample's ``.jpg`` image and ``.txt`` label are copied into the train, val
    or test directories, and the destination image path is appended to the
    matching list file (``list_train`` / ``list_val`` / ``list_test``).

    Split sizes are ``int(total * train_percent)`` and
    ``int(total * val_percent)``; every remaining sample goes to the test
    split.

    Raises:
        OSError: If an image or label file of a sample cannot be copied.
    """
    mkdir()
    clearfile()

    total_txt = os.listdir(label_original_path)
    num_txt = len(total_txt)
    list_all_txt = range(num_txt)

    num_train = int(num_txt * train_percent)
    num_val = int(num_txt * val_percent)

    # Draw the training indices first; whatever is left is shared between
    # validation and test.
    train = random.sample(list_all_txt, num_train)
    train_set = set(train)  # O(1) membership tests in the loops below
    val_test = [i for i in list_all_txt if i not in train_set]

    # Draw the validation indices from the remainder; the rest is the test set.
    val = random.sample(val_test, num_val)
    val_set = set(val)

    print(
        "训练集数目:{}, 验证集数目:{}, 测试集数目:{}".format(
            len(train), len(val), len(val_test) - len(val)
        )
    )

    # The context manager closes all three list files even when a copy fails.
    with open(list_train, 'w') as file_train, \
            open(list_val, 'w') as file_val, \
            open(list_test, 'w') as file_test:
        for i, label_file in enumerate(total_txt):
            # Strip the extension to get the base name shared by image and label.
            name = os.path.splitext(label_file)[0]
            src_image = image_original_path + name + '.jpg'
            src_label = label_original_path + name + ".txt"

            if i in train_set:
                image_dir, label_dir, list_file = train_image_path, train_label_path, file_train
            elif i in val_set:
                image_dir, label_dir, list_file = val_image_path, val_label_path, file_val
            else:
                image_dir, label_dir, list_file = test_image_path, test_label_path, file_test

            dst_image = image_dir + name + '.jpg'
            dst_label = label_dir + name + '.txt'
            shutil.copyfile(src_image, dst_image)
            shutil.copyfile(src_label, dst_label)
            list_file.write(dst_image + '\n')


if __name__ == "__main__":
    main()
运行后,代码已经自动生成了data/dataset/processed
的内容
编辑配置文件
-
复制
cfg\training\预训练权重对应的yaml文件
到原处但重新命名,这里假设把cfg\training\yolov7-tiny.yaml
复制并修改为了cfg\training\yolov7-tiny-underwater.yaml
。修改
cfg\training\yolov7-tiny-underwater.yaml
内容,nc
改为自己数据集的类别数,不用包含背景类。
-
复制
data\coco.yaml
并重命名创建为data\mydata.yaml
修改类似于这样:
# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
train: ./data/dataset/processed/train.txt
val: ./data/dataset/processed/val.txt
test: ./data/dataset/processed/test.txt

# number of classes
nc: 4

# class names
names: [ 'square', 'circle', 'triangle', 'start' ]
差不多已经可以训练模型了
参数配置不用每个都详细解释了,大概要注意的是
-
weights
:指定初始权重的路径。 -
cfg
:指定模型配置文件的路径。 -
data
:指定数据集的配置文件的路径。 -
hyp
:指定超参数的路径。 -
epochs
:指定训练的轮数。 -
batch-size
:指定每个批次的图像数量。 -
img-size
:指定训练和测试图像的大小。 -
device
:指定使用的设备,可以是GPU或CPU。 -
workers
:指定数据加载器(dataloader)的最大工作进程(worker)数量。 -
name
:指定训练的名称。
if __name__ == '__main__':
    # Command-line options of the training script; the defaults below are the
    # values used for this custom dataset.
    parser = argparse.ArgumentParser()

    # --- Model, dataset and hyper-parameter files ---
    parser.add_argument('--weights', type=str, default='best.pt', help='initial weights path')
    parser.add_argument(
        '--cfg',
        type=str,
        default='cfg/training/yolov7-tiny-underwater.yaml',
        help='model.yaml path',
    )
    parser.add_argument('--data', type=str, default='data/mydata.yaml', help='data.yaml path')
    parser.add_argument(
        '--hyp',
        type=str,
        default='data/hyp.scratch.tiny.yaml',
        help='hyperparameters path',
    )

    # --- Training schedule and input size ---
    parser.add_argument('--epochs', type=int, default=600)
    parser.add_argument('--batch-size', type=int, default=1, help='total batch size for all GPUs')
    parser.add_argument(
        '--img-size',
        nargs='+',
        type=int,
        default=[128, 128],
        help='[train, test] image sizes',
    )
    parser.add_argument('--rect', action='store_true', help='rectangular training')
    parser.add_argument(
        '--resume',
        nargs='?',
        const=True,
        default=False,
        help='resume most recent training',
    )

    # --- Checkpointing, testing and anchor options ---
    parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
    parser.add_argument('--notest', action='store_true', help='only test final epoch')
    parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
    parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters')
    parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')

    # --- Data loading ---
    parser.add_argument(
        '--cache-images',
        action='store_true',
        help='cache images for faster training',
    )
    parser.add_argument(
        '--image-weights',
        action='store_true',
        help='use weighted image selection for training',
    )
    parser.add_argument('--device', default='0', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
    parser.add_argument(
        '--single-cls',
        action='store_true',
        help='train multi-class data as single-class',
    )

    # --- Optimizer and distributed training ---
    parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer')
    parser.add_argument(
        '--sync-bn',
        action='store_true',
        help='use SyncBatchNorm, only available in DDP mode',
    )
    parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
    parser.add_argument(
        '--workers',
        type=int,
        default=2,
        help='maximum number of dataloader workers',
    )

    # --- Output location and run naming ---
    parser.add_argument('--project', default='runs/train', help='save to project/name')
    parser.add_argument('--entity', default=None, help='W&B entity')
    parser.add_argument('--name', default='exp-yolov7-tiny', help='save to project/name')
    parser.add_argument(
        '--exist-ok',
        action='store_true',
        help='existing project/name ok, do not increment',
    )

    # --- Miscellaneous training switches ---
    parser.add_argument('--quad', action='store_true', help='quad dataloader')
    parser.add_argument('--linear-lr', action='store_true', help='linear LR')
    parser.add_argument(
        '--label-smoothing',
        type=float,
        default=0.0,
        help='Label smoothing epsilon',
    )

    # --- W&B logging ---
    parser.add_argument(
        '--upload_dataset',
        action='store_true',
        help='Upload dataset as W&B artifact table',
    )
    parser.add_argument(
        '--bbox_interval',
        type=int,
        default=-1,
        help='Set bounding-box image logging interval for W&B',
    )
    parser.add_argument(
        '--save_period',
        type=int,
        default=-1,
        help='Log model after every "save_period" epoch',
    )
    parser.add_argument(
        '--artifact_alias',
        type=str,
        default="latest",
        help='version of dataset artifact to be used',
    )

    # --- Layer freezing and metric variant ---
    parser.add_argument(
        '--freeze',
        nargs='+',
        type=int,
        default=[0],
        help='Freeze layers: backbone of yolov7=50, first3=0 1 2',
    )
    parser.add_argument(
        '--v5-metric',
        action='store_true',
        help='assume maximum recall as 1.0 in AP calculation',
    )

    opt = parser.parse_args()
issue with training · Issue #1537 · WongKinYiu/yolov7 · GitHub