1.准备代码并搭建环境
复现环境ubuntu18.04+pytorch1.6.0+python3.7+cuda10.1
代码链接
下载源代码到指定文件夹下,终端进入yolov5-master文件夹下:
pip install -r requirements.txt  # 安装必要的依赖包
2.制作数据集
1.更改data/voc.yaml文件
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
# Train command: python train.py --data voc.yaml
# Default dataset location is next to /yolov5:
# /parent_folder
# /VOC
# /yolov5
# download command/URL (optional)
download: bash data/scripts/get_voc.sh
# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
train: ../VOC/images/train/ # 16551 images#训练数据txt文档
val: ../VOC/images/val/ # 4952 images#验证数据txt文档
# number of classes
nc: 20 # 修改成自己的类别数;这里不需要为背景类额外加 1
# class names
names: ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']
改后:
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
# Download command: bash ./data/get_voc.sh
# Train command: python train.py --data voc.yaml
# Default dataset location is next to /yolov5:
# /parent_folder
# /VOC
# /yolov5
# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
train: './VOC2007/train.txt' # 16551 images
val: './VOC2007/val.txt' # 4952 images
# number of classes
#nc: 7
nc: 1
names: ['lf']
# class names
#names: ['defect','weld','branch','tee','carriage','clamp','alternative']
2.更改models/yolov5s.yaml
类别数目改
# parameters
nc: 1 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
# parameters
nc: 1 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
3.制作train.txt和val.txt文档以及标签和图片
建立文件夹./VOC2007
将VOC形式的数据集(提前做好)
└── VOC2007
├── Annotations # 标注的VOC格式的xml标签文件
├── JPEGImages # 数据集图片
├── ImageSets
│ └── Main
│ ├── test.txt # voc划分的测试集
│ ├── train.txt # voc划分的训练集
│ ├── trainval.txt
│ └── val.txt # voc划分的验证集
├── shujuji.py#用来生成从voc-yolo数据格式的代码
├── train.txt #yolo训练集
├── val.txt # yolo验证集
├── labels
#shujuji.py
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join
sets = ['trainval', 'test']
#classes = ['defect','weld','branch','tee','carriage','clamp','alternative'] # 自己训练的类别
classes=['lf']
def convert(size, box):
dw = 1. / size[0]
dh = 1. / size[1]
x = (box[0] + box[1]) / 2.0
y = (box[2] + box[3]) / 2.0
w = box[1] - box[0]
h = box[3] - box[2]
x = x * dw
w = w * dw
y = y * dh
h = h * dh
return (x, y, w, h)
def convert_annotation(image_id):
    """Convert one VOC XML annotation into a YOLO label file.

    Reads ./Annotations/<image_id>.xml and writes ./labels/<image_id>.txt,
    one "class_id cx cy w h" line per kept object (normalized coordinates).
    Objects whose class is not in `classes`, or that are marked
    difficult == 1, are skipped.
    """
    # ET.parse accepts a path directly; the original opened a file handle it
    # never closed.
    tree = ET.parse('./Annotations/%s.xml' % (image_id))
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    # Context manager guarantees the label file is closed/flushed even if a
    # malformed object entry raises midway.
    with open('./labels/%s.txt' % (image_id), 'w') as out_file:
        for obj in root.iter('object'):
            # Some datasets omit <difficult>; treat a missing tag as "not
            # difficult" instead of crashing on None.
            difficult_node = obj.find('difficult')
            difficult = difficult_node.text if difficult_node is not None else '0'
            cls = obj.find('name').text
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
                 float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
wd = getcwd()

# Ensure the YOLO label directory exists once, up front, instead of
# re-checking it on every iteration of the loop.
if not os.path.exists('./labels/'):
    os.makedirs('./labels/')

for image_set in sets:
    # Image ids come from the VOC split files (one id per whitespace-separated
    # token). `with` closes the split file — the original leaked the handle.
    with open('./ImageSets/Main/%s.txt' % (image_set)) as split_file:
        image_ids = split_file.read().strip().split()
    # Write one image path per line; YOLO reads this list file at train time.
    with open('./%s.txt' % (image_set), 'w') as list_file:
        for image_id in image_ids:
            list_file.write('./JPEGImages/%s.jpg\n' % (image_id))
            convert_annotation(image_id)
生成的结果:
train.txt 的内容类似如下(将脚本生成的 test.txt 重命名为 val.txt;并将 labels 文件夹下的标签文件拷贝到 JPEGImages 文件夹下):
./JPEGImages/000029.jpg
./JPEGImages/002435.jpg
./JPEGImages/003749.jpg
./JPEGImages/003838.jpg
./JPEGImages/003838.jpg
./JPEGImages/000704.jpg
./JPEGImages/003608.jpg
./JPEGImages/001522.jpg
./JPEGImages/001304.jpg
./JPEGImages/002169.jpg
./JPEGImages/002430.jpg
./JPEGImages/000734.jpg
./JPEGImages/001016.jpg
3.训练及测试
1.打开train.py,更改以下部分
# Argument parser from train.py, adapted for the custom single-class dataset:
# --cfg points at the modified models/yolov5s.yaml (nc: 1) and --data at the
# modified data/voc.yaml.
parser = argparse.ArgumentParser()
parser.add_argument('--cfg', type=str, default='./models/yolov5s.yaml', help='model.yaml path')
parser.add_argument('--data', type=str, default='./data/voc.yaml', help='data.yaml path')
parser.add_argument('--hyp', type=str, default='', help='hyp.yaml path (optional)')
parser.add_argument('--epochs', type=int, default=200)
parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs')
# Two values: train size then test size. The training log below shows they are
# rounded up to a multiple of the network stride (700/650 -> 704/672).
parser.add_argument('--img-size', nargs='+', type=int, default=[700, 650], help='train,test sizes')
parser.add_argument('--rect', action='store_true', help='rectangular training')
parser.add_argument('--resume', nargs='?', const='get_last', default=False,
                    help='resume from given path/last.pt, or most recent run if blank')
parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
parser.add_argument('--notest', action='store_true', help='only test final epoch')
parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters')
parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')
# Empty --weights means train from scratch rather than from a pretrained .pt.
parser.add_argument('--weights', type=str, default='', help='initial weights path')
parser.add_argument('--name', default='', help='renames results.txt to results_name.txt if supplied')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer')
parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
parser.add_argument('--local-rank', type=int, default=-1, help='DDP parameter, do not modify')
parser.add_argument('--logdir', type=str, default='runs/', help='logging directory')
opt = parser.parse_args()
开始训练:
Analyzing anchors... anchors/target = 2.21, Best Possible Recall (BPR) = 0.9841
Image sizes 704 train, 672 test
Using 8 dataloader workers
Starting training for 200 epochs...
Epoch gpu_mem GIoU obj cls total targets img_size
0/199 1.05G 0.08609 0.01714 0 0.1032 11 704: 100%|██████████| 45/45 [00:26<00:00, 1.67it/s]
Class Images Targets P R mAP@.5 mAP@.5:.95: 0%| | 0/5 [00:00<?, ?it/s]
在runs下看训练结果:(官方的结果,自己的和此图类似)
2.测试:test.py:修改
# Argument parser from test.py (evaluation on the labelled val/test split).
# When running against the custom dataset, pass --data ./data/voc.yaml and
# --weights pointing at the trained checkpoint.
parser = argparse.ArgumentParser(prog='test.py')
parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)')
parser.add_argument('--data', type=str, default='data/coco128.yaml', help='*.data path')
parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch')
parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
# Very low confidence threshold is intentional for mAP evaluation.
parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.65, help='IOU threshold for NMS')
parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file')
parser.add_argument('--task', default='val', help="'val', 'test', 'study'")
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset')
parser.add_argument('--augment', action='store_true', help='augmented inference')
parser.add_argument('--merge', action='store_true', help='use Merge NMS')
parser.add_argument('--verbose', action='store_true', help='report mAP by class')
parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
3.测试没有标注过的图片:依然修改此处
# Argument parser from detect.py: run the trained weights on a folder of
# un-annotated images; rendered results are written to --output.
parser = argparse.ArgumentParser()
parser.add_argument('--weights', nargs='+', type=str, default='./runs/exp27/weights/best.pt',
                    help='model.pt path(s)')
#parser.add_argument('--source', type=str, default='./VOC2007/test_images',
#                    help='source') # file/folder, 0 for webcam
parser.add_argument('--source', type=str, default='/home/z/zuo/easy-faster-rcnn.pytorch-master/tongtai_jpg',
                    help='source')
parser.add_argument('--output', type=str, default='./output', help='output folder')  # output folder
parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
parser.add_argument('--conf-thres', type=float, default=0.4, help='object confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.5, help='IOU threshold for NMS')
parser.add_argument('--device', default='0', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--view-img', action='store_true', help='display results')
# BUG FIX: the original passed default='./result.txt' alongside
# action='store_true', which made opt.save_txt a truthy *string* even when
# --save-txt was never given, so label .txt files were always written and the
# option's type flipped between str and bool. A store_true flag must default
# to False; pass --save-txt on the command line to enable saving.
parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
parser.add_argument('--augment', action='store_true', help='augmented inference')
parser.add_argument('--update', action='store_true', help='update all models')