coco_predict.py
import argparse
import json
from torch.utils.data import DataLoader
from models import *
from utils.datasets import *
from utils.utils import *
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
def test(
        cfg,
        data_cfg,
        weights,
        batch_size,
        img_size,
        iou_thres,
        conf_thres,
        nms_thres,
        save_json=True,
        model=None,
        coco_annotations='/home/common/datasets/coco/annotations/instances_val2014.json'
):
    """Run the model over the validation set and report detection metrics.

    Args:
        cfg: Model definition passed to ``Darknet``; only used when ``model``
            is None.
        data_cfg: Path handed to ``parse_data_cfg``; its 'classes', 'valid'
            and 'names' entries are read.
        weights: Weights file; only used when ``model`` is None. A name ending
            in '.pt' is read with ``torch.load`` (state dict stored under the
            'model' key), anything else goes through ``load_darknet_weights``.
        batch_size: Batch size of the validation ``DataLoader``.
        img_size: Image size passed to the model, the dataset and
            ``scale_coords``.
        iou_thres: A prediction is marked correct when its best IoU against
            the image's target boxes exceeds this value and that target has
            not been matched already.
        conf_thres: Forwarded to ``non_max_suppression``.
        nms_thres: Forwarded to ``non_max_suppression``.
        save_json: When truthy, detections are collected as COCO-style result
            dicts; if any were collected and the computed mAP is non-zero they
            are written to 'results.json' and scored with pycocotools.
        model: Already-built model to evaluate. When given, ``cfg`` and
            ``weights`` are not used and the device is taken from the model's
            first parameter.
        coco_annotations: COCO ground-truth annotation file used for the
            pycocotools evaluation.

    Returns:
        Tuple ``(mp, mr, map, mf1, loss)``: the means of the per-class
        precision, recall, AP and F1 returned by ``ap_per_class`` (all 0.0 if
        no statistics were gathered) and the summed batch loss divided by the
        number of batches. When the pycocotools evaluation runs, the mAP entry
        is replaced by ``cocoEval.stats[1]``.
    """
    if model is None:
        device = torch_utils.select_device()
        model = Darknet(cfg, img_size).to(device)
        if weights.endswith('.pt'):
            model.load_state_dict(torch.load(weights, map_location=device)['model'])
        else:
            _ = load_darknet_weights(model, weights)
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    else:
        device = next(model.parameters()).device

    data_cfg = parse_data_cfg(data_cfg)
    # NOTE(review): nc and names are not used further down; they are kept so a
    # data file lacking these entries still fails here as it did before.
    nc = int(data_cfg['classes'])
    test_path = data_cfg['valid']
    names = load_classes(data_cfg['names'])

    dataset = LoadImagesAndLabels(test_path, img_size=img_size)
    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            num_workers=4,
                            pin_memory=False,
                            collate_fn=dataset.collate_fn)

    model.eval()
    coco91class = coco80_to_coco91_class()
    print(('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP', 'F1'))
    loss, mp, mr, mean_ap, mf1 = 0., 0., 0., 0., 0.
    jdict, stats = [], []
    for batch_i, (imgs, targets, paths, shapes) in enumerate(tqdm(dataloader, desc='Computing mAP')):
        targets = targets.to(device)
        imgs = imgs.to(device)

        # Dump the first batch once so the inputs can be inspected visually.
        if batch_i == 0 and not os.path.exists('test_batch0.jpg'):
            plot_images(imgs=imgs, targets=targets, fname='test_batch0.jpg')

        inf_out, train_out = model(imgs)
        target_list = build_targets(model, targets)
        loss_i, _ = compute_loss(train_out, target_list)
        loss += loss_i.item()

        output = non_max_suppression(inf_out, conf_thres=conf_thres, nms_thres=nms_thres)

        # Per-image statistics; column 0 of `targets` is the index of the image
        # inside the batch.
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []

            if pred is None:
                # Labels but no detections: record the targets with no predictions.
                if nl:
                    stats.append(([], torch.Tensor(), torch.Tensor(), tcls))
                continue

            if save_json:
                # The image id is the integer after the last '_' in the file stem.
                image_id = int(Path(paths[si]).stem.split('_')[-1])
                box = pred[:, :4].clone()
                scale_coords(img_size, box, shapes[si])
                box = xyxy2xywh(box)
                # Presumably xywh is centre-based here, so this moves x, y to
                # the top-left corner -- confirm against xyxy2xywh.
                box[:, :2] -= box[:, 2:] / 2
                for d, b in zip(pred, box):
                    jdict.append({
                        'image_id': image_id,
                        'category_id': coco91class[int(d[6])],
                        'bbox': [float3(x) for x in b],
                        'score': float(d[4])
                    })

            # Every prediction starts out as incorrect.
            correct = [0] * len(pred)
            if nl:
                detected = []
                # assumes label boxes are normalised xywh -- TODO confirm
                tbox = xywh2xyxy(labels[:, 1:5]) * img_size
                for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred):
                    # Stop once every target in the image has been matched.
                    if len(detected) == nl:
                        break
                    # Skip predictions whose class does not occur in the image.
                    if pcls.item() not in tcls:
                        continue
                    iou, bi = bbox_iou(pbox, tbox).max(0)
                    if iou > iou_thres and bi not in detected:
                        correct[i] = 1
                        detected.append(bi)

            stats.append((correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls))

    # `stats` is empty when no image produced statistics; in that case the
    # metrics keep their 0.0 defaults instead of indexing into an empty list.
    stats_np = [np.concatenate(x, 0) for x in zip(*stats)]
    if stats_np:
        p, r, ap, f1, ap_class = ap_per_class(*stats_np)
        mp, mr, mean_ap, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
    print(save_json, mean_ap)

    if save_json and mean_ap and len(jdict):
        imgIds = [int(Path(x).stem.split('_')[-1]) for x in dataset.img_files]
        with open('results.json', 'w') as file:
            json.dump(jdict, file)

        cocoGt = COCO(coco_annotations)
        cocoDt = cocoGt.loadRes('results.json')
        print("cocoEval.summarize()")
        cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
        cocoEval.params.imgIds = imgIds  # restrict scoring to the evaluated images
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
        mean_ap = cocoEval.stats[1]

    # max(..., 1) avoids a division by zero when the loader yields no batches.
    return mp, mr, mean_ap, mf1, loss / max(len(dataloader), 1)
if __name__ == '__main__':
    # Command-line options, registered in the order listed here.
    cli_options = (
        ('--batch-size', {'type': int, 'default': 128, 'help': 'size of each image batch'}),
        ('--cfg', {'type': str, 'default': 'cfg/yolov3.cfg', 'help': 'cfg file path'}),
        ('--data-cfg', {'type': str, 'default': 'data/coco.data', 'help': 'coco.data file path'}),
        ('--weights', {'type': str, 'default': 'weights/yolov3.pt', 'help': 'path to weights file'}),
        ('--iou-thres', {'type': float, 'default': 0.5,
                         'help': 'iou threshold required to qualify as detected'}),
        ('--conf-thres', {'type': float, 'default': 0.001, 'help': 'object confidence threshold'}),
        ('--nms-thres', {'type': float, 'default': 0.5,
                         'help': 'iou threshold for non-maximum suppression'}),
        ('--save-json', {'action': 'store_true',
                         'help': 'save a cocoapi-compatible JSON results file'}),
        ('--img-size', {'type': int, 'default': 416, 'help': 'size of each image dimension'}),
    )
    parser = argparse.ArgumentParser(prog='test.py')
    for flag, settings in cli_options:
        parser.add_argument(flag, **settings)

    opt = parser.parse_args()
    print(opt, end='\n\n')

    # Evaluation only, so gradient tracking is switched off.
    with torch.no_grad():
        mAP = test(
            cfg=opt.cfg,
            data_cfg=opt.data_cfg,
            weights=opt.weights,
            batch_size=opt.batch_size,
            img_size=opt.img_size,
            iou_thres=opt.iou_thres,
            conf_thres=opt.conf_thres,
            nms_thres=opt.nms_thres,
            save_json=opt.save_json
        )
detect.py
import argparse
import time
from sys import platform
from models import *
from utils.datasets import *
from utils.utils import *
"""
对单个图片/视频/摄像头数据流进行预测,并将绘制的结果保存
"""
def detect(
        cfg,
        data_cfg,
        weights,
        images,
        output='output',
        img_size=416,
        conf_thres=0.5,
        nms_thres=0.5,
        save_txt=False,
        save_images=True,
        webcam=False
):
    """Run detection on images, videos or a webcam stream and save the results.

    Args:
        cfg: Model definition passed to ``Darknet``.
        data_cfg: Path handed to ``parse_data_cfg``; its 'names' entry gives
            the class-name file.
        weights: Weights file. A name ending in '.pt' is read with
            ``torch.load`` (state dict under the 'model' key), anything else
            goes through ``load_darknet_weights``.
        images: Source passed to ``LoadImages`` (ignored when ``webcam``).
        output: Output folder. It is DELETED and re-created on every call.
        img_size: Image size passed to the model and the loaders.
        conf_thres: Forwarded to ``non_max_suppression``.
        nms_thres: Forwarded to ``non_max_suppression``.
        save_txt: When truthy, append one line per detection to
            ``<save_path>.txt``.
        save_images: When truthy, write annotated images / video frames into
            ``output``. Forced to False in webcam mode.
        webcam: Read frames from ``LoadWebcam`` and show them with
            ``cv2.imshow`` instead of saving.

    Returns:
        None. Returns early, right after the ONNX export, when the
        module-level ``ONNX_EXPORT`` flag is truthy.
    """
    device = torch_utils.select_device()

    # Always start from an empty output folder.
    if os.path.exists(output):
        shutil.rmtree(output)
    os.makedirs(output)

    model = Darknet(cfg, img_size)
    if weights.endswith('.pt'):
        model.load_state_dict(torch.load(weights, map_location=device)['model'])
    else:
        _ = load_darknet_weights(model, weights)
    model.to(device).eval()

    vid_path, vid_writer = None, None
    save_path = None  # stays None when the loader yields nothing
    if webcam:
        save_images = False
        dataloader = LoadWebcam(img_size=img_size)
    else:
        dataloader = LoadImages(images, img_size=img_size)

    classes = load_classes(parse_data_cfg(data_cfg)['names'])
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))]

    try:
        for path, img, im0, vid_cap in dataloader:
            t = time.time()
            save_path = str(Path(output) / Path(path).name)

            img = torch.from_numpy(img).unsqueeze(0).to(device)
            if ONNX_EXPORT:
                torch.onnx.export(model, img, 'weights/model.onnx', verbose=True)
                return
            pred, _ = model(img)
            detections = non_max_suppression(pred, conf_thres, nms_thres)[0]

            if detections is not None and len(detections) > 0:
                # Rescale the boxes from the network input size to the size of im0.
                # NOTE(review): the value returned by .round() is discarded; if
                # in-place rounding was intended this needs round_() -- confirm.
                scale_coords(img_size, detections[:, :4], im0.shape).round()

                # Print the number of detections per class.
                for c in detections[:, -1].unique():
                    n = (detections[:, -1] == c).sum()
                    print('%g %ss' % (n, classes[int(c)]), end=', ')

                for *xyxy, conf, cls_conf, cls in detections:
                    if save_txt:
                        with open(save_path + '.txt', 'a') as file:
                            file.write(('%g ' * 6 + '\n') % (*xyxy, cls, conf))

                    label = '%s %.2f' % (classes[int(cls)], conf)
                    plot_one_box(xyxy, im0, label=label, color=colors[int(cls)])

            print('Done. (%.3fs)' % (time.time() - t))

            if webcam:
                cv2.imshow(weights, im0)

            if save_images:
                if dataloader.mode == 'video':
                    if vid_path != save_path:
                        # A new video started: close the previous writer and
                        # open one matching the new source's size and fps.
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()
                        width = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        height = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'avc1'), fps,
                                                     (width, height))
                    vid_writer.write(im0)
                else:
                    cv2.imwrite(save_path, im0)
    finally:
        # Release the last writer too (also on the ONNX early return or an
        # error); previously only replaced writers were ever released.
        if isinstance(vid_writer, cv2.VideoWriter):
            vid_writer.release()

    if save_images and platform == 'darwin':
        # macOS: open the output folder and, if there is one, the last result.
        # An argument list keeps paths containing spaces intact.
        import subprocess
        open_args = ['open', output]
        if save_path is not None:
            open_args.append(save_path)
        subprocess.run(open_args, check=False)
if __name__ == '__main__':
    # Command-line options, registered in the order listed here.
    cli_options = (
        ('--cfg', {'type': str, 'default': 'cfg/yolov3-spp.cfg', 'help': 'cfg file path'}),
        ('--data-cfg', {'type': str, 'default': 'data/coco.data', 'help': 'coco.data file path'}),
        ('--weights', {'type': str, 'default': 'weights/yolov3-spp.weights',
                       'help': 'path to weights file'}),
        ('--images', {'type': str, 'default': 'data/samples', 'help': 'path to images'}),
        ('--img-size', {'type': int, 'default': 416, 'help': 'size of each image dimension'}),
        ('--conf-thres', {'type': float, 'default': 0.5, 'help': 'object confidence threshold'}),
        ('--nms-thres', {'type': float, 'default': 0.5,
                         'help': 'iou threshold for non-maximum suppression'}),
    )
    parser = argparse.ArgumentParser()
    for flag, settings in cli_options:
        parser.add_argument(flag, **settings)

    opt = parser.parse_args()
    print(opt)

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        detect(
            cfg=opt.cfg,
            data_cfg=opt.data_cfg,
            weights=opt.weights,
            images=opt.images,
            img_size=opt.img_size,
            conf_thres=opt.conf_thres,
            nms_thres=opt.nms_thres
        )
test.py
import argparse
import json
from torch.utils.data import DataLoader
from models import *
from utils.datasets import *
from utils.utils import *
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
"""
加载模型,预测结果,保存成json文件,然后使用cocoapi接口计算mAP
"""
def predict_and_eval(
        cfg,
        data_cfg,
        weights,
        batch_size,
        img_size,
        iou_thres,
        conf_thres,
        nms_thres,
        save_json=True,
        model=None,
        coco_annotations='/home/common/datasets/coco/annotations/instances_val2014.json'
):
    """Run the model over the validation set and report detection metrics.

    Args:
        cfg: Model definition passed to ``Darknet``; only used when ``model``
            is None.
        data_cfg: Path handed to ``parse_data_cfg``; its 'classes', 'valid'
            and 'names' entries are read.
        weights: Weights file; only used when ``model`` is None. A name ending
            in '.pt' is read with ``torch.load`` (state dict stored under the
            'model' key), anything else goes through ``load_darknet_weights``.
        batch_size: Batch size of the validation ``DataLoader``.
        img_size: Image size passed to the model, the dataset and
            ``scale_coords``.
        iou_thres: A prediction is marked correct when its best IoU against
            the image's target boxes exceeds this value and that target has
            not been matched already.
        conf_thres: Forwarded to ``non_max_suppression``.
        nms_thres: Forwarded to ``non_max_suppression``.
        save_json: When truthy, detections are collected as COCO-style result
            dicts; if any were collected and the computed mAP is non-zero they
            are written to 'results.json' and scored with pycocotools.
        model: Already-built model to evaluate. When given, ``cfg`` and
            ``weights`` are not used and the device is taken from the model's
            first parameter.
        coco_annotations: COCO ground-truth annotation file used for the
            pycocotools evaluation.

    Returns:
        Tuple ``(mp, mr, map, mf1, loss)``: the means of the per-class
        precision, recall, AP and F1 returned by ``ap_per_class`` (all 0.0 if
        no statistics were gathered) and the summed batch loss divided by the
        number of batches. When the pycocotools evaluation runs, the mAP entry
        is replaced by ``cocoEval.stats[1]``.
    """
    if model is None:
        device = torch_utils.select_device()
        model = Darknet(cfg, img_size).to(device)
        if weights.endswith('.pt'):
            model.load_state_dict(torch.load(weights, map_location=device)['model'])
        else:
            _ = load_darknet_weights(model, weights)
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    else:
        device = next(model.parameters()).device

    data_cfg = parse_data_cfg(data_cfg)
    # NOTE(review): nc and names are not used further down; they are kept so a
    # data file lacking these entries still fails here as it did before.
    nc = int(data_cfg['classes'])
    test_path = data_cfg['valid']
    names = load_classes(data_cfg['names'])

    dataset = LoadImagesAndLabels(test_path, img_size=img_size)
    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            num_workers=4,
                            pin_memory=False,
                            collate_fn=dataset.collate_fn)

    model.eval()
    coco91class = coco80_to_coco91_class()
    print(('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP', 'F1'))
    loss, mp, mr, mean_ap, mf1 = 0., 0., 0., 0., 0.
    jdict, stats = [], []
    for batch_i, (imgs, targets, paths, shapes) in enumerate(tqdm(dataloader, desc='Computing mAP')):
        targets = targets.to(device)
        imgs = imgs.to(device)

        # Dump the first batch once so the inputs can be inspected visually.
        if batch_i == 0 and not os.path.exists('test_batch0.jpg'):
            plot_images(imgs=imgs, targets=targets, fname='test_batch0.jpg')

        inf_out, train_out = model(imgs)
        target_list = build_targets(model, targets)
        loss_i, _ = compute_loss(train_out, target_list)
        loss += loss_i.item()

        output = non_max_suppression(inf_out, conf_thres=conf_thres, nms_thres=nms_thres)

        # Per-image statistics; column 0 of `targets` is the index of the image
        # inside the batch.
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []

            if pred is None:
                # Labels but no detections: record the targets with no predictions.
                if nl:
                    stats.append(([], torch.Tensor(), torch.Tensor(), tcls))
                continue

            if save_json:
                # The image id is the integer after the last '_' in the file stem.
                image_id = int(Path(paths[si]).stem.split('_')[-1])
                box = pred[:, :4].clone()
                scale_coords(img_size, box, shapes[si])
                box = xyxy2xywh(box)
                # Presumably xywh is centre-based here, so this moves x, y to
                # the top-left corner -- confirm against xyxy2xywh.
                box[:, :2] -= box[:, 2:] / 2
                for d, b in zip(pred, box):
                    jdict.append({
                        'image_id': image_id,
                        'category_id': coco91class[int(d[6])],
                        'bbox': [float3(x) for x in b],
                        'score': float(d[4])
                    })

            # Every prediction starts out as incorrect.
            correct = [0] * len(pred)
            if nl:
                detected = []
                # assumes label boxes are normalised xywh -- TODO confirm
                tbox = xywh2xyxy(labels[:, 1:5]) * img_size
                for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred):
                    # Stop once every target in the image has been matched.
                    if len(detected) == nl:
                        break
                    # Skip predictions whose class does not occur in the image.
                    if pcls.item() not in tcls:
                        continue
                    iou, bi = bbox_iou(pbox, tbox).max(0)
                    if iou > iou_thres and bi not in detected:
                        correct[i] = 1
                        detected.append(bi)

            stats.append((correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls))

    # `stats` is empty when no image produced statistics; in that case the
    # metrics keep their 0.0 defaults instead of indexing into an empty list.
    stats_np = [np.concatenate(x, 0) for x in zip(*stats)]
    if stats_np:
        p, r, ap, f1, ap_class = ap_per_class(*stats_np)
        mp, mr, mean_ap, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
    print(save_json, mean_ap)

    if save_json and mean_ap and len(jdict):
        imgIds = [int(Path(x).stem.split('_')[-1]) for x in dataset.img_files]
        with open('results.json', 'w') as file:
            json.dump(jdict, file)

        cocoGt = COCO(coco_annotations)
        cocoDt = cocoGt.loadRes('results.json')
        print("cocoEval.summarize()")
        cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
        cocoEval.params.imgIds = imgIds  # restrict scoring to the evaluated images
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
        mean_ap = cocoEval.stats[1]

    # max(..., 1) avoids a division by zero when the loader yields no batches.
    return mp, mr, mean_ap, mf1, loss / max(len(dataloader), 1)
if __name__ == '__main__':
    # Command-line entry point: parse the evaluation options and run one
    # evaluation pass with gradient tracking disabled.
    parser = argparse.ArgumentParser(prog='test.py')
    parser.add_argument('--batch-size', type=int, default=128, help='size of each image batch')
    parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
    parser.add_argument('--data-cfg', type=str, default='data/coco.data', help='coco.data file path')
    parser.add_argument('--weights', type=str, default='weights/yolov3.pt', help='path to weights file')
    parser.add_argument('--iou-thres', type=float, default=0.5, help='iou threshold required to qualify as detected')
    parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold')
    parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression')
    parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file')
    parser.add_argument('--img-size', type=int, default=416, help='size of each image dimension')
    opt = parser.parse_args()
    print(opt, end='\n\n')

    with torch.no_grad():
        # The evaluation function in this script is predict_and_eval; the
        # previous call to test(...) referred to a name that is not defined here.
        mAP = predict_and_eval(
            opt.cfg,
            opt.data_cfg,
            opt.weights,
            opt.batch_size,
            opt.img_size,
            opt.iou_thres,
            opt.conf_thres,
            opt.nms_thres,
            opt.save_json
        )
train.py
import argparse
import time
import torch.distributed as dist
from torch.utils.data import DataLoader
import test
from models import *
from utils.datasets import *
from utils.utils import *
"""
训练
"""
def train(
        cfg,
        data_cfg,
        img_size=416,
        resume=False,
        epochs=273,
        batch_size=16,
        accumulate=1,
        multi_scale=False,
        freeze_backbone=False,
        num_workers=4,
        transfer=False
):
    """Train a Darknet model, evaluating and checkpointing after every epoch.

    Besides its arguments this function reads the module-level ``opt``
    namespace (``opt.backend``, ``opt.dist_url``, ``opt.world_size``,
    ``opt.rank``, ``opt.nosave``), so ``opt`` must exist before it is called.

    Args:
        cfg: Model definition passed to ``Darknet``. A name containing
            '-tiny.cfg' selects the tiny backbone weights when not resuming.
        data_cfg: Path handed to ``parse_data_cfg``; its 'train' entry is the
            training set. It is also forwarded to the evaluation function.
        img_size: Training image size; overridden to 608 when ``multi_scale``.
        resume: Load a checkpoint instead of backbone weights: 'latest.pt',
            or 'yolov3.pt' when ``transfer`` is set (both under 'weights/').
        epochs: Training runs over ``range(start_epoch, epochs)``.
        batch_size: Batch size for the training loader and the evaluation.
        accumulate: The optimizer steps every ``accumulate`` batches (and on
            the last batch of the epoch).
        multi_scale: Every 10 batches pick a new dataset image size from the
            multiples of 32 between 320 and 608; forces ``num_workers`` to 0.
        freeze_backbone: Freeze parameters whose layer index is below the
            backbone cutoff during epoch 0 and unfreeze them in epoch 1.
        num_workers: Worker count of the training ``DataLoader``.
        transfer: With ``resume``, load 'yolov3.pt' non-strictly, skipping
            tensors with a single element or a first dimension of 255, and
            leave only parameters whose first dimension equals the YOLO layer
            filter count trainable.

    Returns:
        None. Side effects: appends one line per epoch to 'results.txt' and,
        unless ``opt.nosave``, writes 'latest.pt', 'best.pt' and periodic
        'backup<epoch>.pt' checkpoints under 'weights/'.
    """
    weights = 'weights' + os.sep
    latest = weights + 'latest.pt'
    best = weights + 'best.pt'
    device = torch_utils.select_device()

    if multi_scale:
        img_size = 608  # start at the largest size multi-scale can select
        num_workers = 0
    else:
        torch.backends.cudnn.benchmark = True

    train_path = parse_data_cfg(data_cfg)['train']
    model = Darknet(cfg, img_size).to(device)

    lr0 = 0.001  # initial learning rate
    optimizer = torch.optim.SGD(model.parameters(), lr=lr0, momentum=0.9, weight_decay=0.0005)

    cutoff = -1
    start_epoch = 0
    best_loss = float('inf')
    # Filter count of the layer right before the first YOLO layer.
    nf = int(model.module_defs[model.yolo_layers[0] - 1]['filters'])
    if resume:
        if transfer:
            chkpt = torch.load(weights + 'yolov3.pt', map_location=device)
            model.load_state_dict({k: v for k, v in chkpt['model'].items() if v.numel() > 1 and v.shape[0] != 255},
                                  strict=False)
            for p in model.parameters():
                p.requires_grad = p.shape[0] == nf
        else:
            chkpt = torch.load(latest, map_location=device)
            model.load_state_dict(chkpt['model'])

        # NOTE(review): reconstructed as shared by both branches above (the
        # source indentation was lost) -- confirm.
        start_epoch = chkpt['epoch'] + 1
        if chkpt['optimizer'] is not None:
            optimizer.load_state_dict(chkpt['optimizer'])
            best_loss = chkpt['best_loss']
        del chkpt
    else:
        # Fresh run: initialise from pre-trained backbone weights.
        if '-tiny.cfg' in cfg:
            cutoff = load_darknet_weights(model, weights + 'yolov3-tiny.conv.15')
        else:
            cutoff = load_darknet_weights(model, weights + 'darknet53.conv.74')

    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[218, 245], gamma=0.1,
                                                     last_epoch=start_epoch - 1)

    dataset = LoadImagesAndLabels(train_path, img_size=img_size, augment=True)

    if torch.cuda.device_count() > 1:
        dist.init_process_group(backend=opt.backend, init_method=opt.dist_url, world_size=opt.world_size,
                                rank=opt.rank)
        model = torch.nn.parallel.DistributedDataParallel(model)
        sampler = torch.utils.data.distributed.DistributedSampler(dataset)
    else:
        sampler = None

    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            num_workers=num_workers,
                            shuffle=False,
                            pin_memory=False,
                            collate_fn=dataset.collate_fn,
                            sampler=sampler)

    t = time.time()
    model_info(model)
    nB = len(dataloader)
    n_burnin = min(round(nB / 5 + 1), 1000)  # number of burn-in batches
    # Remove stale preview images so fresh ones are written for this run.
    os.remove('train_batch0.jpg') if os.path.exists('train_batch0.jpg') else None
    os.remove('test_batch0.jpg') if os.path.exists('test_batch0.jpg') else None
    for epoch in range(start_epoch, epochs):
        model.train()
        print(('\n%8s%12s' + '%10s' * 7) % ('Epoch', 'Batch', 'xy', 'wh', 'conf', 'cls', 'total', 'nTargets', 'time'))

        scheduler.step()

        # Freeze the backbone in epoch 0 and unfreeze it in epoch 1.
        if freeze_backbone and epoch < 2:
            for name, p in model.named_parameters():
                if int(name.split('.')[1]) < cutoff:
                    p.requires_grad = epoch != 0

        mloss = defaultdict(float)  # running means of the loss components
        for i, (imgs, targets, _, _) in enumerate(dataloader):
            imgs = imgs.to(device)
            targets = targets.to(device)
            nt = len(targets)
            if nt == 0:
                continue

            if epoch == 0 and i == 0:
                plot_images(imgs=imgs, targets=targets, fname='train_batch0.jpg')

            # Burn-in: ramp the learning rate up from 0 during the first batches.
            if epoch == 0 and i <= n_burnin:
                lr = lr0 * (i / n_burnin) ** 4
                for x in optimizer.param_groups:
                    x['lr'] = lr

            pred = model(imgs)
            target_list = build_targets(model, targets)
            loss, loss_dict = compute_loss(pred, target_list)
            loss.backward()

            # Step only every `accumulate` batches, and on the epoch's last batch.
            if (i + 1) % accumulate == 0 or (i + 1) == nB:
                optimizer.step()
                optimizer.zero_grad()

            for key, val in loss_dict.items():
                mloss[key] = (mloss[key] * i + val) / (i + 1)

            s = ('%8s%12s' + '%10.3g' * 7) % (
                '%g/%g' % (epoch, epochs - 1), '%g/%g' % (i, nB - 1),
                mloss['xy'], mloss['wh'], mloss['conf'], mloss['cls'],
                mloss['total'], nt, time.time() - t)
            t = time.time()
            print(s)

            if multi_scale and (i + 1) % 10 == 0:
                dataset.img_size = random.choice(range(10, 20)) * 32
                print('multi_scale img_size = %g' % dataset.img_size)

        # Evaluate after the epoch. test.py exposes predict_and_eval (there is
        # no test.test), and weights and the three thresholds have no defaults,
        # so they are passed explicitly. The thresholds mirror test.py's
        # command-line defaults; save_json keeps the callee's default.
        with torch.no_grad():
            results = test.predict_and_eval(
                cfg,
                data_cfg,
                weights=None,  # not used: an already-built model is passed
                batch_size=batch_size,
                img_size=img_size,
                iou_thres=0.5,
                conf_thres=0.001,
                nms_thres=0.5,
                model=model)

        with open('results.txt', 'a') as file:
            file.write(s + '%11.3g' * 5 % results + '\n')

        test_loss = results[4]
        if test_loss < best_loss:
            best_loss = test_loss

        save = True and not opt.nosave
        if save:
            chkpt = {'epoch': epoch,
                     'best_loss': best_loss,
                     'model': model.module.state_dict() if type(
                         model) is nn.parallel.DistributedDataParallel else model.state_dict(),
                     'optimizer': optimizer.state_dict()}

            torch.save(chkpt, latest)

            # This epoch set the best test loss so far.
            if best_loss == test_loss:
                torch.save(chkpt, best)

            # Extra backup every 10 epochs.
            if epoch > 0 and epoch % 10 == 0:
                torch.save(chkpt, weights + 'backup%g.pt' % epoch)

            del chkpt
"""
训练参数
"""
if __name__ == '__main__':
    # Command-line options, registered in the order listed here.
    cli_options = (
        ('--epochs', {'type': int, 'default': 273, 'help': 'number of epochs'}),
        ('--batch-size', {'type': int, 'default': 16, 'help': 'size of each image batch'}),
        ('--accumulate', {'type': int, 'default': 1,
                          'help': 'accumulate gradient x batches before optimizing'}),
        ('--cfg', {'type': str, 'default': 'cfg/yolov3-spp.cfg', 'help': 'cfg file path'}),
        ('--data-cfg', {'type': str, 'default': 'data/coco.data', 'help': 'coco.data file path'}),
        ('--multi-scale', {'action': 'store_true', 'help': 'random image sizes per batch 320 - 608'}),
        ('--img-size', {'type': int, 'default': 416, 'help': 'pixels'}),
        ('--resume', {'action': 'store_true', 'help': 'resume training flag'}),
        ('--transfer', {'action': 'store_true', 'help': 'transfer learning flag'}),
        ('--num-workers', {'type': int, 'default': 4, 'help': 'number of Pytorch DataLoader workers'}),
        ('--dist-url', {'default': 'tcp://127.0.0.1:9999', 'type': str,
                        'help': 'distributed training init method'}),
        ('--rank', {'default': 0, 'type': int, 'help': 'distributed training node rank'}),
        ('--world-size', {'default': 1, 'type': int,
                          'help': 'number of nodes for distributed training'}),
        ('--backend', {'default': 'nccl', 'type': str, 'help': 'distributed backend'}),
        ('--nosave', {'action': 'store_true', 'help': 'do not save training results'}),
    )
    parser = argparse.ArgumentParser()
    for flag, settings in cli_options:
        parser.add_argument(flag, **settings)

    # train() reads this namespace as a module-level global.
    opt = parser.parse_args()
    print(opt, end='\n\n')

    init_seeds()

    # --transfer implies loading a checkpoint, so it also switches resume on.
    train(
        opt.cfg,
        opt.data_cfg,
        img_size=opt.img_size,
        resume=opt.resume or opt.transfer,
        transfer=opt.transfer,
        epochs=opt.epochs,
        batch_size=opt.batch_size,
        accumulate=opt.accumulate,
        multi_scale=opt.multi_scale,
        num_workers=opt.num_workers
    )