代码地址
主要是注意参数呀，同志们！
官方使用说明上没给例子，我纠结了好几天，不知道该怎么写。
以下是例子：
RandomDistort
# Architecture of detection, which is also the prefix of data feed module
architecture: YOLOv3

# Data feed modules for training, evaluation and test.
train_feed: YoloTrainFeed
eval_feed: YoloEvalFeed
test_feed: YoloTestFeed

# Whether to use GPU or CPU; true by default.
use_gpu: true

# Maximum number of iterations.
# For the YOLOv3 model the default iteration number trains for 270 epochs.
max_iters: 500200

# Number of iterations over which the log output is smoothed; 20 by default.
log_smooth_window: 20

# Interval, in iterations, at which the training log is displayed.
log_iter: 20

# Directory in which models are saved.
save_dir: 'output'

# Snapshot period; 2000 by default. When training and testing at the same
# time, the model is evaluated at each snapshot_iter.
snapshot_iter: 2000

# Evaluation method; COCO and VOC are available.
metric: COCO

# Path of the pretrained weights. If a URL is provided, it is downloaded and
# decompressed automatically.
pretrain_weights: 'https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_pretrained.tar'

# Path of the final model used for evaluation and test.
weights: 'output/yolov3_darknet/model_final'

# Number of classes: 80 for COCO and 20 for VOC.
num_classes: 80
# YOLOv3 architecture, see https://arxiv.org/abs/1804.02767
YOLOv3:
  # Names of the backbone and detection-head modules to assemble.
  backbone: DarkNet
  yolo_head: YOLOv3Head
# Backbone module
DarkNet:
  # Batch normalization type in training; sync_bn selects synchronized
  # batch normalization.
  norm_type: sync_bn
  # L2 weight decay factor of the batch normalization layers.
  norm_decay: 0.
  # Number of DarkNet convolution layers; only 53 is supported currently.
  depth: 53
# YOLOv3 head module
# Generates bbox output in evaluation and calculates loss in training.
# fluid.layers.yolov3_loss / fluid.layers.yolo_box
YOLOv3Head:
  # Anchor masks of the 3 yolo_loss/yolo_box layers; each layer has 3 anchors.
  anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
  # 9 anchors for the 3 yolo_loss/yolo_box layers, generated by performing
  # k-means on the COCO gtboxes.
  anchors: [[10, 13], [16, 30], [33, 23],
            [30, 61], [62, 45], [59, 119],
            [116, 90], [156, 198], [373, 326]]
  # L2 weight decay factor of the batch normalization layers.
  norm_decay: 0.
  # Ignore threshold for the yolo_loss layer, 0.7 by default.
  # Objectness loss is ignored if a prediction bbox overlaps a gtbox by more
  # than ignore_thresh.
  ignore_thresh: 0.7
  # Whether to use label smoothing in the yolo_loss layer.
  # It is recommended to set this to true only when num_classes is very big.
  label_smooth: true
  # fluid.layers.multiclass_nms
  # Non-max suppression for the output prediction boxes; see multiclass_nms
  # for the following parameters.
  # 1. Select detection bounding boxes with scores larger than score_threshold.
  # 2. Select detection bounding boxes with the largest nms_top_k scores.
  # 3. Suppress detection bounding boxes which have a high IoU overlap with
  #    already selected boxes.
  # 4. Keep the top keep_top_k detection bounding boxes as output.
  nms:
    # Label regarded as background and ignored; -1 for no background label.
    background_label: -1
    # Total number of bboxes to be kept per image after the NMS step.
    keep_top_k: 100
    # IoU threshold for NMS; bboxes with IoU over nms_threshold are suppressed.
    nms_threshold: 0.45
    # Maximum number of detections to be kept according to their confidences
    # after filtering detections based on score_threshold.
    nms_top_k: 1000
    # Whether detections are normalized.
    normalized: false
    # Threshold to filter out bounding boxes with a low confidence score.
    score_threshold: 0.01
# Learning rate configuration
LearningRate:
  # Base learning rate for training, 1e-3 by default.
  base_lr: 0.001
  # Learning rate schedulers, PiecewiseDecay and LinearWarmup by default.
  schedulers:
    # fluid.layers.piecewise_decay
    # The learning rate decays by gamma at each milestone stage.
    - !PiecewiseDecay
      gamma: 0.1
      milestones:
        - 400000
        - 450000
    # fluid.layers.linear_lr_warmup
    # The starting learning rate equals base_lr * start_factor.
    - !LinearWarmup
      start_factor: 0.
      steps: 4000
# Optimizer module
OptimizerBuilder:
  # fluid.optimizer
  optimizer:
    momentum: 0.9
    type: Momentum
  # fluid.regularizer
  regularizer:
    factor: 0.0005
    type: L2
# Data feed module for training
YoloTrainFeed:
  # Batch size per device, 8 by default.
  batch_size: 8
  # Dataset module
  dataset:
    # Dataset directory.
    dataset_dir: dataset/coco
    # Annotation file path.
    annotation: annotations/instances_train2017.json
    # Directory where image files are stored.
    image_dir: train2017
  # List of data fields needed.
  fields: [image, gt_box, gt_label, gt_score]
  # List of image dims.
  image_shape: [3, 608, 608]
  # List of sample transformations to use.
  sample_transforms:
    # Read image data and decode it to numpy.
    - !DecodeImage
      to_rgb: true
      # YOLOv3 uses image mixup in training.
      with_mixup: true
    # Mix up two images in training, a trick to improve performance.
    - !MixupImage
      alpha: 1.5  # default: 1.5
      beta: 1.5  # default: 1.5
    # Normalize gtbox to the range [0, 1].
    - !NormalizeBox {}
    # Random color distortion: brightness, contrast, hue, saturation.
    # Each property has a lower/upper bound and a probability (*_prob).
    - !RandomDistort
      brightness_lower: 0.5
      brightness_prob: 0.5
      brightness_upper: 1.5
      contrast_lower: 0.5
      contrast_prob: 0.5
      contrast_upper: 1.5
      count: 4
      hue_lower: -18
      hue_prob: 0.5
      hue_upper: 18
      is_order: false
      saturation_lower: 0.5
      saturation_prob: 0.5
      saturation_upper: 1.5
    # Randomly expand the image and modify the bounding boxes.
    # Operators:
    # 1. Scale the image width and height.
    # 2. Construct a new image with the new height and width.
    # 3. Fill the new image with the mean.
    # 4. Put the original image into the new image.
    # 5. Rescale the bounding boxes.
    # 6. Determine whether the new bboxes are satisfied in the new image.
    - !ExpandImage
      # Max expand ratio, default 4.0.
      max_ratio: 4.0
      mean: [123.675, 116.28, 103.53]
      prob: 0.5
    # Randomly crop the image and modify the bounding boxes.
    # Operators:
    # 1. Scale the image width and height.
    # 2. Crop the image according to a random sample.
    # 3. Rescale the bounding boxes.
    # 4. Determine whether the new bboxes are satisfied in the new image.
    - !CropImage
      # Re-crop the image if there is no bbox in the cropped output image.
      avoid_no_bbox: true
      batch_sampler: [[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0],
                      [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0],
                      [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 1.0],
                      [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 1.0],
                      [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 1.0],
                      [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 1.0],
                      [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]]
      # Whether all bboxes must satisfy the IoU constraints.
      satisfy_all: false
    # Interpolate the image to target_size with a random interpolation method:
    # cv2.INTER_NEAREST,
    # cv2.INTER_LINEAR,
    # cv2.INTER_AREA,
    # cv2.INTER_CUBIC,
    # cv2.INTER_LANCZOS4,
    - !RandomInterpImage
      max_size: 0
      target_size: 608
    # Flip the image and bounding boxes.
    # Operators:
    # 1. Flip the image numpy array.
    # 2. Transform the bboxes' x coordinates.
    #    (Must judge whether the coordinates are normalized!)
    # 3. Transform the segmentations' x coordinates.
    #    (Must judge whether the coordinates are normalized!)
    - !RandomFlipImage
      is_mask_flip: false
      is_normalized: true
      prob: 0.5
    # Normalize the image.
    # Operators:
    # 1. (optional) Scale the image to [0, 1].
    # 2. Subtract the mean from each pixel and divide by std.
    - !NormalizeImage
      is_channel_first: false
      is_scale: true
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]
    # Change the data layout to [C, H, W].
    - !Permute
      channel_first: true
      to_bgr: false
  # List of batch transformations to use.
  batch_transforms:
    # Randomly reshape the images in each mini-batch to different shapes.
    - !RandomShape
      sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
  # YOLOv3 reads gtbox into a zero-padded tensor with a max box number of 50.
  num_max_boxes: 50
  # YOLOv3 reads gtlabel without regarding background as label 0.
  with_background: false
  # Number of samples, -1 represents all samples. -1 by default.
  samples: -1
  # Whether samples should be shuffled, true by default.
  shuffle: true
  # Whether to drop the last images when they amount to less than a batch.
  drop_last: true
  # Whether to use a multi-process reader in training.
  use_process: true
  # Number of workers for the multi-process reader.
  num_workers: 8
  # Buffer size for the reader.
  bufsize: 128
  # Mixup image epoch number.
  mixup_epoch: 250
# Data feed module for evaluation
YoloEvalFeed:
  batch_size: 8
  dataset:
    dataset_dir: dataset/coco
    annotation: annotations/instances_val2017.json
    image_dir: val2017
  batch_transforms: []
  fields: [image, im_size, im_id, gt_box, gt_label, is_difficult]
  image_shape: [3, 608, 608]
  sample_transforms:
    - !DecodeImage
      to_rgb: true
      with_mixup: false
    # Rescale the image to the specified target size, capped at max_size if
    # max_size != 0. If target_size is a list, a scale is selected randomly
    # as the specified target size.
    - !ResizeImage
      interp: 2  # 2 for cv2.INTER_CUBIC
      max_size: 0
      target_size: 608
      use_cv2: true
    - !NormalizeImage
      is_channel_first: false
      is_scale: true
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]
    - !Permute
      channel_first: true
      to_bgr: false
  num_max_boxes: 50
  samples: -1
  shuffle: false
  drop_last: false
  # Use a multi-thread reader in evaluation mode.
  use_process: false
  # Thread number for the multi-thread reader.
  num_workers: 8
  with_background: false
# Data feed module for test
YoloTestFeed:
  batch_size: 1
  dataset:
    annotation: dataset/coco/annotations/instances_val2017.json
  batch_transforms: []
  fields: [image, im_size, im_id]
  sample_transforms:
    - !DecodeImage
      to_rgb: true
      with_mixup: false
    - !ResizeImage
      interp: 2  # same interpolation code as the evaluation feed
      max_size: 0
      target_size: 608
      use_cv2: true
    - !NormalizeImage
      is_channel_first: false
      is_scale: true
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]
    - !Permute
      channel_first: true
      to_bgr: false
  num_max_boxes: 50
  samples: -1
  shuffle: false
  drop_last: false
  # Use a multi-thread reader in test mode.
  use_process: false
  num_workers: 8
  with_background: false