# PulseFocusPlatform/static/configs/ssd/ssd_vgg16_512_voc.yml
#
# Listing metadata from the file viewer: 154 lines, 3.2 KiB, YAML

# Run-wide settings for the ssd_vgg16_512_voc configuration.
architecture: SSD
metric: VOC
map_type: '11point'
# 20 label classes + 1 background class
num_classes: 21
use_gpu: true

# Counters expressed in iterations, going by the key names.
max_iters: 120000
snapshot_iter: 10000
log_iter: 20

# Weight sources and output locations.
pretrain_weights: 'https://paddle-imagenet-models-name.bj.bcebos.com/VGG16_caffe_pretrained.tar'
save_dir: output
weights: output/ssd_vgg16_512_voc/model_final
# SSD section: `backbone` and `multi_box_head` hold the names of two other
# top-level sections of this file (VGG and MultiBoxHead).
SSD:
  backbone: VGG
  multi_box_head: MultiBoxHead
  # Output-decoder settings; the nms_* keys sit alongside a score cutoff and a
  # keep_top_k limit.
  # NOTE(review): nesting restored from a listing that had lost its
  # indentation - confirm these six keys all belong under output_decoder.
  output_decoder:
    background_label: 0
    keep_top_k: 200
    nms_eta: 1.0
    nms_threshold: 0.45
    nms_top_k: 400
    score_threshold: 0.01
# VGG section, referenced by name from SSD.backbone.
VGG:
  depth: 16
  with_extra_blocks: true
  # Seven entries: one float followed by six -1 values.
  normalizations: [20.0, -1, -1, -1, -1, -1, -1]
  # Five rows of five integers each, written one row per line to keep the
  # line length manageable.
  extra_block_filters:
    - [256, 512, 1, 2, 3]
    - [128, 256, 1, 2, 3]
    - [128, 256, 1, 2, 3]
    - [128, 256, 1, 2, 3]
    - [128, 256, 1, 1, 4]
# MultiBoxHead section, referenced by name from SSD.multi_box_head.
MultiBoxHead:
  base_size: 512
  # The four list-valued keys below (aspect_ratios, min_sizes, max_sizes,
  # steps) each carry seven entries.
  aspect_ratios:
    - [2.0]
    - [2.0, 3.0]
    - [2.0, 3.0]
    - [2.0, 3.0]
    - [2.0, 3.0]
    - [2.0]
    - [2.0]
  min_ratio: 20
  max_ratio: 90
  # Each max_sizes entry equals the next min_sizes entry.
  min_sizes: [20.0, 51.0, 133.0, 215.0, 296.0, 378.0, 460.0]
  max_sizes: [51.0, 133.0, 215.0, 296.0, 378.0, 460.0, 542.0]
  steps: [8, 16, 32, 64, 128, 256, 512]
  offset: 0.5
  flip: true
  kernel_size: 3
  pad: 1
# Learning-rate section: a base rate plus two tagged scheduler entries.
LearningRate:
  base_lr: 0.001
  schedulers:
    # Both milestones fall below the file's max_iters value of 120000.
    - !PiecewiseDecay
      gamma: 0.1
      milestones: [80000, 100000]
    # `steps` here belongs to the warmup entry, not to the parent mapping.
    - !LinearWarmup
      start_factor: 0.3333333333333333
      steps: 500
# Optimizer section: an optimizer mapping and a regularizer mapping, each with
# its own `type` key (nesting keeps the two `type` keys from colliding).
OptimizerBuilder:
  optimizer:
    momentum: 0.9
    type: Momentum
  regularizer:
    factor: 0.0005
    type: L2
# Training reader: input definition, dataset, per-sample transform list, and
# reader-level batching/worker settings.
# NOTE(review): nesting was restored from a listing that had lost its
# indentation; the keys after the last transform (batch_size .. use_process)
# are placed at reader level - confirm against the consuming loader.
TrainReader:
  inputs_def:
    image_shape: [3, 512, 512]
    fields: ['image', 'gt_bbox', 'gt_class']
  dataset:
    !VOCDataSet
    dataset_dir: dataset/voc
    anno_path: trainval.txt
    use_default_label: true
  # Transforms are listed in file order; each entry is a tagged mapping.
  sample_transforms:
    - !DecodeImage
      to_rgb: true
    - !RandomDistort
      brightness_lower: 0.875
      brightness_upper: 1.125
      is_order: true
    - !RandomExpand
      fill_value: [123, 117, 104]
    - !RandomCrop
      allow_no_crop: true
    - !NormalizeBox {}
    - !ResizeImage
      interp: 1
      target_size: 512
      use_cv2: false
    - !RandomFlipImage
      is_normalized: true
    - !Permute
      to_bgr: false
    # Same mean values as RandomExpand.fill_value above.
    - !NormalizeImage
      is_scale: false
      mean: [123, 117, 104]
      std: [1, 1, 1]
  batch_size: 8
  shuffle: true
  worker_num: 8
  bufsize: 16
  use_process: true
# Evaluation reader: input definition, dataset, per-sample transform list, and
# reader-level batching/worker settings.
# NOTE(review): nesting was restored from a listing that had lost its
# indentation; batch_size, worker_num and bufsize are placed at reader level -
# confirm against the consuming loader.
EvalReader:
  inputs_def:
    image_shape: [3, 512, 512]
    fields: ['image', 'gt_bbox', 'gt_class', 'im_shape', 'im_id', 'is_difficult']
  dataset:
    !VOCDataSet
    anno_path: test.txt
    dataset_dir: dataset/voc
    use_default_label: true
  # Transforms are listed in file order; no Random* transforms appear here.
  sample_transforms:
    - !DecodeImage
      to_rgb: true
      with_mixup: false
    - !NormalizeBox {}
    - !ResizeImage
      interp: 1
      target_size: 512
      use_cv2: false
    - !Permute
      to_bgr: false
    - !NormalizeImage
      is_scale: false
      mean: [123, 117, 104]
      std: [1, 1, 1]
  batch_size: 32
  worker_num: 8
  bufsize: 16
# Test/inference reader: input definition, an ImageFolder dataset, per-sample
# transform list, and a reader-level batch size.
# NOTE(review): nesting was restored from a listing that had lost its
# indentation; batch_size is placed at reader level - confirm against the
# consuming loader.
TestReader:
  inputs_def:
    image_shape: [3, 512, 512]
    fields: ['image', 'im_id', 'im_shape']
  dataset:
    !ImageFolder
    anno_path: test.txt
    use_default_label: true
  # Transforms are listed in file order. ResizeImage here sets use_cv2: true
  # and max_size: 0, unlike the train/eval readers in this file.
  sample_transforms:
    - !DecodeImage
      to_rgb: true
      with_mixup: false
    - !ResizeImage
      interp: 1
      max_size: 0
      target_size: 512
      use_cv2: true
    - !Permute
      to_bgr: false
    - !NormalizeImage
      is_scale: false
      mean: [123, 117, 104]
      std: [1, 1, 1]
  batch_size: 1