# Forked from PulseFocusPlatform/PulseFocusPlatform (154 lines, 3.2 KiB, YAML).
# Global settings for the SSD / VGG16 / 512x512 / VOC configuration.
architecture: SSD
use_gpu: true

# Iteration-based schedule: total iterations, checkpoint and logging intervals.
max_iters: 120000
snapshot_iter: 10000
log_iter: 20

# Evaluation metric and mAP variant.
metric: VOC
map_type: 11point

# Weight locations: pretrained backbone archive, output directory, and the
# checkpoint path stored under `weights`.
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/VGG16_caffe_pretrained.tar
save_dir: output
weights: output/ssd_vgg16_512_voc/model_final

# 20(label_class) + 1(background)
num_classes: 21

# Detector wiring: `backbone` and `multi_box_head` name the top-level sections
# of this file that configure them; `output_decoder` holds the NMS and score
# filtering parameters.
SSD:
  backbone: VGG
  multi_box_head: MultiBoxHead
  output_decoder:
    background_label: 0
    keep_top_k: 200
    nms_eta: 1.0
    nms_threshold: 0.45
    nms_top_k: 400
    score_threshold: 0.01

# Backbone configuration (referenced by `SSD.backbone`).
VGG:
  depth: 16
  with_extra_blocks: true
  # Seven entries, the same length as the per-feature-map lists in
  # MultiBoxHead. NOTE(review): -1 presumably means "no normalization" for
  # that feature map — confirm against the backbone implementation.
  normalizations: [20., -1, -1, -1, -1, -1, -1]
  # Five extra blocks, five integers each.
  # NOTE(review): field meaning is not visible here — confirm with the VGG
  # implementation before editing.
  extra_block_filters:
    - [256, 512, 1, 2, 3]
    - [128, 256, 1, 2, 3]
    - [128, 256, 1, 2, 3]
    - [128, 256, 1, 2, 3]
    - [128, 256, 1, 1, 4]

# Detection head configuration (referenced by `SSD.multi_box_head`).
# The list-valued options below each carry seven entries.
MultiBoxHead:
  base_size: 512
  aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]]
  # NOTE(review): both the ratio bounds and explicit min/max size lists are
  # set; confirm which one the head actually uses when both are present.
  min_ratio: 20
  max_ratio: 90
  min_sizes: [20.0, 51.0, 133.0, 215.0, 296.0, 378.0, 460.0]
  max_sizes: [51.0, 133.0, 215.0, 296.0, 378.0, 460.0, 542.0]
  steps: [8, 16, 32, 64, 128, 256, 512]
  offset: 0.5
  flip: true
  kernel_size: 3
  pad: 1

# Learning-rate schedule: a base rate plus a list of tagged scheduler objects.
# Both milestones (80000, 100000) fall inside the 120000-iteration run set by
# the top-level `max_iters`.
LearningRate:
  base_lr: 0.001
  schedulers:
    - !PiecewiseDecay
      gamma: 0.1
      milestones: [80000, 100000]
    - !LinearWarmup
      start_factor: 0.3333333333333333
      steps: 500

# Optimizer and weight-regularization settings.
OptimizerBuilder:
  optimizer:
    momentum: 0.9
    type: Momentum
  regularizer:
    factor: 0.0005
    type: L2

# Training data pipeline: input definition, dataset, the ordered list of
# per-sample transforms, and reader-level batching options.
TrainReader:
  inputs_def:
    image_shape: [3, 512, 512]
    fields: ['image', 'gt_bbox', 'gt_class']
  dataset:
    !VOCDataSet
    dataset_dir: dataset/voc
    anno_path: trainval.txt
    use_default_label: true
  # Transforms are listed in the order given; augmentation entries
  # (distort, expand, crop, flip) appear only in this reader.
  sample_transforms:
    - !DecodeImage
      to_rgb: true
    - !RandomDistort
      brightness_lower: 0.875
      brightness_upper: 1.125
      is_order: true
    - !RandomExpand
      fill_value: [123, 117, 104]
    - !RandomCrop
      allow_no_crop: true
    - !NormalizeBox {}
    - !ResizeImage
      interp: 1
      target_size: 512
      use_cv2: false
    - !RandomFlipImage
      is_normalized: true
    - !Permute
      to_bgr: false
    - !NormalizeImage
      is_scale: false
      mean: [123, 117, 104]
      std: [1, 1, 1]
  # Reader-level options (siblings of `sample_transforms`, not part of the
  # last transform).
  batch_size: 8
  shuffle: true
  worker_num: 8
  bufsize: 16
  use_process: true

# Evaluation data pipeline: same resize/permute/normalize steps as training,
# without the random augmentation transforms.
EvalReader:
  inputs_def:
    image_shape: [3, 512, 512]
    fields: ['image', 'gt_bbox', 'gt_class', 'im_shape', 'im_id', 'is_difficult']
  dataset:
    !VOCDataSet
    anno_path: test.txt
    dataset_dir: dataset/voc
    use_default_label: true
  sample_transforms:
    - !DecodeImage
      to_rgb: true
      with_mixup: false
    - !NormalizeBox {}
    - !ResizeImage
      interp: 1
      target_size: 512
      use_cv2: false
    - !Permute
      to_bgr: false
    - !NormalizeImage
      is_scale: false
      mean: [123, 117, 104]
      std: [1, 1, 1]
  # Reader-level options (siblings of `sample_transforms`).
  batch_size: 32
  worker_num: 8
  bufsize: 16

# Inference data pipeline: reads images through an ImageFolder dataset and
# processes one image per batch.
TestReader:
  inputs_def:
    image_shape: [3, 512, 512]
    fields: ['image', 'im_id', 'im_shape']
  dataset:
    !ImageFolder
    anno_path: test.txt
    use_default_label: true
  sample_transforms:
    - !DecodeImage
      to_rgb: true
      with_mixup: false
    # Unlike the train/eval readers, this resize sets `max_size` and uses cv2.
    - !ResizeImage
      interp: 1
      max_size: 0
      target_size: 512
      use_cv2: true
    - !Permute
      to_bgr: false
    - !NormalizeImage
      is_scale: false
      mean: [123, 117, 104]
      std: [1, 1, 1]
  # Reader-level option (sibling of `sample_transforms`).
  batch_size: 1