# PulseFocusPlatform/static/configs/ssd/ssd_vgg16_512_voc.yml

# Top-level run settings. `architecture` names the `SSD` stanza defined
# further down in this file.
architecture: SSD
use_gpu: true

# Iteration-based counters.
# NOTE(review): presumably total training length, checkpoint interval and
# logging interval respectively; confirm against the trainer that reads them.
max_iters: 120000
snapshot_iter: 10000
log_iter: 20

# Evaluation metric selection. `11point` is quoted so it is unmistakably a
# string rather than something number-like.
metric: VOC
map_type: '11point'

# Weight and output locations.
pretrain_weights: 'https://paddle-imagenet-models-name.bj.bcebos.com/VGG16_caffe_pretrained.tar'
save_dir: output
weights: output/ssd_vgg16_512_voc/model_final

# 20 label classes + 1 background class.
num_classes: 21

# Detector wiring: `backbone` and `multi_box_head` name the `VGG` and
# `MultiBoxHead` stanzas defined elsewhere in this file.
SSD:
  backbone: VGG
  multi_box_head: MultiBoxHead
  # Output decoding parameters, grouped as thresholds first, then top-k caps.
  # NOTE(review): exact semantics of each key are defined by the decoder
  # implementation, which is not visible here; confirm before tuning.
  output_decoder:
    background_label: 0
    score_threshold: 0.01
    nms_threshold: 0.45
    nms_eta: 1.0
    nms_top_k: 400
    keep_top_k: 200

# Backbone settings, referenced by `SSD.backbone`.
VGG:
  depth: 16
  with_extra_blocks: true
  # Seven entries (the same count as the per-level lists in `MultiBoxHead`);
  # only the first entry is not -1.
  normalizations: [20.0, -1, -1, -1, -1, -1, -1]
  # Five rows of five integers each, written one row per line to stay within
  # a readable line length.
  extra_block_filters:
  - [256, 512, 1, 2, 3]
  - [128, 256, 1, 2, 3]
  - [128, 256, 1, 2, 3]
  - [128, 256, 1, 2, 3]
  - [128, 256, 1, 1, 4]


# Head settings, referenced by `SSD.multi_box_head`.
# `aspect_ratios`, `min_sizes`, `max_sizes` and `steps` each carry seven
# entries, and every `max_sizes` entry equals the next `min_sizes` entry.
MultiBoxHead:
  # Matches the 512 used for `image_shape` and `target_size` in the readers.
  base_size: 512
  kernel_size: 3
  pad: 1
  offset: 0.5
  flip: true
  # NOTE(review): explicit `min_sizes` / `max_sizes` are also given below;
  # confirm whether the head still consults these two ratios in that case.
  min_ratio: 20
  max_ratio: 90
  min_sizes: [20.0, 51.0, 133.0, 215.0, 296.0, 378.0, 460.0]
  max_sizes: [51.0, 133.0, 215.0, 296.0, 378.0, 460.0, 542.0]
  steps: [8, 16, 32, 64, 128, 256, 512]
  aspect_ratios:
  - [2.0]
  - [2.0, 3.0]
  - [2.0, 3.0]
  - [2.0, 3.0]
  - [2.0, 3.0]
  - [2.0]
  - [2.0]

# Learning-rate schedule: a base rate plus an ordered list of two schedulers.
LearningRate:
  base_lr: 0.001
  schedulers:
  # Both milestones fall before the top-level `max_iters` of 120000.
  # NOTE(review): presumably the rate is scaled by `gamma` at each milestone;
  # confirm against the PiecewiseDecay implementation.
  - !PiecewiseDecay
    milestones:
    - 80000
    - 100000
    gamma: 0.1
  # NOTE(review): presumably ramps from `start_factor` * `base_lr` over the
  # first `steps` iterations; confirm against the LinearWarmup implementation.
  - !LinearWarmup
    steps: 500
    start_factor: 0.3333333333333333

# Optimizer and weight-regularizer selection; each sub-mapping names its
# `type` first, followed by that type's parameter.
OptimizerBuilder:
  optimizer:
    type: Momentum
    momentum: 0.9
  regularizer:
    type: L2
    factor: 0.0005

# Training data pipeline. `image_shape` is [3, 512, 512] and the ResizeImage
# `target_size` below is 512. `sample_transforms` is an ordered list, so its
# entries are kept in their original sequence; only keys inside each mapping
# have been regrouped.
TrainReader:
  inputs_def:
    image_shape: [3, 512, 512]
    fields:
    - image
    - gt_bbox
    - gt_class
  dataset: !VOCDataSet
    dataset_dir: dataset/voc
    anno_path: trainval.txt
    use_default_label: true
  sample_transforms:
  - !DecodeImage
    to_rgb: true
  - !RandomDistort
    brightness_lower: 0.875
    brightness_upper: 1.125
    is_order: true
  # `fill_value` carries the same three numbers as the NormalizeImage `mean`
  # at the end of this list.
  - !RandomExpand
    fill_value: [123, 117, 104]
  - !RandomCrop
    allow_no_crop: true
  # NormalizeBox comes before RandomFlipImage, which is configured with
  # `is_normalized: true`.
  - !NormalizeBox {}
  - !ResizeImage
    target_size: 512
    interp: 1
    use_cv2: false
  - !RandomFlipImage
    is_normalized: true
  - !Permute
    to_bgr: false
  - !NormalizeImage
    mean: [123, 117, 104]
    std: [1, 1, 1]
    is_scale: false
  # Batching and loader settings.
  batch_size: 8
  shuffle: true
  worker_num: 8
  bufsize: 16
  use_process: true

# Evaluation data pipeline. Uses the same ResizeImage, Permute and
# NormalizeImage settings as TrainReader, without the random augmentations.
# `sample_transforms` is an ordered list and keeps its original sequence.
EvalReader:
  inputs_def:
    image_shape: [3, 512, 512]
    fields:
    - image
    - gt_bbox
    - gt_class
    - im_shape
    - im_id
    - is_difficult
  dataset: !VOCDataSet
    dataset_dir: dataset/voc
    anno_path: test.txt
    use_default_label: true
  sample_transforms:
  - !DecodeImage
    to_rgb: true
    with_mixup: false
  - !NormalizeBox {}
  - !ResizeImage
    target_size: 512
    interp: 1
    use_cv2: false
  - !Permute
    to_bgr: false
  - !NormalizeImage
    mean: [123, 117, 104]
    std: [1, 1, 1]
    is_scale: false
  # Batching and loader settings.
  batch_size: 32
  worker_num: 8
  bufsize: 16

# Inference data pipeline. Only `image`, `im_id` and `im_shape` are requested,
# and one image is processed per batch.
TestReader:
  inputs_def:
    image_shape: [3, 512, 512]
    fields: ['image', 'im_id', 'im_shape']
  dataset:
    !ImageFolder
    anno_path: test.txt
    use_default_label: true
  sample_transforms:
  - !DecodeImage
    to_rgb: true
    with_mixup: false
  # `use_cv2` is false so that this resize matches the ResizeImage entries in
  # TrainReader and EvalReader (both `interp: 1`, `use_cv2: false`); inference
  # images are then preprocessed the same way as the images the model was
  # trained and evaluated on.
  # NOTE(review): the numeric `interp` code is presumably interpreted
  # differently by the two resize backends, so a backend mismatch would also
  # change the interpolation filter. Confirm against the ResizeImage operator,
  # and confirm no deployment path depends on the cv2 backend here.
  - !ResizeImage
    interp: 1
    max_size: 0
    target_size: 512
    use_cv2: false
  - !Permute
    to_bgr: false
  # Same mean/std as the NormalizeImage entries in TrainReader and EvalReader.
  - !NormalizeImage
    is_scale: false
    mean: [123, 117, 104]
    std: [1, 1, 1]
  batch_size: 1
第一次提交 2022-06-01 11:18:00 +08:00			`architecture: SSD`
			`use_gpu: true`
			`max_iters: 120000`
			`snapshot_iter: 10000`
			`log_iter: 20`
			`metric: VOC`
			`map_type: 11point`
			`pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/VGG16_caffe_pretrained.tar`
			`save_dir: output`
			`weights: output/ssd_vgg16_512_voc/model_final`
			`# 20(label_class) + 1(background)`
			`num_classes: 21`

			`SSD:`
			`backbone: VGG`
			`multi_box_head: MultiBoxHead`
			`output_decoder:`
			`background_label: 0`
			`keep_top_k: 200`
			`nms_eta: 1.0`
			`nms_threshold: 0.45`
			`nms_top_k: 400`
			`score_threshold: 0.01`

			`VGG:`
			`depth: 16`
			`with_extra_blocks: true`
			`normalizations: [20., -1, -1, -1, -1, -1, -1]`
			`extra_block_filters: [[256, 512, 1, 2, 3], [128, 256, 1, 2, 3], [128, 256, 1, 2, 3], [128, 256, 1, 2, 3], [128, 256, 1, 1, 4]]`


			`MultiBoxHead:`
			`base_size: 512`
			`aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]]`
			`min_ratio: 20`
			`max_ratio: 90`
			`min_sizes: [20.0, 51.0, 133.0, 215.0, 296.0, 378.0, 460.0]`
			`max_sizes: [51.0, 133.0, 215.0, 296.0, 378.0, 460.0, 542.0]`
			`steps: [8, 16, 32, 64, 128, 256, 512]`
			`offset: 0.5`
			`flip: true`
			`kernel_size: 3`
			`pad: 1`

			`LearningRate:`
			`base_lr: 0.001`
			`schedulers:`
			`- !PiecewiseDecay`
			`gamma: 0.1`
			`milestones: [80000, 100000]`
			`- !LinearWarmup`
			`start_factor: 0.3333333333333333`
			`steps: 500`

			`OptimizerBuilder:`
			`optimizer:`
			`momentum: 0.9`
			`type: Momentum`
			`regularizer:`
			`factor: 0.0005`
			`type: L2`

			`TrainReader:`
			`inputs_def:`
			`image_shape: [3, 512, 512]`
			`fields: ['image', 'gt_bbox', 'gt_class']`
			`dataset:`
			`!VOCDataSet`
			`dataset_dir: dataset/voc`
			`anno_path: trainval.txt`
			`use_default_label: true`
			`sample_transforms:`
			`- !DecodeImage`
			`to_rgb: true`
			`- !RandomDistort`
			`brightness_lower: 0.875`
			`brightness_upper: 1.125`
			`is_order: true`
			`- !RandomExpand`
			`fill_value: [123, 117, 104]`
			`- !RandomCrop`
			`allow_no_crop: true`
			`- !NormalizeBox {}`
			`- !ResizeImage`
			`interp: 1`
			`target_size: 512`
			`use_cv2: false`
			`- !RandomFlipImage`
			`is_normalized: true`
			`- !Permute`
			`to_bgr: false`
			`- !NormalizeImage`
			`is_scale: false`
			`mean: [123, 117, 104]`
			`std: [1, 1, 1]`
			`batch_size: 8`
			`shuffle: true`
			`worker_num: 8`
			`bufsize: 16`
			`use_process: true`

			`EvalReader:`
			`inputs_def:`
			`image_shape: [3, 512, 512]`
			`fields: ['image', 'gt_bbox', 'gt_class', 'im_shape', 'im_id', 'is_difficult']`
			`dataset:`
			`!VOCDataSet`
			`anno_path: test.txt`
			`dataset_dir: dataset/voc`
			`use_default_label: true`
			`sample_transforms:`
			`- !DecodeImage`
			`to_rgb: true`
			`with_mixup: false`
			`- !NormalizeBox {}`
			`- !ResizeImage`
			`interp: 1`
			`target_size: 512`
			`use_cv2: false`
			`- !Permute`
			`to_bgr: false`
			`- !NormalizeImage`
			`is_scale: false`
			`mean: [123, 117, 104]`
			`std: [1, 1, 1]`
			`batch_size: 32`
			`worker_num: 8`
			`bufsize: 16`

			`TestReader:`
			`inputs_def:`
			`image_shape: [3,512,512]`
			`fields: ['image', 'im_id', 'im_shape']`
			`dataset:`
			`!ImageFolder`
			`anno_path: test.txt`
			`use_default_label: true`
			`sample_transforms:`
			`- !DecodeImage`
			`to_rgb: true`
			`with_mixup: false`
			`- !ResizeImage`
			`interp: 1`
			`max_size: 0`
			`target_size: 512`
			`use_cv2: true`
			`- !Permute`
			`to_bgr: false`
			`- !NormalizeImage`
			`is_scale: false`
			`mean: [123, 117, 104]`
			`std: [1, 1, 1]`
			`batch_size: 1`