forked from PulseFocusPlatform/PulseFocusPlatform
第一次提交
This commit is contained in:
parent
e753eb708c
commit
0cab1d8282
38
README.md
38
README.md
|
@ -1,2 +1,38 @@
|
|||
# PulseFocusPlatform
|
||||
# Pulse Focus Platform脉冲聚焦
|
||||
## 0. 软件介绍
|
||||
Pulse Focus Platform脉冲聚焦是面向水底物体图像识别的实时检测软件。软件以面向对象的设计理念,采用Python语言编程,基于pyqt、paddle、pyyaml以及ppdet等技术开发,支持多批量图像、长视频等多种本地数据源,预置多种物体识别模型,并提供扩展接口,方便新模型的集成与验证。平台安装简单,运行方便,可选参数丰富,扩展性高,非常适用于相关研究领域的工程技术人员和学生掌握学习侧扫或光学数据等形成的水底图像中关注物体的识别方法。
|
||||
|
||||
脉冲聚焦软件设计了图片和视频两种数据输入下的多物体识别功能。针对图片数据,调用模型进行单张图片预测,随后在前端可视化输出多物体识别结果;针对视频流动态图像数据,首先对视频流数据进行分帧采样,获取采样图片,再针对采样图片进行多物体识别,将采样识别结果进行视频合成,然后在前端可视化输出视频流数据识别结果。为了视频流数据处理的高效性,设计了采样-识别-展示的多线程处理方式,可加快视频流数据处理。
|
||||
|
||||
软件界面简单,易学易用,包含参数的输入选择,程序的运行,算法结果的展示等,源代码公开,算法可修改。
|
||||
|
||||
开发人员:K. Wang、H.P. Yu、J. Li、H.T. Li、Z.Q. Wang、Z.Y. Zhao、L.F. Zhang、G. Chen
|
||||
|
||||
## 1. 开发环境配置
|
||||
运行以下命令:
|
||||
```bash
|
||||
conda env create -f create_env.yaml
|
||||
```
|
||||
该命令会创建一个名为`Focus`的conda虚拟环境,用`conda activate Focus`即可激活该虚拟环境。
|
||||
|
||||
|
||||
## 2. 软件运行
|
||||
运行以下命令运行软件:
|
||||
```bash
|
||||
python main.py
|
||||
```
|
||||
|
||||
## 3. 一些说明
|
||||
1. 使用GPU版本
|
||||
|
||||
参考百度飞桨paddle官方网站安装
|
||||
|
||||
[安装链接](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/windows-pip.html)
|
||||
|
||||
2. 模型文件链接
|
||||
|
||||
由于软件包含已训练好的三个模型,模型参数较大,放在百度网盘自行下载,并放在根目录。
|
||||
|
||||
链接:https://pan.baidu.com/s/1Wf4gjmccgqQYeknm9pX2jQ
|
||||
|
||||
提取码:2gs7
|
|
@ -0,0 +1,188 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Form implementation generated from reading ui file 'SSS_win.ui'
|
||||
#
|
||||
# Created by: PyQt5 UI code generator 5.15.4
|
||||
#
|
||||
# WARNING: Any manual changes made to this file will be lost when pyuic5 is
|
||||
# run again. Do not edit this file unless you know what you are doing.
|
||||
|
||||
|
||||
from PyQt5 import QtCore, QtGui, QtWidgets
|
||||
|
||||
|
||||
class Ui_MainWindow(object):
    """UI layout class generated by pyuic5 from 'SSS_win.ui'.

    Builds the main window of the Pulse Focus Platform: a button/option
    panel on top (data import, preprocessing / algorithm / GPU combo
    boxes, action buttons) and a large display label plus a statistics
    form below.  NOTE: this file is machine-generated; regenerate from
    the .ui file instead of editing by hand.
    """

    def setupUi(self, MainWindow):
        """Create all widgets/layouts and wire signals to MainWindow slots.

        ``MainWindow`` is expected to provide the slots connected at the
        bottom (import_pic, press_shibie, press_xunlian, exit,
        press_movie, moxingxuanze, gpu_use) — defined in the window
        subclass, not in this file.
        """
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(1353, 1007)
        self.centralwidget = QtWidgets.QWidget(MainWindow)
        self.centralwidget.setObjectName("centralwidget")
        self.verticalLayout_5 = QtWidgets.QVBoxLayout(self.centralwidget)
        self.verticalLayout_5.setObjectName("verticalLayout_5")
        self.verticalLayout = QtWidgets.QVBoxLayout()
        self.verticalLayout.setObjectName("verticalLayout")
        self.horizontalLayout = QtWidgets.QHBoxLayout()
        self.horizontalLayout.setObjectName("horizontalLayout")
        # --- left column: data-import / export buttons ---
        self.verticalLayout_2 = QtWidgets.QVBoxLayout()
        self.verticalLayout_2.setObjectName("verticalLayout_2")
        self.tupiandiaoru = QtWidgets.QPushButton(self.centralwidget)
        self.tupiandiaoru.setObjectName("tupiandiaoru")
        self.verticalLayout_2.addWidget(self.tupiandiaoru)
        self.shipindaoru = QtWidgets.QPushButton(self.centralwidget)
        self.shipindaoru.setObjectName("shipindaoru")
        self.verticalLayout_2.addWidget(self.shipindaoru)
        self.pushButton_xxdaochu = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton_xxdaochu.setObjectName("pushButton_xxdaochu")
        self.verticalLayout_2.addWidget(self.pushButton_xxdaochu)
        self.horizontalLayout.addLayout(self.verticalLayout_2)
        # --- option form: feature / preprocessing / algorithm / GPU combos ---
        self.formLayout = QtWidgets.QFormLayout()
        self.formLayout.setObjectName("formLayout")
        self.comboBox_tzxuanze = QtWidgets.QComboBox(self.centralwidget)
        self.comboBox_tzxuanze.setEditable(True)
        self.comboBox_tzxuanze.setObjectName("comboBox_tzxuanze")
        self.comboBox_tzxuanze.addItem("")
        self.formLayout.setWidget(0, QtWidgets.QFormLayout.FieldRole, self.comboBox_tzxuanze)
        self.label_2 = QtWidgets.QLabel(self.centralwidget)
        self.label_2.setObjectName("label_2")
        self.formLayout.setWidget(1, QtWidgets.QFormLayout.LabelRole, self.label_2)
        self.comboBox_yclfangfa = QtWidgets.QComboBox(self.centralwidget)
        self.comboBox_yclfangfa.setEditable(True)
        self.comboBox_yclfangfa.setObjectName("comboBox_yclfangfa")
        self.comboBox_yclfangfa.addItem("")
        self.comboBox_yclfangfa.addItem("")
        self.comboBox_yclfangfa.addItem("")
        self.formLayout.setWidget(1, QtWidgets.QFormLayout.FieldRole, self.comboBox_yclfangfa)
        self.label_3 = QtWidgets.QLabel(self.centralwidget)
        self.label_3.setObjectName("label_3")
        self.formLayout.setWidget(2, QtWidgets.QFormLayout.LabelRole, self.label_3)
        self.comboBox_sbsuanfa = QtWidgets.QComboBox(self.centralwidget)
        self.comboBox_sbsuanfa.setEditable(True)
        self.comboBox_sbsuanfa.setObjectName("comboBox_sbsuanfa")
        self.comboBox_sbsuanfa.addItem("")
        self.comboBox_sbsuanfa.addItem("")
        self.comboBox_sbsuanfa.addItem("")
        self.formLayout.setWidget(2, QtWidgets.QFormLayout.FieldRole, self.comboBox_sbsuanfa)
        self.label_4 = QtWidgets.QLabel(self.centralwidget)
        self.label_4.setObjectName("label_4")
        self.formLayout.setWidget(3, QtWidgets.QFormLayout.LabelRole, self.label_4)
        self.comboBox_GPU = QtWidgets.QComboBox(self.centralwidget)
        self.comboBox_GPU.setEditable(True)
        self.comboBox_GPU.setObjectName("comboBox_GPU")
        self.comboBox_GPU.addItem("")
        self.comboBox_GPU.addItem("")
        self.formLayout.setWidget(3, QtWidgets.QFormLayout.FieldRole, self.comboBox_GPU)
        self.label = QtWidgets.QLabel(self.centralwidget)
        self.label.setObjectName("label")
        self.formLayout.setWidget(0, QtWidgets.QFormLayout.LabelRole, self.label)
        self.horizontalLayout.addLayout(self.formLayout)
        # --- action buttons: detect / train ---
        self.verticalLayout_4 = QtWidgets.QVBoxLayout()
        self.verticalLayout_4.setObjectName("verticalLayout_4")
        self.shibie = QtWidgets.QPushButton(self.centralwidget)
        self.shibie.setObjectName("shibie")
        self.verticalLayout_4.addWidget(self.shibie)
        self.xunlian = QtWidgets.QPushButton(self.centralwidget)
        self.xunlian.setObjectName("xunlian")
        self.verticalLayout_4.addWidget(self.xunlian)
        self.horizontalLayout.addLayout(self.verticalLayout_4)
        # --- right column: documentation / quit buttons ---
        self.verticalLayout_3 = QtWidgets.QVBoxLayout()
        self.verticalLayout_3.setObjectName("verticalLayout_3")
        self.pushButton_jswendang = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton_jswendang.setObjectName("pushButton_jswendang")
        self.verticalLayout_3.addWidget(self.pushButton_jswendang)
        self.pushButton_rjwendang = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton_rjwendang.setObjectName("pushButton_rjwendang")
        self.verticalLayout_3.addWidget(self.pushButton_rjwendang)
        self.pushButton_tuichu = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton_tuichu.setObjectName("pushButton_tuichu")
        self.verticalLayout_3.addWidget(self.pushButton_tuichu)
        self.horizontalLayout.addLayout(self.verticalLayout_3)
        self.verticalLayout.addLayout(self.horizontalLayout)
        # --- display area: big image/video label + per-class count form ---
        self.horizontalLayout_2 = QtWidgets.QHBoxLayout()
        self.horizontalLayout_2.setObjectName("horizontalLayout_2")
        self.label_movie = QtWidgets.QLabel(self.centralwidget)
        self.label_movie.setMinimumSize(QtCore.QSize(1200, 800))
        self.label_movie.setText("")
        self.label_movie.setObjectName("label_movie")
        self.horizontalLayout_2.addWidget(self.label_movie)
        self.formLayout_2 = QtWidgets.QFormLayout()
        self.formLayout_2.setObjectName("formLayout_2")
        self.label_mb1 = QtWidgets.QLabel(self.centralwidget)
        self.label_mb1.setObjectName("label_mb1")
        self.formLayout_2.setWidget(0, QtWidgets.QFormLayout.LabelRole, self.label_mb1)
        self.flv = QtWidgets.QLabel(self.centralwidget)
        self.flv.setObjectName("flv")
        self.formLayout_2.setWidget(0, QtWidgets.QFormLayout.FieldRole, self.flv)
        self.label_mb2 = QtWidgets.QLabel(self.centralwidget)
        self.label_mb2.setObjectName("label_mb2")
        self.formLayout_2.setWidget(1, QtWidgets.QFormLayout.LabelRole, self.label_mb2)
        self.gx = QtWidgets.QLabel(self.centralwidget)
        self.gx.setObjectName("gx")
        self.formLayout_2.setWidget(1, QtWidgets.QFormLayout.FieldRole, self.gx)
        self.label_mb3 = QtWidgets.QLabel(self.centralwidget)
        self.label_mb3.setObjectName("label_mb3")
        self.formLayout_2.setWidget(2, QtWidgets.QFormLayout.LabelRole, self.label_mb3)
        self.mbw = QtWidgets.QLabel(self.centralwidget)
        self.mbw.setObjectName("mbw")
        self.formLayout_2.setWidget(2, QtWidgets.QFormLayout.FieldRole, self.mbw)
        self.label_5 = QtWidgets.QLabel(self.centralwidget)
        self.label_5.setObjectName("label_5")
        self.formLayout_2.setWidget(3, QtWidgets.QFormLayout.LabelRole, self.label_5)
        self.label_object = QtWidgets.QLabel(self.centralwidget)
        self.label_object.setObjectName("label_object")
        self.formLayout_2.setWidget(3, QtWidgets.QFormLayout.FieldRole, self.label_object)
        self.horizontalLayout_2.addLayout(self.formLayout_2)
        self.verticalLayout.addLayout(self.horizontalLayout_2)
        self.label_10 = QtWidgets.QLabel(self.centralwidget)
        self.label_10.setObjectName("label_10")
        self.verticalLayout.addWidget(self.label_10)
        self.verticalLayout_5.addLayout(self.verticalLayout)
        MainWindow.setCentralWidget(self.centralwidget)
        self.statusbar = QtWidgets.QStatusBar(MainWindow)
        self.statusbar.setObjectName("statusbar")
        MainWindow.setStatusBar(self.statusbar)

        # Set display texts, then connect widgets to MainWindow's slots.
        self.retranslateUi(MainWindow)
        self.tupiandiaoru.clicked.connect(MainWindow.import_pic)
        self.shibie.clicked.connect(MainWindow.press_shibie)
        self.xunlian.clicked.connect(MainWindow.press_xunlian)
        self.pushButton_tuichu.clicked.connect(MainWindow.exit)
        self.shipindaoru.clicked.connect(MainWindow.press_movie)
        self.comboBox_sbsuanfa.activated.connect(MainWindow.moxingxuanze)
        self.comboBox_GPU.activated.connect(MainWindow.gpu_use)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)

    def retranslateUi(self, MainWindow):
        """Assign (translatable) display strings to every widget.

        All user-visible strings are set here so Qt's translation
        machinery (QCoreApplication.translate) can localize them.
        """
        _translate = QtCore.QCoreApplication.translate
        MainWindow.setWindowTitle(_translate("MainWindow", "脉冲聚焦"))
        self.tupiandiaoru.setText(_translate("MainWindow", "静态图像导入"))
        self.shipindaoru.setText(_translate("MainWindow", "动态图像导入"))
        self.pushButton_xxdaochu.setText(_translate("MainWindow", "信息导出"))
        self.comboBox_tzxuanze.setItemText(0, _translate("MainWindow", "无"))
        self.label_2.setText(_translate("MainWindow", "预处理方法"))
        self.comboBox_yclfangfa.setItemText(0, _translate("MainWindow", "多尺度融合"))
        self.comboBox_yclfangfa.setItemText(1, _translate("MainWindow", "图像增广"))
        self.comboBox_yclfangfa.setItemText(2, _translate("MainWindow", "图像重塑"))
        self.label_3.setText(_translate("MainWindow", "聚焦算法"))
        self.comboBox_sbsuanfa.setCurrentText(_translate("MainWindow", "PPYOLO-BOT"))
        self.comboBox_sbsuanfa.setItemText(0, _translate("MainWindow", "PPYOLO-BOT"))
        self.comboBox_sbsuanfa.setItemText(1, _translate("MainWindow", "YOLOV3"))
        self.comboBox_sbsuanfa.setItemText(2, _translate("MainWindow", "RCNN"))
        self.label_4.setText(_translate("MainWindow", "GPU加速"))
        self.comboBox_GPU.setCurrentText(_translate("MainWindow", "YES"))
        self.comboBox_GPU.setItemText(0, _translate("MainWindow", "YES"))
        self.comboBox_GPU.setItemText(1, _translate("MainWindow", "NO"))
        self.label.setText(_translate("MainWindow", "特征选择"))
        self.shibie.setText(_translate("MainWindow", "聚焦"))
        self.xunlian.setText(_translate("MainWindow", "训练"))
        self.pushButton_jswendang.setText(_translate("MainWindow", "技术文档"))
        self.pushButton_rjwendang.setText(_translate("MainWindow", "软件说明文档"))
        self.pushButton_tuichu.setText(_translate("MainWindow", "退出"))
        self.label_mb1.setText(_translate("MainWindow", "flv"))
        self.flv.setText(_translate("MainWindow", "0"))
        self.label_mb2.setText(_translate("MainWindow", "gx"))
        self.gx.setText(_translate("MainWindow", "0"))
        self.label_mb3.setText(_translate("MainWindow", "mbw"))
        self.mbw.setText(_translate("MainWindow", "0"))
        self.label_5.setText(_translate("MainWindow", "object"))
        self.label_object.setText(_translate("MainWindow", "0"))
        self.label_10.setText(_translate("MainWindow", "TextLabel"))
|
|
@ -0,0 +1,417 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<ui version="4.0">
|
||||
<class>MainWindow</class>
|
||||
<widget class="QMainWindow" name="MainWindow">
|
||||
<property name="geometry">
|
||||
<rect>
|
||||
<x>0</x>
|
||||
<y>0</y>
|
||||
<width>1353</width>
|
||||
<height>1007</height>
|
||||
</rect>
|
||||
</property>
|
||||
<property name="windowTitle">
|
||||
<string>MainWindow</string>
|
||||
</property>
|
||||
<widget class="QWidget" name="centralwidget">
|
||||
<layout class="QVBoxLayout" name="verticalLayout_5">
|
||||
<item>
|
||||
<layout class="QVBoxLayout" name="verticalLayout">
|
||||
<item>
|
||||
<layout class="QHBoxLayout" name="horizontalLayout">
|
||||
<item>
|
||||
<layout class="QVBoxLayout" name="verticalLayout_2">
|
||||
<item>
|
||||
<widget class="QPushButton" name="tupiandiaoru">
|
||||
<property name="text">
|
||||
<string>静态图像导入</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QPushButton" name="shipindaoru">
|
||||
<property name="text">
|
||||
<string>动态图像导入</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QPushButton" name="pushButton_xxdaochu">
|
||||
<property name="text">
|
||||
<string>信息导出</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<layout class="QFormLayout" name="formLayout">
|
||||
<item row="0" column="1">
|
||||
<widget class="QComboBox" name="comboBox_tzxuanze">
|
||||
<property name="editable">
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>无</string>
|
||||
</property>
|
||||
</item>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="0">
|
||||
<widget class="QLabel" name="label_2">
|
||||
<property name="text">
|
||||
<string>预处理方法</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="1">
|
||||
<widget class="QComboBox" name="comboBox_yclfangfa">
|
||||
<property name="editable">
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>多尺度融合</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>图像增广</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>图像重塑</string>
|
||||
</property>
|
||||
</item>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="2" column="0">
|
||||
<widget class="QLabel" name="label_3">
|
||||
<property name="text">
|
||||
<string>识别算法</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="2" column="1">
|
||||
<widget class="QComboBox" name="comboBox_sbsuanfa">
|
||||
<property name="editable">
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="currentText">
|
||||
<string>PPYOLO-BOT</string>
|
||||
</property>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>PPYOLO-BOT</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>YOLOV3</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>RCNN</string>
|
||||
</property>
|
||||
</item>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="3" column="0">
|
||||
<widget class="QLabel" name="label_4">
|
||||
<property name="text">
|
||||
<string>GPU加速</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="3" column="1">
|
||||
<widget class="QComboBox" name="comboBox_GPU">
|
||||
<property name="editable">
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="currentText">
|
||||
<string>YES</string>
|
||||
</property>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>YES</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>NO</string>
|
||||
</property>
|
||||
</item>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="0" column="0">
|
||||
<widget class="QLabel" name="label">
|
||||
<property name="text">
|
||||
<string>特征选择</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<layout class="QVBoxLayout" name="verticalLayout_4">
|
||||
<item>
|
||||
<widget class="QPushButton" name="shibie">
|
||||
<property name="text">
|
||||
<string>识别</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QPushButton" name="xunlian">
|
||||
<property name="text">
|
||||
<string>训练</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<layout class="QVBoxLayout" name="verticalLayout_3">
|
||||
<item>
|
||||
<widget class="QPushButton" name="pushButton_jswendang">
|
||||
<property name="text">
|
||||
<string>技术文档</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QPushButton" name="pushButton_rjwendang">
|
||||
<property name="text">
|
||||
<string>软件说明文档</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QPushButton" name="pushButton_tuichu">
|
||||
<property name="text">
|
||||
<string>退出</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<layout class="QHBoxLayout" name="horizontalLayout_2">
|
||||
<item>
|
||||
<widget class="QLabel" name="label_movie">
|
||||
<property name="minimumSize">
|
||||
<size>
|
||||
<width>1200</width>
|
||||
<height>800</height>
|
||||
</size>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string/>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<layout class="QFormLayout" name="formLayout_2">
|
||||
<item row="0" column="0">
|
||||
<widget class="QLabel" name="label_mb1">
|
||||
<property name="text">
|
||||
<string>flv</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="0" column="1">
|
||||
<widget class="QLabel" name="flv">
|
||||
<property name="text">
|
||||
<string>0</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="0">
|
||||
<widget class="QLabel" name="label_mb2">
|
||||
<property name="text">
|
||||
<string>gx</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="1">
|
||||
<widget class="QLabel" name="gx">
|
||||
<property name="text">
|
||||
<string>0</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="2" column="0">
|
||||
<widget class="QLabel" name="label_mb3">
|
||||
<property name="text">
|
||||
<string>mbw</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="2" column="1">
|
||||
<widget class="QLabel" name="mbw">
|
||||
<property name="text">
|
||||
<string>0</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="3" column="0">
|
||||
<widget class="QLabel" name="label_5">
|
||||
<property name="text">
|
||||
<string>object</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="3" column="1">
|
||||
<widget class="QLabel" name="label_object">
|
||||
<property name="text">
|
||||
<string>0</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QLabel" name="label_10">
|
||||
<property name="text">
|
||||
<string>TextLabel</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
<widget class="QStatusBar" name="statusbar"/>
|
||||
</widget>
|
||||
<resources/>
|
||||
<connections>
|
||||
<connection>
|
||||
<sender>tupiandiaoru</sender>
|
||||
<signal>clicked()</signal>
|
||||
<receiver>MainWindow</receiver>
|
||||
<slot>import_pic()</slot>
|
||||
<hints>
|
||||
<hint type="sourcelabel">
|
||||
<x>255</x>
|
||||
<y>42</y>
|
||||
</hint>
|
||||
<hint type="destinationlabel">
|
||||
<x>414</x>
|
||||
<y>8</y>
|
||||
</hint>
|
||||
</hints>
|
||||
</connection>
|
||||
<connection>
|
||||
<sender>shibie</sender>
|
||||
<signal>clicked()</signal>
|
||||
<receiver>MainWindow</receiver>
|
||||
<slot>press_shibie()</slot>
|
||||
<hints>
|
||||
<hint type="sourcelabel">
|
||||
<x>983</x>
|
||||
<y>48</y>
|
||||
</hint>
|
||||
<hint type="destinationlabel">
|
||||
<x>1021</x>
|
||||
<y>0</y>
|
||||
</hint>
|
||||
</hints>
|
||||
</connection>
|
||||
<connection>
|
||||
<sender>xunlian</sender>
|
||||
<signal>clicked()</signal>
|
||||
<receiver>MainWindow</receiver>
|
||||
<slot>press_xunlian()</slot>
|
||||
<hints>
|
||||
<hint type="sourcelabel">
|
||||
<x>1101</x>
|
||||
<y>115</y>
|
||||
</hint>
|
||||
<hint type="destinationlabel">
|
||||
<x>823</x>
|
||||
<y>0</y>
|
||||
</hint>
|
||||
</hints>
|
||||
</connection>
|
||||
<connection>
|
||||
<sender>pushButton_tuichu</sender>
|
||||
<signal>clicked()</signal>
|
||||
<receiver>MainWindow</receiver>
|
||||
<slot>exit()</slot>
|
||||
<hints>
|
||||
<hint type="sourcelabel">
|
||||
<x>1467</x>
|
||||
<y>128</y>
|
||||
</hint>
|
||||
<hint type="destinationlabel">
|
||||
<x>1481</x>
|
||||
<y>381</y>
|
||||
</hint>
|
||||
</hints>
|
||||
</connection>
|
||||
<connection>
|
||||
<sender>shipindaoru</sender>
|
||||
<signal>clicked()</signal>
|
||||
<receiver>MainWindow</receiver>
|
||||
<slot>press_movie()</slot>
|
||||
<hints>
|
||||
<hint type="sourcelabel">
|
||||
<x>362</x>
|
||||
<y>89</y>
|
||||
</hint>
|
||||
<hint type="destinationlabel">
|
||||
<x>379</x>
|
||||
<y>0</y>
|
||||
</hint>
|
||||
</hints>
|
||||
</connection>
|
||||
<connection>
|
||||
<sender>comboBox_sbsuanfa</sender>
|
||||
<signal>activated(QString)</signal>
|
||||
<receiver>MainWindow</receiver>
|
||||
<slot>moxingxuanze()</slot>
|
||||
<hints>
|
||||
<hint type="sourcelabel">
|
||||
<x>621</x>
|
||||
<y>91</y>
|
||||
</hint>
|
||||
<hint type="destinationlabel">
|
||||
<x>686</x>
|
||||
<y>85</y>
|
||||
</hint>
|
||||
</hints>
|
||||
</connection>
|
||||
<connection>
|
||||
<sender>comboBox_GPU</sender>
|
||||
<signal>activated(QString)</signal>
|
||||
<receiver>MainWindow</receiver>
|
||||
<slot>gpu_use()</slot>
|
||||
<hints>
|
||||
<hint type="sourcelabel">
|
||||
<x>597</x>
|
||||
<y>122</y>
|
||||
</hint>
|
||||
<hint type="destinationlabel">
|
||||
<x>656</x>
|
||||
<y>122</y>
|
||||
</hint>
|
||||
</hints>
|
||||
</connection>
|
||||
</connections>
|
||||
<slots>
|
||||
<slot>import_pic()</slot>
|
||||
<slot>press_shibie()</slot>
|
||||
<slot>press_xunlian()</slot>
|
||||
<slot>exit()</slot>
|
||||
<slot>press_movie()</slot>
|
||||
<slot>moxingxuanze()</slot>
|
||||
<slot>gpu_use()</slot>
|
||||
</slots>
|
||||
</ui>
|
Binary file not shown.
|
@ -0,0 +1,16 @@
|
|||
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from . import (core, data, engine, modeling, model_zoo, optimizer, metrics,
|
||||
utils, slim)
|
|
@ -0,0 +1,15 @@
|
|||
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from . import config
|
|
@ -0,0 +1,13 @@
|
|||
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
|
@ -0,0 +1,248 @@
|
|||
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
from __future__ import division
|
||||
|
||||
import inspect
|
||||
import importlib
|
||||
import re
|
||||
|
||||
# Optional dependency: docstring_parser extracts per-parameter descriptions
# from class docstrings.  When it is not installed, fall back to a no-op
# stub that returns None, so schema extraction still works (just without
# per-argument doc text).
try:
    from docstring_parser import parse as doc_parse
except Exception:

    def doc_parse(*args):
        pass


# Optional dependency: typeguard performs runtime type checking of config
# values against constructor annotations.  When unavailable, fall back to
# a no-op stub so type mismatches are silently skipped instead of raising.
try:
    from typeguard import check_type
except Exception:

    def check_type(*args):
        pass
|
||||
|
||||
|
||||
__all__ = ['SchemaValue', 'SchemaDict', 'SharedConfig', 'extract_schema']
|
||||
|
||||
|
||||
class SchemaValue(object):
    """Schema entry describing one constructor argument.

    Holds the argument's name, its documentation text, and its expected
    type.  A default value is optional: it only exists after a call to
    ``set_default``, and ``has_default`` reports whether one was recorded.
    """

    def __init__(self, name, doc='', type=None):
        super(SchemaValue, self).__init__()
        # No ``default`` attribute is created here on purpose — its very
        # absence is how "this argument has no default" is represented.
        self.name, self.doc, self.type = name, doc, type

    def set_default(self, value):
        """Record *value* as this argument's default."""
        self.default = value

    def has_default(self):
        """Return True if a default value has been recorded."""
        return 'default' in self.__dict__
|
||||
|
||||
|
||||
class SchemaDict(dict):
    """A dict of config values paired with a schema of expected arguments.

    ``self.schema`` maps argument names to ``SchemaValue`` objects; the
    dict itself holds the actual configured values.  ``validate`` checks
    the values against the schema (missing / extraneous / mistyped keys).

    NOTE(review): several methods read ``self.name``, which is not set in
    ``__init__`` — it appears to be attached externally after construction
    (presumably by ``extract_schema``); confirm before calling ``validate``
    on a hand-built instance.
    """

    def __init__(self, **kwargs):
        super(SchemaDict, self).__init__()
        self.schema = {}      # arg name -> SchemaValue
        self.strict = False   # when True, unknown keys fail validation
        self.doc = ""
        # Goes through our __setitem__, so nested dicts merge into any
        # existing SchemaDict values instead of replacing them.
        self.update(kwargs)

    def __setitem__(self, key, value):
        # XXX also update regular dict to SchemaDict??
        # Merge dict values into an existing SchemaDict entry rather than
        # overwriting it; everything else is a plain assignment.
        if isinstance(value, dict) and key in self and isinstance(self[key],
                                                                  SchemaDict):
            self[key].update(value)
        else:
            super(SchemaDict, self).__setitem__(key, value)

    def __missing__(self, key):
        # Lookup fallback order: schema default, then the SchemaValue
        # itself, then a normal KeyError.
        if self.has_default(key):
            return self.schema[key].default
        elif key in self.schema:
            return self.schema[key]
        else:
            raise KeyError(key)

    def copy(self):
        """Return a shallow copy carrying over both values and attributes
        (schema, strict, doc, ...)."""
        newone = SchemaDict()
        newone.__dict__.update(self.__dict__)
        newone.update(self)
        return newone

    def set_schema(self, key, value):
        assert isinstance(value, SchemaValue)
        self.schema[key] = value

    def set_strict(self, strict):
        self.strict = strict

    def has_default(self, key):
        """True if *key* is in the schema and carries a default value."""
        return key in self.schema and self.schema[key].has_default()

    def is_default(self, key):
        """True if *key* is absent or still equals its schema default.

        Objects with a ``__dict__`` are treated as default unconditionally
        (they cannot be compared to the recorded default reliably).
        """
        if not self.has_default(key):
            return False
        if hasattr(self[key], '__dict__'):
            return True
        else:
            return key not in self or self[key] == self.schema[key].default

    def find_default_keys(self):
        """List all keys (configured or schema-only) left at their default."""
        return [
            k for k in list(self.keys()) + list(self.schema.keys())
            if self.is_default(k)
        ]

    def mandatory(self):
        """True if any schema argument lacks a default (must be configured)."""
        return any([k for k in self.schema.keys() if not self.has_default(k)])

    def find_missing_keys(self):
        """List required-but-unset keys, plus keys set to placeholder text."""
        missing = [
            k for k in self.schema.keys()
            if k not in self and not self.has_default(k)
        ]
        # '<missing>'/'<value>' are template placeholders, not real values.
        placeholders = [k for k in self if self[k] in ('<missing>', '<value>')]
        return missing + placeholders

    def find_extra_keys(self):
        """List configured keys that the schema does not declare."""
        return list(set(self.keys()) - set(self.schema.keys()))

    def find_mismatch_keys(self):
        """List keys whose value fails the schema's type check.

        Relies on ``check_type``; when typeguard is unavailable the stub
        never raises, so no mismatches are ever reported.
        """
        mismatch_keys = []
        for arg in self.schema.values():
            if arg.type is not None:
                try:
                    check_type("{}.{}".format(self.name, arg.name),
                               self[arg.name], arg.type)
                except Exception:
                    mismatch_keys.append(arg.name)
        return mismatch_keys

    def validate(self):
        """Raise ValueError/TypeError if the config violates the schema."""
        missing_keys = self.find_missing_keys()
        if missing_keys:
            raise ValueError("Missing param for class<{}>: {}".format(
                self.name, ", ".join(missing_keys)))
        extra_keys = self.find_extra_keys()
        if extra_keys and self.strict:
            raise ValueError("Extraneous param for class<{}>: {}".format(
                self.name, ", ".join(extra_keys)))
        mismatch_keys = self.find_mismatch_keys()
        if mismatch_keys:
            raise TypeError("Wrong param type for class<{}>: {}".format(
                self.name, ", ".join(mismatch_keys)))
|
||||
|
||||
|
||||
class SharedConfig(object):
    """Marker object representing a ``__shared__`` annotation.

    When a constructor argument is annotated as shared, its value is
    resolved in this order:

    - a value set explicitly for the module in the config file wins;
    - otherwise a top-level ``key`` entry in the config file is used;
    - otherwise ``default_value`` is the fallback.

    Args:
        key: name under which ``config[key]`` is injected.
        default_value: value used when the config provides nothing.
    """

    def __init__(self, key, default_value=None):
        super(SharedConfig, self).__init__()
        self.key, self.default_value = key, default_value
|
||||
|
||||
|
||||
def extract_schema(cls):
    """
    Extract schema from a given class.

    Args:
        cls (type): Class from which to extract.

    Returns:
        schema (SchemaDict): Extracted schema.
    """
    ctor = cls.__init__
    # python 2 compatibility: `getfullargspec` exists only on python 3;
    # fall back to `getargspec` otherwise (the original code called
    # `getfullargspec` in both branches, so the fallback could never work)
    if hasattr(inspect, 'getfullargspec'):
        argspec = inspect.getfullargspec(ctor)
        annotations = argspec.annotations
        has_kwargs = argspec.varkw is not None
    else:
        argspec = inspect.getargspec(ctor)
        # python 2 type hinting workaround, see pep-3107
        # however, since `typeguard` does not support python 2, type checking
        # is still python 3 only for now
        annotations = getattr(ctor, '__annotations__', {})
        has_kwargs = argspec.keywords is not None

    names = [arg for arg in argspec.args if arg != 'self']
    defaults = argspec.defaults
    num_defaults = argspec.defaults is not None and len(argspec.defaults) or 0
    num_required = len(names) - num_defaults

    # op-style wrappers keep their docstring on `__call__`
    docs = cls.__doc__
    if docs is None and getattr(cls, '__category__', None) == 'op':
        docs = cls.__call__.__doc__
    try:
        docstring = doc_parse(docs)
    except Exception:
        docstring = None

    # collect per-argument descriptions from the parsed docstring
    if docstring is None:
        comments = {}
    else:
        comments = {}
        for p in docstring.params:
            match_obj = re.match('^([a-zA-Z_]+[a-zA-Z_0-9]*).*', p.arg_name)
            if match_obj is not None:
                comments[match_obj.group(1)] = p.description

    schema = SchemaDict()
    schema.name = cls.__name__
    schema.doc = ""
    if docs is not None:
        start_pos = docs[0] == '\n' and 1 or 0
        schema.doc = docs[start_pos:].split("\n")[0].strip()
        # XXX handle paddle's weird doc convention
        if '**' == schema.doc[:2] and '**' == schema.doc[-2:]:
            schema.doc = schema.doc[2:-2].strip()
    schema.category = hasattr(cls, '__category__') and getattr(
        cls, '__category__') or 'module'
    # classes accepting **kwargs are not strict about extra config keys
    schema.strict = not has_kwargs
    schema.pymodule = importlib.import_module(cls.__module__)
    schema.inject = getattr(cls, '__inject__', [])
    schema.shared = getattr(cls, '__shared__', [])
    for idx, name in enumerate(names):
        comment = name in comments and comments[name] or name
        # injected args are resolved by the framework, so skip their type
        if name in schema.inject:
            type_ = None
        else:
            type_ = name in annotations and annotations[name] or None
        value_schema = SchemaValue(name, comment, type_)
        if name in schema.shared:
            assert idx >= num_required, "shared config must have default value"
            default = defaults[idx - num_required]
            value_schema.set_default(SharedConfig(name, default))
        elif idx >= num_required:
            default = defaults[idx - num_required]
            value_schema.set_default(default)
        schema.set_schema(name, value_schema)

    return schema
|
|
@ -0,0 +1,118 @@
|
|||
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import importlib
|
||||
import inspect
|
||||
|
||||
import yaml
|
||||
from .schema import SharedConfig
|
||||
|
||||
__all__ = ['serializable', 'Callable']
|
||||
|
||||
|
||||
def represent_dictionary_order(self, dict_data):
    """Dump *dict_data* as a plain YAML map, preserving item order."""
    items = dict_data.items()
    return self.represent_mapping('tag:yaml.org,2002:map', items)
|
||||
|
||||
|
||||
def setup_orderdict():
    """Make ``yaml.dump`` emit ``OrderedDict`` as a regular (ordered) map."""
    from collections import OrderedDict
    yaml.add_representer(OrderedDict, represent_dictionary_order)
|
||||
|
||||
|
||||
def _make_python_constructor(cls):
    """Build a YAML constructor that instantiates `cls` from a node.

    Sequence nodes are unpacked as positional args, mapping nodes as
    keyword args; construction failures are reported then re-raised.
    """

    def python_constructor(loader, node):
        if isinstance(node, yaml.SequenceNode):
            return cls(*loader.construct_sequence(node, deep=True))
        kwargs = loader.construct_mapping(node, deep=True)
        try:
            return cls(**kwargs)
        except Exception as ex:
            print("Error when construct {} instance from yaml config".
                  format(cls.__name__))
            raise ex

    return python_constructor
|
||||
|
||||
|
||||
def _make_python_representer(cls):
|
||||
# python 2 compatibility
|
||||
if hasattr(inspect, 'getfullargspec'):
|
||||
argspec = inspect.getfullargspec(cls)
|
||||
else:
|
||||
argspec = inspect.getfullargspec(cls.__init__)
|
||||
argnames = [arg for arg in argspec.args if arg != 'self']
|
||||
|
||||
def python_representer(dumper, obj):
|
||||
if argnames:
|
||||
data = {name: getattr(obj, name) for name in argnames}
|
||||
else:
|
||||
data = obj.__dict__
|
||||
if '_id' in data:
|
||||
del data['_id']
|
||||
return dumper.represent_mapping(u'!{}'.format(cls.__name__), data)
|
||||
|
||||
return python_representer
|
||||
|
||||
|
||||
def serializable(cls):
    """
    Add loader and dumper for given class, which must be
    "trivially serializable"

    Args:
        cls: class to be serialized

    Returns: cls
    """
    tag = u'!{}'.format(cls.__name__)
    yaml.add_constructor(tag, _make_python_constructor(cls))
    yaml.add_representer(cls, _make_python_representer(cls))
    return cls
|
||||
|
||||
|
||||
# Dump a SharedConfig as just its default value, so serialized configs
# contain plain data instead of a SharedConfig object.
yaml.add_representer(SharedConfig,
                     lambda d, o: d.represent_data(o.default_value))
|
||||
|
||||
|
||||
@serializable
class Callable(object):
    """
    Helper to be used in Yaml for creating arbitrary class objects

    Args:
        full_type (str): the full module path to target function
    """

    def __init__(self, full_type, args=[], kwargs={}):
        super(Callable, self).__init__()
        self.full_type = full_type
        self.args = args
        self.kwargs = kwargs

    def __call__(self):
        # split "pkg.mod.func" into module path and attribute; a bare
        # name resolves against the builtins module
        if '.' in self.full_type:
            module_path, _, func_name = self.full_type.rpartition('.')
            module = importlib.import_module(module_path)
        else:
            func_name = self.full_type
            try:
                module = importlib.import_module('builtins')
            except Exception:
                module = importlib.import_module('__builtin__')

        target = getattr(module, func_name)
        return target(*self.args, **self.kwargs)
|
|
@ -0,0 +1,275 @@
|
|||
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
from __future__ import division
|
||||
|
||||
import importlib
|
||||
import os
|
||||
import sys
|
||||
|
||||
import yaml
|
||||
import collections
|
||||
|
||||
try:
|
||||
collectionsAbc = collections.abc
|
||||
except AttributeError:
|
||||
collectionsAbc = collections
|
||||
|
||||
from .config.schema import SchemaDict, SharedConfig, extract_schema
|
||||
from .config.yaml_helpers import serializable
|
||||
|
||||
__all__ = [
|
||||
'global_config',
|
||||
'load_config',
|
||||
'merge_config',
|
||||
'get_registered_modules',
|
||||
'create',
|
||||
'register',
|
||||
'serializable',
|
||||
'dump_value',
|
||||
]
|
||||
|
||||
|
||||
def dump_value(value):
    """Serialize *value* for pretty-printing in config dumps.

    Containers and objects are rendered as single-quoted one-line YAML;
    primitives are rendered with ``str``.
    """
    # XXX this is hackish, but collections.abc is not available in python 2
    if hasattr(value, '__dict__') or isinstance(value, (dict, tuple, list)):
        text = yaml.dump(value, default_flow_style=True)
        text = text.replace('\n', '').replace('...', '')
        return "'{}'".format(text)
    # primitive types
    return str(value)
|
||||
|
||||
|
||||
class AttrDict(dict):
    """Single level attribute dict, NOT recursive"""

    def __init__(self, **kwargs):
        super(AttrDict, self).__init__()
        super(AttrDict, self).update(kwargs)

    def __getattr__(self, key):
        # fall through to item access so `cfg.foo` mirrors `cfg['foo']`
        try:
            return self[key]
        except KeyError:
            raise AttributeError("object has no attribute '{}'".format(key))
|
||||
|
||||
|
||||
# single process-wide registry: module schemas and merged yaml config values
global_config = AttrDict()

# yaml key whose value lists base config files to inherit from
BASE_KEY = '_BASE_'
|
||||
|
||||
|
||||
# parse and load _BASE_ recursively
|
||||
def _load_config_with_base(file_path):
|
||||
with open(file_path) as f:
|
||||
file_cfg = yaml.load(f, Loader=yaml.Loader)
|
||||
|
||||
# NOTE: cfgs outside have higher priority than cfgs in _BASE_
|
||||
if BASE_KEY in file_cfg:
|
||||
all_base_cfg = AttrDict()
|
||||
base_ymls = list(file_cfg[BASE_KEY])
|
||||
for base_yml in base_ymls:
|
||||
if base_yml.startswith("~"):
|
||||
base_yml = os.path.expanduser(base_yml)
|
||||
if not base_yml.startswith('/'):
|
||||
base_yml = os.path.join(os.path.dirname(file_path), base_yml)
|
||||
|
||||
with open(base_yml) as f:
|
||||
base_cfg = _load_config_with_base(base_yml)
|
||||
all_base_cfg = merge_config(base_cfg, all_base_cfg)
|
||||
|
||||
del file_cfg[BASE_KEY]
|
||||
return merge_config(file_cfg, all_base_cfg)
|
||||
|
||||
return file_cfg
|
||||
|
||||
|
||||
def load_config(file_path):
    """
    Load config from file.

    Args:
        file_path (str): Path of the config file to be loaded.

    Returns: global config
    """
    _, ext = os.path.splitext(file_path)
    assert ext in ['.yml', '.yaml'], "only support yaml files for now"

    # load config from file and merge into global config
    cfg = _load_config_with_base(file_path)
    file_name = os.path.split(file_path)[-1]
    cfg['filename'] = os.path.splitext(file_name)[0]
    merge_config(cfg)
    return global_config
|
||||
|
||||
|
||||
def dict_merge(dct, merge_dct):
    """ Recursive dict merge. Inspired by :meth:``dict.update()``, instead of
    updating only top-level keys, dict_merge recurses down into dicts nested
    to an arbitrary depth, updating keys. The ``merge_dct`` is merged into
    ``dct``.

    Args:
        dct: dict onto which the merge is executed
        merge_dct: dct merged into dct

    Returns: dct
    """
    for key, value in merge_dct.items():
        # recurse only when both sides hold a mapping at this key
        both_mappings = (key in dct and isinstance(dct[key], dict) and
                         isinstance(value, collectionsAbc.Mapping))
        if both_mappings:
            dict_merge(dct[key], value)
        else:
            dct[key] = value
    return dct
|
||||
|
||||
|
||||
def merge_config(config, another_cfg=None):
    """
    Merge config into global config or another_cfg.

    Args:
        config (dict): Config to be merged.

    Returns: global config
    """
    global global_config
    # NOTE: a falsy (e.g. empty) another_cfg falls back to global_config
    target = another_cfg or global_config
    return dict_merge(target, config)
|
||||
|
||||
|
||||
def get_registered_modules():
    """Return the registered module schemas held in the global config."""
    modules = {}
    for key, value in global_config.items():
        if isinstance(value, SchemaDict):
            modules[key] = value
    return modules
|
||||
|
||||
|
||||
def make_partial(cls):
    """Turn an op-wrapper class (one declaring `__op__`) into a callable
    that applies the wrapped op with the instance's attributes as
    pre-bound keyword arguments.
    """
    # resolve the target op function from the module where it is defined
    op_module = importlib.import_module(cls.__op__.__module__)
    op = getattr(op_module, cls.__op__.__name__)
    cls.__category__ = getattr(cls, '__category__', None) or 'op'

    def partial_apply(self, *args, **kwargs):
        # instance attributes act as defaults; call-time kwargs override
        kwargs_ = self.__dict__.copy()
        kwargs_.update(kwargs)
        return op(*args, **kwargs_)

    if getattr(cls, '__append_doc__', True):  # XXX should default to True?
        if sys.version_info[0] > 2:
            cls.__doc__ = "Wrapper for `{}` OP".format(op.__name__)
            cls.__init__.__doc__ = op.__doc__
            cls.__call__ = partial_apply
            cls.__call__.__doc__ = op.__doc__
        else:
            # XXX work around for python 2
            partial_apply.__doc__ = op.__doc__
            cls.__call__ = partial_apply
    # NOTE(review): when __append_doc__ is False, __call__ is never
    # attached to the class — confirm that is intended
    return cls
|
||||
|
||||
|
||||
def register(cls):
    """
    Register a given module class.

    Args:
        cls (type): Module class to be registered.

    Returns: cls
    """
    if cls.__name__ in global_config:
        raise ValueError("Module class already registered: {}".format(
            cls.__name__))
    # op-wrapper classes are converted into callables first
    if hasattr(cls, '__op__'):
        cls = make_partial(cls)
    global_config[cls.__name__] = extract_schema(cls)
    return cls
|
||||
|
||||
|
||||
def create(cls_or_name, **kwargs):
    """
    Create an instance of given module class.

    Args:
        cls_or_name (type or str): Class of which to create instance.

    Returns: instance of type `cls_or_name`
    """
    assert type(cls_or_name) in [type, str], "should be a class or name of a class"
    name = type(cls_or_name) == str and cls_or_name or cls_or_name.__name__
    assert name in global_config and \
        isinstance(global_config[name], SchemaDict), \
        "the module {} is not registered".format(name)
    config = global_config[name]
    # resolve the actual class from the module recorded in its schema
    cls = getattr(config.pymodule, name)
    cls_kwargs = {}
    cls_kwargs.update(global_config[name])

    # parse `shared` annotation of registered modules
    if getattr(config, 'shared', None):
        for k in config.shared:
            target_key = config[k]
            shared_conf = config.schema[k].default
            assert isinstance(shared_conf, SharedConfig)
            if target_key is not None and not isinstance(target_key,
                                                         SharedConfig):
                continue  # value is given for the module
            elif shared_conf.key in global_config:
                # `key` is present in config
                cls_kwargs[k] = global_config[shared_conf.key]
            else:
                cls_kwargs[k] = shared_conf.default_value

    # let the class translate config into constructor kwargs, if it can
    if getattr(cls, 'from_config', None):
        cls_kwargs.update(cls.from_config(config, **kwargs))

    # parse `inject` annotation of registered modules
    if getattr(config, 'inject', None):
        for k in config.inject:
            target_key = config[k]
            # optional dependency
            if target_key is None:
                continue

            if isinstance(target_key, dict) or hasattr(target_key, '__dict__'):
                # dict-style injection: {'name': ..., extra overrides...}
                if 'name' not in target_key.keys():
                    continue
                inject_name = str(target_key['name'])
                if inject_name not in global_config:
                    raise ValueError(
                        "Missing injection name {} and check it's name in cfg file".
                        format(k))
                target = global_config[inject_name]
                # push the remaining entries into the target's config
                for i, v in target_key.items():
                    if i == 'name':
                        continue
                    target[i] = v
                if isinstance(target, SchemaDict):
                    cls_kwargs[k] = create(inject_name)
            elif isinstance(target_key, str):
                # string-style injection: name of a registered module
                if target_key not in global_config:
                    raise ValueError("Missing injection config:", target_key)
                target = global_config[target_key]
                if isinstance(target, SchemaDict):
                    cls_kwargs[k] = create(target_key)
                elif hasattr(target, '__dict__'):  # serialized object
                    cls_kwargs[k] = target
            else:
                raise ValueError("Unsupported injection type:", target_key)
    # prevent modification of global config values of reference types
    # (e.g., list, dict) from within the created module instances
    #kwargs = copy.deepcopy(kwargs)
    return cls(**cls_kwargs)
|
|
@ -0,0 +1,21 @@
|
|||
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from . import source
|
||||
from . import transform
|
||||
from . import reader
|
||||
|
||||
from .source import *
|
||||
from .transform import *
|
||||
from .reader import *
|
|
@ -0,0 +1,303 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import traceback
|
||||
import six
|
||||
import sys
|
||||
|
||||
if sys.version_info >= (3, 0):
|
||||
pass
|
||||
else:
|
||||
pass
|
||||
import numpy as np
|
||||
|
||||
from paddle.io import DataLoader, DistributedBatchSampler
|
||||
from paddle.fluid.dataloader.collate import default_collate_fn
|
||||
|
||||
from ppdet.core.workspace import register
|
||||
from . import transform
|
||||
from .shm_utils import _get_shared_memory_size_in_M
|
||||
|
||||
from ppdet.utils.logger import setup_logger
|
||||
logger = setup_logger('reader')
|
||||
|
||||
MAIN_PID = os.getpid()
|
||||
|
||||
|
||||
class Compose(object):
    """Chain of per-sample transforms built from a list of op configs.

    Each entry of *transforms* is a single-item dict mapping an op class
    name (looked up on the `transform` module) to its kwargs.  Ops that
    expose a ``num_classes`` attribute get it set to the dataset's
    class count.
    """

    def __init__(self, transforms, num_classes=80):
        self.transforms = transforms
        self.transforms_cls = []
        for cfg in self.transforms:
            for op_name, op_kwargs in cfg.items():
                op = getattr(transform, op_name)(**op_kwargs)
                if hasattr(op, 'num_classes'):
                    op.num_classes = num_classes

                self.transforms_cls.append(op)

    def __call__(self, data):
        """Apply every transform in order; log and re-raise on failure."""
        for op in self.transforms_cls:
            try:
                data = op(data)
            except Exception as e:
                stack_info = traceback.format_exc()
                logger.warning("fail to map sample transform [{}] "
                               "with error: {} and stack:\n{}".format(
                                   op, e, str(stack_info)))
                raise e

        return data
|
||||
|
||||
|
||||
class BatchCompose(Compose):
    """Chain of batch-level transforms, followed by batch collation.

    When ``collate_batch`` is True the whole sample list is collated with
    paddle's ``default_collate_fn``; otherwise fields are collated
    per-key, and ground-truth-like keys are left as lists.
    """

    def __init__(self, transforms, num_classes=80, collate_batch=True):
        super(BatchCompose, self).__init__(transforms, num_classes)
        # whether to stack samples into dense batch tensors
        self.collate_batch = collate_batch

    def __call__(self, data):
        # apply every batch transform in order; log and re-raise failures
        for f in self.transforms_cls:
            try:
                data = f(data)
            except Exception as e:
                stack_info = traceback.format_exc()
                logger.warning("fail to map batch transform [{}] "
                               "with error: {} and stack:\n{}".format(
                                   f, e, str(stack_info)))
                raise e

        # remove keys which is not needed by model
        extra_key = ['h', 'w', 'flipped']
        for k in extra_key:
            for sample in data:
                if k in sample:
                    sample.pop(k)

        # batch data, if user-define batch function needed
        # use user-defined here
        if self.collate_batch:
            batch_data = default_collate_fn(data)
        else:
            batch_data = {}
            for k in data[0].keys():
                tmp_data = []
                for i in range(len(data)):
                    tmp_data.append(data[i][k])
                # variable-length annotation fields stay as python lists;
                # everything else is stacked along a new batch axis
                if not 'gt_' in k and not 'is_crowd' in k and not 'difficult' in k:
                    tmp_data = np.stack(tmp_data, axis=0)
                batch_data[k] = tmp_data
        return batch_data
|
||||
|
||||
|
||||
class BaseDataLoader(object):
    """
    Base DataLoader implementation for detection models

    Args:
        sample_transforms (list): a list of transforms to perform
            on each sample
        batch_transforms (list): a list of transforms to perform
            on batch
        batch_size (int): batch size for batch collating, default 1.
        shuffle (bool): whether to shuffle samples
        drop_last (bool): whether to drop the last incomplete,
            default False
        num_classes (int): class number of dataset, default 80
        collate_batch (bool): whether to collate batch in dataloader.
            If set to True, the samples will collate into batch according
            to the batch size. Otherwise, the ground-truth will not collate,
            which is used when the number of ground-truth is different in
            samples.
        use_shared_memory (bool): whether to use shared memory to
            accelerate data loading, enable this only if you
            are sure that the shared memory size of your OS
            is larger than memory cost of input datas of model.
            Note that shared memory will be automatically
            disabled if the shared memory of OS is less than
            1G, which is not enough for detection models.
            Default False.
    """

    def __init__(self,
                 sample_transforms=[],
                 batch_transforms=[],
                 batch_size=1,
                 shuffle=False,
                 drop_last=False,
                 num_classes=80,
                 collate_batch=True,
                 use_shared_memory=False,
                 **kwargs):
        # sample transform
        self._sample_transforms = Compose(
            sample_transforms, num_classes=num_classes)

        # batch transform (also performs collation, see BatchCompose)
        self._batch_transforms = BatchCompose(batch_transforms, num_classes,
                                              collate_batch)
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.drop_last = drop_last
        self.use_shared_memory = use_shared_memory
        # extra kwargs are forwarded to the dataset via set_kwargs()
        self.kwargs = kwargs

    def __call__(self,
                 dataset,
                 worker_num,
                 batch_sampler=None,
                 return_list=False):
        """Bind *dataset* and build the underlying paddle DataLoader.

        Returns self, which is itself iterable over batches.
        """
        self.dataset = dataset
        self.dataset.check_or_download_dataset()
        self.dataset.parse_dataset()
        # get data
        self.dataset.set_transform(self._sample_transforms)
        # set kwargs
        self.dataset.set_kwargs(**self.kwargs)
        # batch sampler: build a distributed one unless caller supplied one
        if batch_sampler is None:
            self._batch_sampler = DistributedBatchSampler(
                self.dataset,
                batch_size=self.batch_size,
                shuffle=self.shuffle,
                drop_last=self.drop_last)
        else:
            self._batch_sampler = batch_sampler

        # DataLoader do not start sub-process in Windows and Mac
        # system, do not need to use shared memory
        use_shared_memory = self.use_shared_memory and \
            sys.platform not in ['win32', 'darwin']
        # check whether shared memory size is bigger than 1G(1024M)
        if use_shared_memory:
            shm_size = _get_shared_memory_size_in_M()
            if shm_size is not None and shm_size < 1024.:
                logger.warning("Shared memory size is less than 1G, "
                               "disable shared_memory in DataLoader")
                use_shared_memory = False

        self.dataloader = DataLoader(
            dataset=self.dataset,
            batch_sampler=self._batch_sampler,
            collate_fn=self._batch_transforms,
            num_workers=worker_num,
            return_list=return_list,
            use_shared_memory=use_shared_memory)
        self.loader = iter(self.dataloader)

        return self

    def __len__(self):
        # number of batches per epoch
        return len(self._batch_sampler)

    def __iter__(self):
        return self

    def __next__(self):
        try:
            return next(self.loader)
        except StopIteration:
            # restart the underlying loader for the next epoch, but still
            # signal end-of-epoch to the caller
            self.loader = iter(self.dataloader)
            six.reraise(*sys.exc_info())

    def next(self):
        # python2 compatibility
        return self.__next__()
|
||||
|
||||
|
||||
@register
class TrainReader(BaseDataLoader):
    """Data loader for training: shuffles and drops the last short batch."""
    __shared__ = ['num_classes']

    def __init__(self,
                 sample_transforms=[],
                 batch_transforms=[],
                 batch_size=1,
                 shuffle=True,
                 drop_last=True,
                 num_classes=80,
                 collate_batch=True,
                 **kwargs):
        super(TrainReader, self).__init__(
            sample_transforms, batch_transforms, batch_size, shuffle,
            drop_last, num_classes, collate_batch, **kwargs)
|
||||
|
||||
|
||||
@register
class EvalReader(BaseDataLoader):
    """Data loader for evaluation: no shuffle, drops the last short batch."""
    __shared__ = ['num_classes']

    def __init__(self,
                 sample_transforms=[],
                 batch_transforms=[],
                 batch_size=1,
                 shuffle=False,
                 drop_last=True,
                 num_classes=80,
                 **kwargs):
        super(EvalReader, self).__init__(
            sample_transforms, batch_transforms, batch_size, shuffle,
            drop_last, num_classes, **kwargs)
|
||||
|
||||
|
||||
@register
class TestReader(BaseDataLoader):
    """Data loader for inference: no shuffle, keeps the last short batch."""
    __shared__ = ['num_classes']

    def __init__(self,
                 sample_transforms=[],
                 batch_transforms=[],
                 batch_size=1,
                 shuffle=False,
                 drop_last=False,
                 num_classes=80,
                 **kwargs):
        super(TestReader, self).__init__(
            sample_transforms, batch_transforms, batch_size, shuffle,
            drop_last, num_classes, **kwargs)
|
||||
|
||||
|
||||
@register
class EvalMOTReader(BaseDataLoader):
    """MOT evaluation loader (single 'person'-style class by default)."""
    __shared__ = ['num_classes']

    def __init__(self,
                 sample_transforms=[],
                 batch_transforms=[],
                 batch_size=1,
                 shuffle=False,
                 drop_last=False,
                 num_classes=1,
                 **kwargs):
        super(EvalMOTReader, self).__init__(
            sample_transforms, batch_transforms, batch_size, shuffle,
            drop_last, num_classes, **kwargs)
|
||||
|
||||
|
||||
@register
class TestMOTReader(BaseDataLoader):
    """MOT inference loader (single 'person'-style class by default)."""
    __shared__ = ['num_classes']

    def __init__(self,
                 sample_transforms=[],
                 batch_transforms=[],
                 batch_size=1,
                 shuffle=False,
                 drop_last=False,
                 num_classes=1,
                 **kwargs):
        super(TestMOTReader, self).__init__(
            sample_transforms, batch_transforms, batch_size, shuffle,
            drop_last, num_classes, **kwargs)
|
|
@ -0,0 +1,67 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
|
||||
# size suffixes understood by `_parse_size_in_M` (successive powers of 1024)
SIZE_UNIT = ['K', 'M', 'G', 'T']
# `df -h` lists every mount with a human-readable size column
SHM_QUERY_CMD = 'df -h'
# substring used to pick shared-memory mounts out of the `df` output
SHM_KEY = 'shm'
# conventional Linux shared-memory mount point
SHM_DEFAULT_MOUNT = '/dev/shm'
||||
|
||||
# [ shared memory size check ]
|
||||
# In detection models, image/target data occupies a lot of memory, and
|
||||
# will occupy lots of shared memory in multi-process DataLoader, we use
|
||||
# following code to get shared memory size and perform a size check to
|
||||
# disable shared memory use if shared memory size is not enough.
|
||||
# Shared memory getting process as follows:
|
||||
# 1. use `df -h` get all mount info
|
||||
# 2. pick up spaces whose mount info contains 'shm'
|
||||
# 3. if 'shm' space number is only 1, return its size
|
||||
# 4. if there are multiple 'shm' space, try to find the default mount
|
||||
# directory '/dev/shm' is Linux-like system, otherwise return the
|
||||
# biggest space size.
|
||||
|
||||
|
||||
def _parse_size_in_M(size_str):
    """Convert a `df -h` size string like '16G' into megabytes (float)."""
    num, unit = size_str[:-1], size_str[-1]
    assert unit in SIZE_UNIT, \
        "unknown shm size unit {}".format(unit)
    # SIZE_UNIT is ordered K, M, G, T, so 'M' (index 1) maps to 1024**0
    scale = 1024 ** (SIZE_UNIT.index(unit) - 1)
    return float(num) * scale
|
||||
|
||||
|
||||
def _get_shared_memory_size_in_M():
    """Return the shared-memory ('shm') mount size in MB, or None.

    Parses `df -h` output; when several shm mounts exist, prefers the
    default /dev/shm mount, otherwise returns the biggest one.  Returns
    None when `df` cannot be queried or no shm mount is found.
    """
    try:
        df_infos = os.popen(SHM_QUERY_CMD).readlines()
    # narrowed from a bare `except:`, which also swallowed SystemExit
    # and KeyboardInterrupt
    except Exception:
        return None

    shm_infos = []
    for df_info in df_infos:
        info = df_info.strip()
        if info.find(SHM_KEY) >= 0:
            shm_infos.append(info.split())

    if len(shm_infos) == 0:
        return None
    elif len(shm_infos) == 1:
        # column 3 of `df -h` output is the available size
        return _parse_size_in_M(shm_infos[0][3])
    else:
        default_mount_infos = [
            si for si in shm_infos if si[-1] == SHM_DEFAULT_MOUNT
        ]
        if default_mount_infos:
            return _parse_size_in_M(default_mount_infos[0][3])
        return max([_parse_size_in_M(si[3]) for si in shm_infos])
|
|
@ -0,0 +1,27 @@
|
|||
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from . import coco
|
||||
from . import voc
|
||||
from . import widerface
|
||||
from . import category
|
||||
from . import keypoint_coco
|
||||
from . import mot
|
||||
|
||||
from .coco import *
|
||||
from .voc import *
|
||||
from .widerface import *
|
||||
from .category import *
|
||||
from .keypoint_coco import *
|
||||
from .mot import *
|
|
@ -0,0 +1,823 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
|
||||
from ppdet.data.source.voc import pascalvoc_label
|
||||
from ppdet.data.source.widerface import widerface_label
|
||||
from ppdet.utils.logger import setup_logger
|
||||
logger = setup_logger(__name__)
|
||||
|
||||
__all__ = ['get_categories']
|
||||
|
||||
|
||||
def get_categories(metric_type, anno_file=None, arch=None):
    """
    Get class id to category id map and category id
    to category name map from annotation file.

    Args:
        metric_type (str): metric type, currently support 'coco', 'voc', 'oid'
            and 'widerface' (plus keypoint and MOT metric names).
        anno_file (str): annotation file path
        arch (str): model architecture; 'keypoint_arch' bypasses category
            lookup entirely.

    Returns:
        tuple: (clsid2catid, catid2name) — for keypoint cases the first
        element is None and the second is a marker dict.
    """
    if arch == 'keypoint_arch':
        return (None, {'id': 'keypoint'})

    if metric_type.lower() == 'coco':
        if anno_file and os.path.isfile(anno_file):
            # lazy import pycocotools here
            from pycocotools.coco import COCO

            coco = COCO(anno_file)
            cats = coco.loadCats(coco.getCatIds())

            clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)}
            catid2name = {cat['id']: cat['name'] for cat in cats}
            return clsid2catid, catid2name

        # anno file not exist, load default categories of COCO17
        else:
            return _coco17_category()

    elif metric_type.lower() == 'voc':
        if anno_file and os.path.isfile(anno_file):
            # label list file: one category name per line
            cats = []
            with open(anno_file) as f:
                for line in f.readlines():
                    cats.append(line.strip())

            # drop an explicit background entry if present
            if cats[0] == 'background':
                cats = cats[1:]

            clsid2catid = {i: i for i in range(len(cats))}
            catid2name = {i: name for i, name in enumerate(cats)}

            return clsid2catid, catid2name

        # anno file not exist, load default categories of
        # VOC all 20 categories
        else:
            return _vocall_category()

    elif metric_type.lower() == 'oid':
        if anno_file and os.path.isfile(anno_file):
            logger.warning("only default categories support for OID19")
        return _oid19_category()

    elif metric_type.lower() == 'widerface':
        return _widerface_category()

    elif metric_type.lower() == 'keypointtopdowncocoeval':
        return (None, {'id': 'keypoint'})

    elif metric_type.lower() in ['mot', 'motdet', 'reid']:
        return _mot_category()

    else:
        raise ValueError("unknown metric type {}".format(metric_type))
|
||||
|
||||
|
||||
def _mot_category():
|
||||
"""
|
||||
Get class id to category id map and category id
|
||||
to category name map of mot dataset
|
||||
"""
|
||||
label_map = {'person': 0}
|
||||
label_map = sorted(label_map.items(), key=lambda x: x[1])
|
||||
cats = [l[0] for l in label_map]
|
||||
|
||||
clsid2catid = {i: i for i in range(len(cats))}
|
||||
catid2name = {i: name for i, name in enumerate(cats)}
|
||||
|
||||
return clsid2catid, catid2name
|
||||
|
||||
|
||||
def _coco17_category():
|
||||
"""
|
||||
Get class id to category id map and category id
|
||||
to category name map of COCO2017 dataset
|
||||
|
||||
"""
|
||||
clsid2catid = {
|
||||
1: 1,
|
||||
2: 2,
|
||||
3: 3,
|
||||
4: 4,
|
||||
5: 5,
|
||||
6: 6,
|
||||
7: 7,
|
||||
8: 8,
|
||||
9: 9,
|
||||
10: 10,
|
||||
11: 11,
|
||||
12: 13,
|
||||
13: 14,
|
||||
14: 15,
|
||||
15: 16,
|
||||
16: 17,
|
||||
17: 18,
|
||||
18: 19,
|
||||
19: 20,
|
||||
20: 21,
|
||||
21: 22,
|
||||
22: 23,
|
||||
23: 24,
|
||||
24: 25,
|
||||
25: 27,
|
||||
26: 28,
|
||||
27: 31,
|
||||
28: 32,
|
||||
29: 33,
|
||||
30: 34,
|
||||
31: 35,
|
||||
32: 36,
|
||||
33: 37,
|
||||
34: 38,
|
||||
35: 39,
|
||||
36: 40,
|
||||
37: 41,
|
||||
38: 42,
|
||||
39: 43,
|
||||
40: 44,
|
||||
41: 46,
|
||||
42: 47,
|
||||
43: 48,
|
||||
44: 49,
|
||||
45: 50,
|
||||
46: 51,
|
||||
47: 52,
|
||||
48: 53,
|
||||
49: 54,
|
||||
50: 55,
|
||||
51: 56,
|
||||
52: 57,
|
||||
53: 58,
|
||||
54: 59,
|
||||
55: 60,
|
||||
56: 61,
|
||||
57: 62,
|
||||
58: 63,
|
||||
59: 64,
|
||||
60: 65,
|
||||
61: 67,
|
||||
62: 70,
|
||||
63: 72,
|
||||
64: 73,
|
||||
65: 74,
|
||||
66: 75,
|
||||
67: 76,
|
||||
68: 77,
|
||||
69: 78,
|
||||
70: 79,
|
||||
71: 80,
|
||||
72: 81,
|
||||
73: 82,
|
||||
74: 84,
|
||||
75: 85,
|
||||
76: 86,
|
||||
77: 87,
|
||||
78: 88,
|
||||
79: 89,
|
||||
80: 90
|
||||
}
|
||||
|
||||
catid2name = {
|
||||
0: 'background',
|
||||
1: 'person',
|
||||
2: 'bicycle',
|
||||
3: 'car',
|
||||
4: 'motorcycle',
|
||||
5: 'airplane',
|
||||
6: 'bus',
|
||||
7: 'train',
|
||||
8: 'truck',
|
||||
9: 'boat',
|
||||
10: 'traffic light',
|
||||
11: 'fire hydrant',
|
||||
13: 'stop sign',
|
||||
14: 'parking meter',
|
||||
15: 'bench',
|
||||
16: 'bird',
|
||||
17: 'cat',
|
||||
18: 'dog',
|
||||
19: 'horse',
|
||||
20: 'sheep',
|
||||
21: 'cow',
|
||||
22: 'elephant',
|
||||
23: 'bear',
|
||||
24: 'zebra',
|
||||
25: 'giraffe',
|
||||
27: 'backpack',
|
||||
28: 'umbrella',
|
||||
31: 'handbag',
|
||||
32: 'tie',
|
||||
33: 'suitcase',
|
||||
34: 'frisbee',
|
||||
35: 'skis',
|
||||
36: 'snowboard',
|
||||
37: 'sports ball',
|
||||
38: 'kite',
|
||||
39: 'baseball bat',
|
||||
40: 'baseball glove',
|
||||
41: 'skateboard',
|
||||
42: 'surfboard',
|
||||
43: 'tennis racket',
|
||||
44: 'bottle',
|
||||
46: 'wine glass',
|
||||
47: 'cup',
|
||||
48: 'fork',
|
||||
49: 'knife',
|
||||
50: 'spoon',
|
||||
51: 'bowl',
|
||||
52: 'banana',
|
||||
53: 'apple',
|
||||
54: 'sandwich',
|
||||
55: 'orange',
|
||||
56: 'broccoli',
|
||||
57: 'carrot',
|
||||
58: 'hot dog',
|
||||
59: 'pizza',
|
||||
60: 'donut',
|
||||
61: 'cake',
|
||||
62: 'chair',
|
||||
63: 'couch',
|
||||
64: 'potted plant',
|
||||
65: 'bed',
|
||||
67: 'dining table',
|
||||
70: 'toilet',
|
||||
72: 'tv',
|
||||
73: 'laptop',
|
||||
74: 'mouse',
|
||||
75: 'remote',
|
||||
76: 'keyboard',
|
||||
77: 'cell phone',
|
||||
78: 'microwave',
|
||||
79: 'oven',
|
||||
80: 'toaster',
|
||||
81: 'sink',
|
||||
82: 'refrigerator',
|
||||
84: 'book',
|
||||
85: 'clock',
|
||||
86: 'vase',
|
||||
87: 'scissors',
|
||||
88: 'teddy bear',
|
||||
89: 'hair drier',
|
||||
90: 'toothbrush'
|
||||
}
|
||||
|
||||
clsid2catid = {k - 1: v for k, v in clsid2catid.items()}
|
||||
catid2name.pop(0)
|
||||
|
||||
return clsid2catid, catid2name
|
||||
|
||||
|
||||
def _vocall_category():
    """
    Return the (clsid2catid, catid2name) maps for the full Pascal VOC
    label set, derived from pascalvoc_label().
    """
    # Sort (name, index) pairs by index so names line up with class ids.
    ordered_pairs = sorted(pascalvoc_label().items(), key=lambda item: item[1])
    names = [name for name, _ in ordered_pairs]

    clsid2catid = {idx: idx for idx in range(len(names))}
    catid2name = dict(enumerate(names))

    return clsid2catid, catid2name
|
||||
|
||||
|
||||
def _widerface_category():
    """
    Return the (clsid2catid, catid2name) maps for the WIDER FACE
    label set, derived from widerface_label().
    """
    # Sort (name, index) pairs by index so names line up with class ids.
    ordered_pairs = sorted(widerface_label().items(), key=lambda item: item[1])
    names = [name for name, _ in ordered_pairs]
    clsid2catid = {idx: idx for idx in range(len(names))}
    catid2name = dict(enumerate(names))

    return clsid2catid, catid2name
|
||||
|
||||
|
||||
def _oid19_category():
|
||||
clsid2catid = {k: k + 1 for k in range(500)}
|
||||
|
||||
catid2name = {
|
||||
0: "background",
|
||||
1: "Infant bed",
|
||||
2: "Rose",
|
||||
3: "Flag",
|
||||
4: "Flashlight",
|
||||
5: "Sea turtle",
|
||||
6: "Camera",
|
||||
7: "Animal",
|
||||
8: "Glove",
|
||||
9: "Crocodile",
|
||||
10: "Cattle",
|
||||
11: "House",
|
||||
12: "Guacamole",
|
||||
13: "Penguin",
|
||||
14: "Vehicle registration plate",
|
||||
15: "Bench",
|
||||
16: "Ladybug",
|
||||
17: "Human nose",
|
||||
18: "Watermelon",
|
||||
19: "Flute",
|
||||
20: "Butterfly",
|
||||
21: "Washing machine",
|
||||
22: "Raccoon",
|
||||
23: "Segway",
|
||||
24: "Taco",
|
||||
25: "Jellyfish",
|
||||
26: "Cake",
|
||||
27: "Pen",
|
||||
28: "Cannon",
|
||||
29: "Bread",
|
||||
30: "Tree",
|
||||
31: "Shellfish",
|
||||
32: "Bed",
|
||||
33: "Hamster",
|
||||
34: "Hat",
|
||||
35: "Toaster",
|
||||
36: "Sombrero",
|
||||
37: "Tiara",
|
||||
38: "Bowl",
|
||||
39: "Dragonfly",
|
||||
40: "Moths and butterflies",
|
||||
41: "Antelope",
|
||||
42: "Vegetable",
|
||||
43: "Torch",
|
||||
44: "Building",
|
||||
45: "Power plugs and sockets",
|
||||
46: "Blender",
|
||||
47: "Billiard table",
|
||||
48: "Cutting board",
|
||||
49: "Bronze sculpture",
|
||||
50: "Turtle",
|
||||
51: "Broccoli",
|
||||
52: "Tiger",
|
||||
53: "Mirror",
|
||||
54: "Bear",
|
||||
55: "Zucchini",
|
||||
56: "Dress",
|
||||
57: "Volleyball",
|
||||
58: "Guitar",
|
||||
59: "Reptile",
|
||||
60: "Golf cart",
|
||||
61: "Tart",
|
||||
62: "Fedora",
|
||||
63: "Carnivore",
|
||||
64: "Car",
|
||||
65: "Lighthouse",
|
||||
66: "Coffeemaker",
|
||||
67: "Food processor",
|
||||
68: "Truck",
|
||||
69: "Bookcase",
|
||||
70: "Surfboard",
|
||||
71: "Footwear",
|
||||
72: "Bench",
|
||||
73: "Necklace",
|
||||
74: "Flower",
|
||||
75: "Radish",
|
||||
76: "Marine mammal",
|
||||
77: "Frying pan",
|
||||
78: "Tap",
|
||||
79: "Peach",
|
||||
80: "Knife",
|
||||
81: "Handbag",
|
||||
82: "Laptop",
|
||||
83: "Tent",
|
||||
84: "Ambulance",
|
||||
85: "Christmas tree",
|
||||
86: "Eagle",
|
||||
87: "Limousine",
|
||||
88: "Kitchen & dining room table",
|
||||
89: "Polar bear",
|
||||
90: "Tower",
|
||||
91: "Football",
|
||||
92: "Willow",
|
||||
93: "Human head",
|
||||
94: "Stop sign",
|
||||
95: "Banana",
|
||||
96: "Mixer",
|
||||
97: "Binoculars",
|
||||
98: "Dessert",
|
||||
99: "Bee",
|
||||
100: "Chair",
|
||||
101: "Wood-burning stove",
|
||||
102: "Flowerpot",
|
||||
103: "Beaker",
|
||||
104: "Oyster",
|
||||
105: "Woodpecker",
|
||||
106: "Harp",
|
||||
107: "Bathtub",
|
||||
108: "Wall clock",
|
||||
109: "Sports uniform",
|
||||
110: "Rhinoceros",
|
||||
111: "Beehive",
|
||||
112: "Cupboard",
|
||||
113: "Chicken",
|
||||
114: "Man",
|
||||
115: "Blue jay",
|
||||
116: "Cucumber",
|
||||
117: "Balloon",
|
||||
118: "Kite",
|
||||
119: "Fireplace",
|
||||
120: "Lantern",
|
||||
121: "Missile",
|
||||
122: "Book",
|
||||
123: "Spoon",
|
||||
124: "Grapefruit",
|
||||
125: "Squirrel",
|
||||
126: "Orange",
|
||||
127: "Coat",
|
||||
128: "Punching bag",
|
||||
129: "Zebra",
|
||||
130: "Billboard",
|
||||
131: "Bicycle",
|
||||
132: "Door handle",
|
||||
133: "Mechanical fan",
|
||||
134: "Ring binder",
|
||||
135: "Table",
|
||||
136: "Parrot",
|
||||
137: "Sock",
|
||||
138: "Vase",
|
||||
139: "Weapon",
|
||||
140: "Shotgun",
|
||||
141: "Glasses",
|
||||
142: "Seahorse",
|
||||
143: "Belt",
|
||||
144: "Watercraft",
|
||||
145: "Window",
|
||||
146: "Giraffe",
|
||||
147: "Lion",
|
||||
148: "Tire",
|
||||
149: "Vehicle",
|
||||
150: "Canoe",
|
||||
151: "Tie",
|
||||
152: "Shelf",
|
||||
153: "Picture frame",
|
||||
154: "Printer",
|
||||
155: "Human leg",
|
||||
156: "Boat",
|
||||
157: "Slow cooker",
|
||||
158: "Croissant",
|
||||
159: "Candle",
|
||||
160: "Pancake",
|
||||
161: "Pillow",
|
||||
162: "Coin",
|
||||
163: "Stretcher",
|
||||
164: "Sandal",
|
||||
165: "Woman",
|
||||
166: "Stairs",
|
||||
167: "Harpsichord",
|
||||
168: "Stool",
|
||||
169: "Bus",
|
||||
170: "Suitcase",
|
||||
171: "Human mouth",
|
||||
172: "Juice",
|
||||
173: "Skull",
|
||||
174: "Door",
|
||||
175: "Violin",
|
||||
176: "Chopsticks",
|
||||
177: "Digital clock",
|
||||
178: "Sunflower",
|
||||
179: "Leopard",
|
||||
180: "Bell pepper",
|
||||
181: "Harbor seal",
|
||||
182: "Snake",
|
||||
183: "Sewing machine",
|
||||
184: "Goose",
|
||||
185: "Helicopter",
|
||||
186: "Seat belt",
|
||||
187: "Coffee cup",
|
||||
188: "Microwave oven",
|
||||
189: "Hot dog",
|
||||
190: "Countertop",
|
||||
191: "Serving tray",
|
||||
192: "Dog bed",
|
||||
193: "Beer",
|
||||
194: "Sunglasses",
|
||||
195: "Golf ball",
|
||||
196: "Waffle",
|
||||
197: "Palm tree",
|
||||
198: "Trumpet",
|
||||
199: "Ruler",
|
||||
200: "Helmet",
|
||||
201: "Ladder",
|
||||
202: "Office building",
|
||||
203: "Tablet computer",
|
||||
204: "Toilet paper",
|
||||
205: "Pomegranate",
|
||||
206: "Skirt",
|
||||
207: "Gas stove",
|
||||
208: "Cookie",
|
||||
209: "Cart",
|
||||
210: "Raven",
|
||||
211: "Egg",
|
||||
212: "Burrito",
|
||||
213: "Goat",
|
||||
214: "Kitchen knife",
|
||||
215: "Skateboard",
|
||||
216: "Salt and pepper shakers",
|
||||
217: "Lynx",
|
||||
218: "Boot",
|
||||
219: "Platter",
|
||||
220: "Ski",
|
||||
221: "Swimwear",
|
||||
222: "Swimming pool",
|
||||
223: "Drinking straw",
|
||||
224: "Wrench",
|
||||
225: "Drum",
|
||||
226: "Ant",
|
||||
227: "Human ear",
|
||||
228: "Headphones",
|
||||
229: "Fountain",
|
||||
230: "Bird",
|
||||
231: "Jeans",
|
||||
232: "Television",
|
||||
233: "Crab",
|
||||
234: "Microphone",
|
||||
235: "Home appliance",
|
||||
236: "Snowplow",
|
||||
237: "Beetle",
|
||||
238: "Artichoke",
|
||||
239: "Jet ski",
|
||||
240: "Stationary bicycle",
|
||||
241: "Human hair",
|
||||
242: "Brown bear",
|
||||
243: "Starfish",
|
||||
244: "Fork",
|
||||
245: "Lobster",
|
||||
246: "Corded phone",
|
||||
247: "Drink",
|
||||
248: "Saucer",
|
||||
249: "Carrot",
|
||||
250: "Insect",
|
||||
251: "Clock",
|
||||
252: "Castle",
|
||||
253: "Tennis racket",
|
||||
254: "Ceiling fan",
|
||||
255: "Asparagus",
|
||||
256: "Jaguar",
|
||||
257: "Musical instrument",
|
||||
258: "Train",
|
||||
259: "Cat",
|
||||
260: "Rifle",
|
||||
261: "Dumbbell",
|
||||
262: "Mobile phone",
|
||||
263: "Taxi",
|
||||
264: "Shower",
|
||||
265: "Pitcher",
|
||||
266: "Lemon",
|
||||
267: "Invertebrate",
|
||||
268: "Turkey",
|
||||
269: "High heels",
|
||||
270: "Bust",
|
||||
271: "Elephant",
|
||||
272: "Scarf",
|
||||
273: "Barrel",
|
||||
274: "Trombone",
|
||||
275: "Pumpkin",
|
||||
276: "Box",
|
||||
277: "Tomato",
|
||||
278: "Frog",
|
||||
279: "Bidet",
|
||||
280: "Human face",
|
||||
281: "Houseplant",
|
||||
282: "Van",
|
||||
283: "Shark",
|
||||
284: "Ice cream",
|
||||
285: "Swim cap",
|
||||
286: "Falcon",
|
||||
287: "Ostrich",
|
||||
288: "Handgun",
|
||||
289: "Whiteboard",
|
||||
290: "Lizard",
|
||||
291: "Pasta",
|
||||
292: "Snowmobile",
|
||||
293: "Light bulb",
|
||||
294: "Window blind",
|
||||
295: "Muffin",
|
||||
296: "Pretzel",
|
||||
297: "Computer monitor",
|
||||
298: "Horn",
|
||||
299: "Furniture",
|
||||
300: "Sandwich",
|
||||
301: "Fox",
|
||||
302: "Convenience store",
|
||||
303: "Fish",
|
||||
304: "Fruit",
|
||||
305: "Earrings",
|
||||
306: "Curtain",
|
||||
307: "Grape",
|
||||
308: "Sofa bed",
|
||||
309: "Horse",
|
||||
310: "Luggage and bags",
|
||||
311: "Desk",
|
||||
312: "Crutch",
|
||||
313: "Bicycle helmet",
|
||||
314: "Tick",
|
||||
315: "Airplane",
|
||||
316: "Canary",
|
||||
317: "Spatula",
|
||||
318: "Watch",
|
||||
319: "Lily",
|
||||
320: "Kitchen appliance",
|
||||
321: "Filing cabinet",
|
||||
322: "Aircraft",
|
||||
323: "Cake stand",
|
||||
324: "Candy",
|
||||
325: "Sink",
|
||||
326: "Mouse",
|
||||
327: "Wine",
|
||||
328: "Wheelchair",
|
||||
329: "Goldfish",
|
||||
330: "Refrigerator",
|
||||
331: "French fries",
|
||||
332: "Drawer",
|
||||
333: "Treadmill",
|
||||
334: "Picnic basket",
|
||||
335: "Dice",
|
||||
336: "Cabbage",
|
||||
337: "Football helmet",
|
||||
338: "Pig",
|
||||
339: "Person",
|
||||
340: "Shorts",
|
||||
341: "Gondola",
|
||||
342: "Honeycomb",
|
||||
343: "Doughnut",
|
||||
344: "Chest of drawers",
|
||||
345: "Land vehicle",
|
||||
346: "Bat",
|
||||
347: "Monkey",
|
||||
348: "Dagger",
|
||||
349: "Tableware",
|
||||
350: "Human foot",
|
||||
351: "Mug",
|
||||
352: "Alarm clock",
|
||||
353: "Pressure cooker",
|
||||
354: "Human hand",
|
||||
355: "Tortoise",
|
||||
356: "Baseball glove",
|
||||
357: "Sword",
|
||||
358: "Pear",
|
||||
359: "Miniskirt",
|
||||
360: "Traffic sign",
|
||||
361: "Girl",
|
||||
362: "Roller skates",
|
||||
363: "Dinosaur",
|
||||
364: "Porch",
|
||||
365: "Human beard",
|
||||
366: "Submarine sandwich",
|
||||
367: "Screwdriver",
|
||||
368: "Strawberry",
|
||||
369: "Wine glass",
|
||||
370: "Seafood",
|
||||
371: "Racket",
|
||||
372: "Wheel",
|
||||
373: "Sea lion",
|
||||
374: "Toy",
|
||||
375: "Tea",
|
||||
376: "Tennis ball",
|
||||
377: "Waste container",
|
||||
378: "Mule",
|
||||
379: "Cricket ball",
|
||||
380: "Pineapple",
|
||||
381: "Coconut",
|
||||
382: "Doll",
|
||||
383: "Coffee table",
|
||||
384: "Snowman",
|
||||
385: "Lavender",
|
||||
386: "Shrimp",
|
||||
387: "Maple",
|
||||
388: "Cowboy hat",
|
||||
389: "Goggles",
|
||||
390: "Rugby ball",
|
||||
391: "Caterpillar",
|
||||
392: "Poster",
|
||||
393: "Rocket",
|
||||
394: "Organ",
|
||||
395: "Saxophone",
|
||||
396: "Traffic light",
|
||||
397: "Cocktail",
|
||||
398: "Plastic bag",
|
||||
399: "Squash",
|
||||
400: "Mushroom",
|
||||
401: "Hamburger",
|
||||
402: "Light switch",
|
||||
403: "Parachute",
|
||||
404: "Teddy bear",
|
||||
405: "Winter melon",
|
||||
406: "Deer",
|
||||
407: "Musical keyboard",
|
||||
408: "Plumbing fixture",
|
||||
409: "Scoreboard",
|
||||
410: "Baseball bat",
|
||||
411: "Envelope",
|
||||
412: "Adhesive tape",
|
||||
413: "Briefcase",
|
||||
414: "Paddle",
|
||||
415: "Bow and arrow",
|
||||
416: "Telephone",
|
||||
417: "Sheep",
|
||||
418: "Jacket",
|
||||
419: "Boy",
|
||||
420: "Pizza",
|
||||
421: "Otter",
|
||||
422: "Office supplies",
|
||||
423: "Couch",
|
||||
424: "Cello",
|
||||
425: "Bull",
|
||||
426: "Camel",
|
||||
427: "Ball",
|
||||
428: "Duck",
|
||||
429: "Whale",
|
||||
430: "Shirt",
|
||||
431: "Tank",
|
||||
432: "Motorcycle",
|
||||
433: "Accordion",
|
||||
434: "Owl",
|
||||
435: "Porcupine",
|
||||
436: "Sun hat",
|
||||
437: "Nail",
|
||||
438: "Scissors",
|
||||
439: "Swan",
|
||||
440: "Lamp",
|
||||
441: "Crown",
|
||||
442: "Piano",
|
||||
443: "Sculpture",
|
||||
444: "Cheetah",
|
||||
445: "Oboe",
|
||||
446: "Tin can",
|
||||
447: "Mango",
|
||||
448: "Tripod",
|
||||
449: "Oven",
|
||||
450: "Mouse",
|
||||
451: "Barge",
|
||||
452: "Coffee",
|
||||
453: "Snowboard",
|
||||
454: "Common fig",
|
||||
455: "Salad",
|
||||
456: "Marine invertebrates",
|
||||
457: "Umbrella",
|
||||
458: "Kangaroo",
|
||||
459: "Human arm",
|
||||
460: "Measuring cup",
|
||||
461: "Snail",
|
||||
462: "Loveseat",
|
||||
463: "Suit",
|
||||
464: "Teapot",
|
||||
465: "Bottle",
|
||||
466: "Alpaca",
|
||||
467: "Kettle",
|
||||
468: "Trousers",
|
||||
469: "Popcorn",
|
||||
470: "Centipede",
|
||||
471: "Spider",
|
||||
472: "Sparrow",
|
||||
473: "Plate",
|
||||
474: "Bagel",
|
||||
475: "Personal care",
|
||||
476: "Apple",
|
||||
477: "Brassiere",
|
||||
478: "Bathroom cabinet",
|
||||
479: "studio couch",
|
||||
480: "Computer keyboard",
|
||||
481: "Table tennis racket",
|
||||
482: "Sushi",
|
||||
483: "Cabinetry",
|
||||
484: "Street light",
|
||||
485: "Towel",
|
||||
486: "Nightstand",
|
||||
487: "Rabbit",
|
||||
488: "Dolphin",
|
||||
489: "Dog",
|
||||
490: "Jug",
|
||||
491: "Wok",
|
||||
492: "Fire hydrant",
|
||||
493: "Human eye",
|
||||
494: "Skyscraper",
|
||||
495: "Backpack",
|
||||
496: "Potato",
|
||||
497: "Paper towel",
|
||||
498: "Lifejacket",
|
||||
499: "Bicycle wheel",
|
||||
500: "Toilet",
|
||||
}
|
||||
|
||||
return clsid2catid, catid2name
|
|
@ -0,0 +1,241 @@
|
|||
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
from ppdet.core.workspace import register, serializable
|
||||
from .dataset import DetDataset
|
||||
|
||||
from ppdet.utils.logger import setup_logger
|
||||
logger = setup_logger(__name__)
|
||||
|
||||
|
||||
@register
@serializable
class COCODataSet(DetDataset):
    """
    Load dataset with COCO format.

    Args:
        dataset_dir (str): root directory for dataset.
        image_dir (str): directory for images.
        anno_path (str): coco annotation file path.
        data_fields (list): key name of data dictionary, at least have 'image'.
        sample_num (int): number of samples to load, -1 means all.
        load_crowd (bool): whether to load crowded ground-truth.
            False as default
        allow_empty (bool): whether to load empty entry. False as default
        empty_ratio (float): the ratio of empty record number to total
            record's, if empty_ratio is out of [0. ,1.), do not sample the
            records. 1. as default
    """

    def __init__(self,
                 dataset_dir=None,
                 image_dir=None,
                 anno_path=None,
                 data_fields=['image'],
                 sample_num=-1,
                 load_crowd=False,
                 allow_empty=False,
                 empty_ratio=1.):
        super(COCODataSet, self).__init__(dataset_dir, image_dir, anno_path,
                                          data_fields, sample_num)
        # Set True by parse_dataset() when the file has no 'annotations'.
        self.load_image_only = False
        # TODO: remove load_semantic (kept for legacy semantic-seg maps).
        self.load_semantic = False
        self.load_crowd = load_crowd
        self.allow_empty = allow_empty
        self.empty_ratio = empty_ratio

    def _sample_empty(self, records, num):
        """Subsample the empty (no-gt) records so that they make up at most
        `empty_ratio` of the final dataset, given `num` non-empty records."""
        # if empty_ratio is out of [0. ,1.), do not sample the records
        if self.empty_ratio < 0. or self.empty_ratio >= 1.:
            return records
        import random
        # BUGFIX: clamp to the number of available empty records so that
        # random.sample never raises ValueError when the requested ratio
        # asks for more empty samples than actually exist.
        sample_num = min(
            int(num * self.empty_ratio / (1 - self.empty_ratio)),
            len(records))
        records = random.sample(records, sample_num)
        return records

    def parse_dataset(self):
        """Parse the COCO json into self.roidbs (a list of record dicts),
        and populate self.catid2clsid / self.cname2cid."""
        anno_path = os.path.join(self.dataset_dir, self.anno_path)
        image_dir = os.path.join(self.dataset_dir, self.image_dir)

        assert anno_path.endswith('.json'), \
            'invalid coco annotation file: ' + anno_path
        from pycocotools.coco import COCO
        coco = COCO(anno_path)
        img_ids = coco.getImgIds()
        img_ids.sort()
        cat_ids = coco.getCatIds()
        records = []
        empty_records = []
        ct = 0

        # Map the (possibly gappy) COCO category ids to contiguous class ids.
        self.catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)})
        self.cname2cid = dict({
            coco.loadCats(catid)[0]['name']: clsid
            for catid, clsid in self.catid2clsid.items()
        })

        if 'annotations' not in coco.dataset:
            self.load_image_only = True
            logger.warning('Annotation file: {} does not contains ground truth '
                           'and load image information only.'.format(anno_path))

        for img_id in img_ids:
            img_anno = coco.loadImgs([img_id])[0]
            im_fname = img_anno['file_name']
            im_w = float(img_anno['width'])
            im_h = float(img_anno['height'])

            im_path = os.path.join(image_dir,
                                   im_fname) if image_dir else im_fname
            is_empty = False
            # Skip entries whose image file is missing or whose recorded
            # size is nonsensical, rather than failing the whole parse.
            if not os.path.exists(im_path):
                logger.warning('Illegal image file: {}, and it will be '
                               'ignored'.format(im_path))
                continue

            if im_w < 0 or im_h < 0:
                logger.warning('Illegal width: {} or height: {} in annotation, '
                               'and im_id: {} will be ignored'.format(
                                   im_w, im_h, img_id))
                continue

            coco_rec = {
                'im_file': im_path,
                'im_id': np.array([img_id]),
                'h': im_h,
                'w': im_w,
            } if 'image' in self.data_fields else {}

            if not self.load_image_only:
                ins_anno_ids = coco.getAnnIds(
                    imgIds=[img_id], iscrowd=None if self.load_crowd else False)
                instances = coco.loadAnns(ins_anno_ids)

                bboxes = []
                is_rbox_anno = False
                for inst in instances:
                    # check gt bbox
                    if inst.get('ignore', False):
                        continue
                    if 'bbox' not in inst.keys():
                        continue
                    else:
                        # An all-zero bbox is treated as absent.
                        if not any(np.array(inst['bbox'])):
                            continue

                    # A 5-element bbox is a rotated box: (xc, yc, w, h, angle).
                    is_rbox_anno = True if len(inst['bbox']) == 5 else False
                    if is_rbox_anno:
                        xc, yc, box_w, box_h, angle = inst['bbox']
                        x1 = xc - box_w / 2.0
                        y1 = yc - box_h / 2.0
                        x2 = x1 + box_w
                        y2 = y1 + box_h
                    else:
                        x1, y1, box_w, box_h = inst['bbox']
                        x2 = x1 + box_w
                        y2 = y1 + box_h
                    eps = 1e-5
                    # Keep only boxes with positive area and non-degenerate sides.
                    if inst['area'] > 0 and x2 - x1 > eps and y2 - y1 > eps:
                        inst['clean_bbox'] = [
                            round(float(x), 3) for x in [x1, y1, x2, y2]
                        ]
                        if is_rbox_anno:
                            inst['clean_rbox'] = [xc, yc, box_w, box_h, angle]
                        bboxes.append(inst)
                    else:
                        logger.warning(
                            'Found an invalid bbox in annotations: im_id: {}, '
                            'area: {} x1: {}, y1: {}, x2: {}, y2: {}.'.format(
                                img_id, float(inst['area']), x1, y1, x2, y2))

                num_bbox = len(bboxes)
                if num_bbox <= 0 and not self.allow_empty:
                    continue
                elif num_bbox <= 0:
                    is_empty = True

                gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
                if is_rbox_anno:
                    gt_rbox = np.zeros((num_bbox, 5), dtype=np.float32)
                gt_theta = np.zeros((num_bbox, 1), dtype=np.int32)
                gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
                is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
                difficult = np.zeros((num_bbox, 1), dtype=np.int32)
                gt_poly = [None] * num_bbox

                has_segmentation = False
                for i, box in enumerate(bboxes):
                    catid = box['category_id']
                    gt_class[i][0] = self.catid2clsid[catid]
                    gt_bbox[i, :] = box['clean_bbox']
                    # xc, yc, w, h, theta
                    if is_rbox_anno:
                        gt_rbox[i, :] = box['clean_rbox']
                    is_crowd[i][0] = box['iscrowd']
                    # check RLE format
                    if 'segmentation' in box and box['iscrowd'] == 1:
                        # Crowd RLE masks get a dummy polygon placeholder.
                        gt_poly[i] = [[0.0, 0.0], ]
                    elif 'segmentation' in box and box['segmentation']:
                        gt_poly[i] = box['segmentation']
                        has_segmentation = True

                # Drop images whose only segmentations were filtered out,
                # unless empty entries are explicitly allowed.
                if has_segmentation and not any(
                        gt_poly) and not self.allow_empty:
                    continue

                if is_rbox_anno:
                    gt_rec = {
                        'is_crowd': is_crowd,
                        'gt_class': gt_class,
                        'gt_bbox': gt_bbox,
                        'gt_rbox': gt_rbox,
                        'gt_poly': gt_poly,
                    }
                else:
                    gt_rec = {
                        'is_crowd': is_crowd,
                        'gt_class': gt_class,
                        'gt_bbox': gt_bbox,
                        'gt_poly': gt_poly,
                    }

                # Only expose the fields the pipeline asked for.
                for k, v in gt_rec.items():
                    if k in self.data_fields:
                        coco_rec[k] = v

                # TODO: remove load_semantic
                if self.load_semantic and 'semantic' in self.data_fields:
                    seg_path = os.path.join(self.dataset_dir, 'stuffthingmaps',
                                            'train2017', im_fname[:-3] + 'png')
                    coco_rec.update({'semantic': seg_path})

            logger.debug('Load file: {}, im_id: {}, h: {}, w: {}.'.format(
                im_path, img_id, im_h, im_w))
            if is_empty:
                empty_records.append(coco_rec)
            else:
                records.append(coco_rec)
            ct += 1
            if self.sample_num > 0 and ct >= self.sample_num:
                break
        assert ct > 0, 'not found any coco record in %s' % (anno_path)
        logger.debug('{} samples in file {}'.format(ct, anno_path))
        if len(empty_records) > 0:
            empty_records = self._sample_empty(empty_records, len(records))
            records += empty_records
        self.roidbs = records
|
|
@ -0,0 +1,192 @@
|
|||
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
try:
|
||||
from collections.abc import Sequence
|
||||
except Exception:
|
||||
from collections import Sequence
|
||||
from paddle.io import Dataset
|
||||
from ppdet.core.workspace import register, serializable
|
||||
from ppdet.utils.download import get_dataset_path
|
||||
import copy
|
||||
|
||||
|
||||
@serializable
class DetDataset(Dataset):
    """
    Load detection dataset.

    Base class for detection datasets. Subclasses implement parse_dataset()
    to populate `self.roidbs` (a list of per-image record dicts); the
    training framework then calls set_kwargs(), set_transform() and
    set_epoch() before indexing.

    Args:
        dataset_dir (str): root directory for dataset.
        image_dir (str): directory for images.
        anno_path (str): annotation file path.
        data_fields (list): key name of data dictionary, at least have 'image'.
        sample_num (int): number of samples to load, -1 means all.
        use_default_label (bool): whether to load default label list.
    """

    def __init__(self,
                 dataset_dir=None,
                 image_dir=None,
                 anno_path=None,
                 data_fields=['image'],
                 sample_num=-1,
                 use_default_label=None,
                 **kwargs):
        super(DetDataset, self).__init__()
        # Fall back to '' so os.path.join(...) in subclasses degrades to a
        # no-op prefix instead of failing on None.
        self.dataset_dir = dataset_dir if dataset_dir is not None else ''
        self.anno_path = anno_path
        self.image_dir = image_dir if image_dir is not None else ''
        self.data_fields = data_fields
        self.sample_num = sample_num
        self.use_default_label = use_default_label
        # _epoch is updated via set_epoch(); _curr_iter is incremented on
        # every __getitem__ call and stamped into each record.
        self._epoch = 0
        self._curr_iter = 0

    def __len__(self, ):
        # NOTE(review): self.roidbs is only populated by parse_dataset() in
        # subclasses; calling len() before that raises AttributeError.
        return len(self.roidbs)

    def __getitem__(self, idx):
        # data batch
        roidb = copy.deepcopy(self.roidbs[idx])
        # mixup/cutmix/mosaic epochs come from set_kwargs() (-1 disables
        # them). While active, the sample is paired with randomly chosen
        # companion records so downstream transforms can blend them;
        # an epoch value of 0 appears to keep the augmentation on for all
        # epochs — TODO confirm against the transform implementations.
        if self.mixup_epoch == 0 or self._epoch < self.mixup_epoch:
            n = len(self.roidbs)
            idx = np.random.randint(n)
            roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
        elif self.cutmix_epoch == 0 or self._epoch < self.cutmix_epoch:
            n = len(self.roidbs)
            idx = np.random.randint(n)
            roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
        elif self.mosaic_epoch == 0 or self._epoch < self.mosaic_epoch:
            n = len(self.roidbs)
            # mosaic needs 4 images: the sample itself plus 3 random ones.
            roidb = [roidb, ] + [
                copy.deepcopy(self.roidbs[np.random.randint(n)])
                for _ in range(3)
            ]
        # Stamp the current iteration counter into every record involved.
        if isinstance(roidb, Sequence):
            for r in roidb:
                r['curr_iter'] = self._curr_iter
        else:
            roidb['curr_iter'] = self._curr_iter
        self._curr_iter += 1

        # self.transform is installed by set_transform() before iteration.
        return self.transform(roidb)

    def check_or_download_dataset(self):
        # Resolve (and possibly download) the dataset root path.
        self.dataset_dir = get_dataset_path(self.dataset_dir, self.anno_path,
                                            self.image_dir)

    def set_kwargs(self, **kwargs):
        # -1 disables the corresponding augmentation entirely; these
        # attributes are required by __getitem__.
        self.mixup_epoch = kwargs.get('mixup_epoch', -1)
        self.cutmix_epoch = kwargs.get('cutmix_epoch', -1)
        self.mosaic_epoch = kwargs.get('mosaic_epoch', -1)

    def set_transform(self, transform):
        # The composed preprocessing pipeline applied in __getitem__.
        self.transform = transform

    def set_epoch(self, epoch_id):
        # Lets __getitem__ decide whether mixup/cutmix/mosaic are active.
        self._epoch = epoch_id

    def parse_dataset(self, ):
        # Subclasses must populate self.roidbs here.
        raise NotImplementedError(
            "Need to implement parse_dataset method of Dataset")

    def get_anno(self):
        """Return the full annotation path, or None when not configured."""
        if self.anno_path is None:
            return
        return os.path.join(self.dataset_dir, self.anno_path)
|
||||
|
||||
|
||||
def _is_valid_file(f, extensions=('.jpg', '.jpeg', '.png', '.bmp')):
|
||||
return f.lower().endswith(extensions)
|
||||
|
||||
|
||||
def _make_dataset(dir):
|
||||
dir = os.path.expanduser(dir)
|
||||
if not os.path.isdir(dir):
|
||||
raise ('{} should be a dir'.format(dir))
|
||||
images = []
|
||||
for root, _, fnames in sorted(os.walk(dir, followlinks=True)):
|
||||
for fname in sorted(fnames):
|
||||
path = os.path.join(root, fname)
|
||||
if _is_valid_file(path):
|
||||
images.append(path)
|
||||
return images
|
||||
|
||||
|
||||
@register
@serializable
class ImageFolder(DetDataset):
    """
    Dataset that serves raw images from a directory (or an explicit list of
    files) for inference — records carry only 'im_id' and 'im_file', no
    ground truth.
    """

    def __init__(self,
                 dataset_dir=None,
                 image_dir=None,
                 anno_path=None,
                 sample_num=-1,
                 use_default_label=None,
                 **kwargs):
        super(ImageFolder, self).__init__(
            dataset_dir,
            image_dir,
            anno_path,
            sample_num=sample_num,
            use_default_label=use_default_label)
        # Maps the sequential im_id assigned in _load_images() back to the
        # original file path (used when saving visualized results).
        self._imid2path = {}
        self.roidbs = None
        self.sample_num = sample_num

    def check_or_download_dataset(self):
        # Nothing to download: images are supplied by the user.
        return

    def parse_dataset(self, ):
        # Lazy: only scan the directory the first time it is needed.
        if not self.roidbs:
            self.roidbs = self._load_images()

    def _parse(self):
        """Expand self.image_dir (a path or list of paths) into a flat list
        of image file paths."""
        image_dir = self.image_dir
        if not isinstance(image_dir, Sequence):
            image_dir = [image_dir]
        images = []
        for im_dir in image_dir:
            if os.path.isdir(im_dir):
                # NOTE(review): isdir() is checked on the un-joined path but
                # the walk uses the dataset_dir-joined path; this only works
                # because dataset_dir defaults to '' — confirm before using
                # a non-empty dataset_dir with relative image dirs.
                im_dir = os.path.join(self.dataset_dir, im_dir)
                images.extend(_make_dataset(im_dir))
            elif os.path.isfile(im_dir) and _is_valid_file(im_dir):
                images.append(im_dir)
        return images

    def _load_images(self):
        """Build the roidb records, assigning sequential im_ids and
        honoring sample_num as an upper bound."""
        images = self._parse()
        ct = 0
        records = []
        for image in images:
            assert image != '' and os.path.isfile(image), \
                    "Image {} not found".format(image)
            if self.sample_num > 0 and ct >= self.sample_num:
                break
            rec = {'im_id': np.array([ct]), 'im_file': image}
            self._imid2path[ct] = image
            ct += 1
            records.append(rec)
        assert len(records) > 0, "No image file found"
        return records

    def get_imid2path(self):
        # Accessor for the im_id -> original-path map built above.
        return self._imid2path

    def set_images(self, images):
        # Replace the image source (path, dir, or list) and rescan eagerly.
        self.image_dir = images
        self.roidbs = self._load_images()
|
|
@ -0,0 +1,656 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import cv2
|
||||
import numpy as np
|
||||
import json
|
||||
import copy
|
||||
import pycocotools
|
||||
from pycocotools.coco import COCO
|
||||
from .dataset import DetDataset
|
||||
from ppdet.core.workspace import register, serializable
|
||||
|
||||
|
||||
@serializable
class KeypointBottomUpBaseDataset(DetDataset):
    """Base class for bottom-up datasets.

    All datasets should subclass it.
    All subclasses should overwrite:
        Methods:`_get_imganno`

    Args:
        dataset_dir (str): Root path to the dataset.
        anno_path (str): Relative path to the annotation file.
        image_dir (str): Path to a directory where images are held.
            Default: None.
        num_joints (int): keypoint numbers
        transform (composed(operators)): A sequence of data transforms.
        shard (list): [rank, worldsize], the distributed env params
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 transform=[],
                 shard=[0, 1],
                 test_mode=False):
        super().__init__(dataset_dir, image_dir, anno_path)
        self.image_info = {}
        self.ann_info = {}

        # Image files are resolved relative to <dataset_dir>/<image_dir>.
        self.img_prefix = os.path.join(dataset_dir, image_dir)
        self.transform = transform
        self.test_mode = test_mode

        self.ann_info['num_joints'] = num_joints
        # Filled by subclasses; __len__ reports its length.
        self.img_ids = []

    def __len__(self):
        """Get dataset length."""
        return len(self.img_ids)

    def _get_imganno(self, idx):
        """Get anno for a single image. Subclasses must override."""
        raise NotImplementedError

    def __getitem__(self, idx):
        """Prepare image for training given the index."""
        # Deep-copy so downstream transforms cannot mutate cached annotations.
        records = copy.deepcopy(self._get_imganno(idx))
        records['image'] = cv2.imread(records['image_file'])
        records['image'] = cv2.cvtColor(records['image'], cv2.COLOR_BGR2RGB)
        # Convert the boolean ignore mask to uint8 for the transform pipeline.
        records['mask'] = (records['mask'] + 0).astype('uint8')
        records = self.transform(records)
        return records

    def parse_dataset(self):
        # Subclasses load their annotations in __init__; nothing to do here.
        return
@register
@serializable
class KeypointBottomUpCocoDataset(KeypointBottomUpBaseDataset):
    """COCO dataset for bottom-up pose estimation.

    The dataset loads raw features and apply specified transforms
    to return a dict containing the image tensors and other information.

    COCO keypoint indexes::

        0: 'nose',
        1: 'left_eye',
        2: 'right_eye',
        3: 'left_ear',
        4: 'right_ear',
        5: 'left_shoulder',
        6: 'right_shoulder',
        7: 'left_elbow',
        8: 'right_elbow',
        9: 'left_wrist',
        10: 'right_wrist',
        11: 'left_hip',
        12: 'right_hip',
        13: 'left_knee',
        14: 'right_knee',
        15: 'left_ankle',
        16: 'right_ankle'

    Args:
        dataset_dir (str): Root path to the dataset.
        anno_path (str): Relative path to the annotation file.
        image_dir (str): Path to a directory where images are held.
            Default: None.
        num_joints (int): keypoint numbers
        transform (composed(operators)): A sequence of data transforms.
        shard (list): [rank, worldsize], the distributed env params
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 transform=[],
                 shard=[0, 1],
                 test_mode=False):
        super().__init__(dataset_dir, image_dir, anno_path, num_joints,
                         transform, shard, test_mode)

        ann_file = os.path.join(dataset_dir, anno_path)
        self.coco = COCO(ann_file)

        self.img_ids = self.coco.getImgIds()
        if not test_mode:
            # For training, keep only images that have at least one annotation.
            self.img_ids = [
                img_id for img_id in self.img_ids
                if len(self.coco.getAnnIds(
                    imgIds=img_id, iscrowd=None)) > 0
            ]
        # Contiguous block sharding across distributed workers: worker
        # `rank` takes the rank-th block of size len(img_ids) // worldsize
        # (remainder images are dropped).
        blocknum = int(len(self.img_ids) / shard[1])
        self.img_ids = self.img_ids[(blocknum * shard[0]):(blocknum * (shard[0]
                                                                       + 1))]
        self.num_images = len(self.img_ids)
        self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
        self.dataset_name = 'coco'

        # Map (possibly sparse) COCO category ids to contiguous class ids.
        cat_ids = self.coco.getCatIds()
        self.catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)})
        print(f'=> num_images: {self.num_images}')

    @staticmethod
    def _get_mapping_id_name(imgs):
        """
        Args:
            imgs (dict): dict of image info.

        Returns:
            tuple: Image name & id mapping dicts.

            - id2name (dict): Mapping image id to name.
            - name2id (dict): Mapping image name to id.
        """
        id2name = {}
        name2id = {}
        for image_id, image in imgs.items():
            file_name = image['file_name']
            id2name[image_id] = file_name
            name2id[file_name] = image_id

        return id2name, name2id

    def _get_imganno(self, idx):
        """Get anno for a single image.

        Args:
            idx (int): image idx

        Returns:
            dict: info for model training
        """
        coco = self.coco
        img_id = self.img_ids[idx]
        ann_ids = coco.getAnnIds(imgIds=img_id)
        anno = coco.loadAnns(ann_ids)

        # Build the ignore mask from the full annotation list (including
        # crowd regions) before the filtering below.
        mask = self._get_mask(anno, idx)
        # Keep non-crowd objects, plus any object with labelled keypoints.
        anno = [
            obj for obj in anno
            if obj['iscrowd'] == 0 or obj['num_keypoints'] > 0
        ]

        joints, orgsize = self._get_joints(anno, idx)

        db_rec = {}
        db_rec['im_id'] = img_id
        db_rec['image_file'] = os.path.join(self.img_prefix,
                                            self.id2name[img_id])
        db_rec['mask'] = mask
        db_rec['joints'] = joints
        db_rec['im_shape'] = orgsize

        return db_rec

    def _get_joints(self, anno, idx):
        """Get joints for all people in an image."""
        num_people = len(anno)

        joints = np.zeros(
            (num_people, self.ann_info['num_joints'], 3), dtype=np.float32)

        for i, obj in enumerate(anno):
            joints[i, :self.ann_info['num_joints'], :3] = \
                np.array(obj['keypoints']).reshape([-1, 3])

        img_info = self.coco.loadImgs(self.img_ids[idx])[0]
        # Normalize (x, y) coordinates by the image width/height.
        joints[..., 0] /= img_info['width']
        joints[..., 1] /= img_info['height']
        # Original image size as [height, width].
        orgsize = np.array([img_info['height'], img_info['width']])

        return joints, orgsize

    def _get_mask(self, anno, idx):
        """Get ignore masks to mask out losses."""
        coco = self.coco
        img_info = coco.loadImgs(self.img_ids[idx])[0]

        m = np.zeros((img_info['height'], img_info['width']), dtype=np.float32)

        for obj in anno:
            if 'segmentation' in obj:
                if obj['iscrowd']:
                    # Crowd regions are excluded from the loss.
                    rle = pycocotools.mask.frPyObjects(obj['segmentation'],
                                                       img_info['height'],
                                                       img_info['width'])
                    m += pycocotools.mask.decode(rle)
                elif obj['num_keypoints'] == 0:
                    # Instances without labelled keypoints are excluded too.
                    rles = pycocotools.mask.frPyObjects(obj['segmentation'],
                                                        img_info['height'],
                                                        img_info['width'])
                    for rle in rles:
                        m += pycocotools.mask.decode(rle)

        # Boolean mask: True where the pixel should contribute to the loss.
        return m < 0.5
@register
@serializable
class KeypointBottomUpCrowdPoseDataset(KeypointBottomUpCocoDataset):
    """CrowdPose dataset for bottom-up pose estimation.

    The dataset loads raw features and apply specified transforms
    to return a dict containing the image tensors and other information.

    CrowdPose keypoint indexes::

        0: 'left_shoulder',
        1: 'right_shoulder',
        2: 'left_elbow',
        3: 'right_elbow',
        4: 'left_wrist',
        5: 'right_wrist',
        6: 'left_hip',
        7: 'right_hip',
        8: 'left_knee',
        9: 'right_knee',
        10: 'left_ankle',
        11: 'right_ankle',
        12: 'top_head',
        13: 'neck'

    Args:
        dataset_dir (str): Root path to the dataset.
        anno_path (str): Relative path to the annotation file.
        image_dir (str): Path to a directory where images are held.
            Default: None.
        num_joints (int): keypoint numbers
        transform (composed(operators)): A sequence of data transforms.
        shard (list): [rank, worldsize], the distributed env params
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 transform=[],
                 shard=[0, 1],
                 test_mode=False):
        # NOTE(review): the parent __init__ already builds a COCO index from
        # this same annotation file; it is rebuilt below, so the file is
        # parsed twice (kept as-is).
        super().__init__(dataset_dir, image_dir, anno_path, num_joints,
                         transform, shard, test_mode)

        ann_file = os.path.join(dataset_dir, anno_path)

        self.coco = COCO(ann_file)

        self.img_ids = self.coco.getImgIds()
        if not test_mode:
            # For training, keep only images that have at least one annotation.
            self.img_ids = [
                img_id for img_id in self.img_ids
                if len(self.coco.getAnnIds(
                    imgIds=img_id, iscrowd=None)) > 0
            ]
        # Contiguous block sharding across distributed workers.
        blocknum = int(len(self.img_ids) / shard[1])
        self.img_ids = self.img_ids[(blocknum * shard[0]):(blocknum * (shard[0]
                                                                       + 1))]
        self.num_images = len(self.img_ids)
        self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)

        self.dataset_name = 'crowdpose'
        print('=> num_images: {}'.format(self.num_images))
@serializable
class KeypointTopDownBaseDataset(DetDataset):
    """Base class for top_down datasets.

    All datasets should subclass it.
    All subclasses should overwrite:
        Methods:`_get_db`

    Args:
        dataset_dir (str): Root path to the dataset.
        image_dir (str): Path to a directory where images are held.
        anno_path (str): Relative path to the annotation file.
        num_joints (int): keypoint numbers
        transform (composed(operators)): A sequence of data transforms.
    """

    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 transform=[]):
        super().__init__(dataset_dir, image_dir, anno_path)
        self.image_info = {}
        self.ann_info = {}

        # Image files are resolved relative to <dataset_dir>/<image_dir>.
        self.img_prefix = os.path.join(dataset_dir, image_dir)
        self.transform = transform

        self.ann_info['num_joints'] = num_joints
        # Per-sample record list, filled by subclasses in parse_dataset().
        self.db = []

    def __len__(self):
        """Get dataset length."""
        return len(self.db)

    def _get_db(self):
        """Get a sample. Subclasses must override."""
        raise NotImplementedError

    def __getitem__(self, idx):
        """Prepare sample for training given the index."""
        # Deep-copy so transforms cannot mutate the cached record.
        records = copy.deepcopy(self.db[idx])
        # IMREAD_IGNORE_ORIENTATION skips EXIF auto-rotation so keypoint
        # coordinates stay aligned with the raw pixel data.
        records['image'] = cv2.imread(records['image_file'], cv2.IMREAD_COLOR |
                                      cv2.IMREAD_IGNORE_ORIENTATION)
        records['image'] = cv2.cvtColor(records['image'], cv2.COLOR_BGR2RGB)
        # Ground-truth boxes carry no detector confidence; default to 1.
        records['score'] = records['score'] if 'score' in records else 1
        records = self.transform(records)
        return records
@register
@serializable
class KeypointTopDownCocoDataset(KeypointTopDownBaseDataset):
    """COCO dataset for top-down pose estimation.

    The dataset loads raw features and apply specified transforms
    to return a dict containing the image tensors and other information.

    COCO keypoint indexes:

        0: 'nose',
        1: 'left_eye',
        2: 'right_eye',
        3: 'left_ear',
        4: 'right_ear',
        5: 'left_shoulder',
        6: 'right_shoulder',
        7: 'left_elbow',
        8: 'right_elbow',
        9: 'left_wrist',
        10: 'right_wrist',
        11: 'left_hip',
        12: 'right_hip',
        13: 'left_knee',
        14: 'right_knee',
        15: 'left_ankle',
        16: 'right_ankle'

    Args:
        dataset_dir (str): Root path to the dataset.
        image_dir (str): Path to a directory where images are held.
        anno_path (str): Relative path to the annotation file.
        num_joints (int): Keypoint numbers
        trainsize (list):[w, h] Image target size
        transform (composed(operators)): A sequence of data transforms.
        bbox_file (str): Path to a detection bbox file
            Default: None.
        use_gt_bbox (bool): Whether to use ground truth bbox
            Default: True.
        pixel_std (int): The pixel std of the scale
            Default: 200.
        image_thre (float): The threshold to filter the detection box
            Default: 0.0.
    """

    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 trainsize,
                 transform=[],
                 bbox_file=None,
                 use_gt_bbox=True,
                 pixel_std=200,
                 image_thre=0.0):
        super().__init__(dataset_dir, image_dir, anno_path, num_joints,
                         transform)

        self.bbox_file = bbox_file
        self.use_gt_bbox = use_gt_bbox
        self.trainsize = trainsize
        self.pixel_std = pixel_std
        self.image_thre = image_thre
        self.dataset_name = 'coco'

    def parse_dataset(self):
        # Ground-truth boxes for train/eval; external detector boxes for test.
        if self.use_gt_bbox:
            self.db = self._load_coco_keypoint_annotations()
        else:
            self.db = self._load_coco_person_detection_results()

    def _load_coco_keypoint_annotations(self):
        """Build records from COCO ground-truth keypoint annotations."""
        coco = COCO(self.get_anno())
        img_ids = coco.getImgIds()
        gt_db = []
        for index in img_ids:
            im_ann = coco.loadImgs(index)[0]
            width = im_ann['width']
            height = im_ann['height']
            file_name = im_ann['file_name']
            im_id = int(im_ann["id"])

            annIds = coco.getAnnIds(imgIds=index, iscrowd=False)
            objs = coco.loadAnns(annIds)

            # Clip boxes to the image and drop degenerate/empty ones.
            valid_objs = []
            for obj in objs:
                x, y, w, h = obj['bbox']
                x1 = np.max((0, x))
                y1 = np.max((0, y))
                x2 = np.min((width - 1, x1 + np.max((0, w - 1))))
                y2 = np.min((height - 1, y1 + np.max((0, h - 1))))
                if obj['area'] > 0 and x2 >= x1 and y2 >= y1:
                    obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
                    valid_objs.append(obj)
            objs = valid_objs

            rec = []
            for obj in objs:
                if max(obj['keypoints']) == 0:
                    # Instance has no labelled keypoints at all.
                    continue

                # BUGFIX: np.float was deprecated in NumPy 1.20 and removed
                # in 1.24; np.float64 is the exact equivalent alias.
                joints = np.zeros(
                    (self.ann_info['num_joints'], 3), dtype=np.float64)
                joints_vis = np.zeros(
                    (self.ann_info['num_joints'], 3), dtype=np.float64)
                for ipt in range(self.ann_info['num_joints']):
                    joints[ipt, 0] = obj['keypoints'][ipt * 3 + 0]
                    joints[ipt, 1] = obj['keypoints'][ipt * 3 + 1]
                    joints[ipt, 2] = 0
                    # COCO visibility flag: 0 = unlabelled, 1 = occluded,
                    # 2 = visible; collapse to a 0/1 "labelled" flag.
                    t_vis = obj['keypoints'][ipt * 3 + 2]
                    if t_vis > 1:
                        t_vis = 1
                    joints_vis[ipt, 0] = t_vis
                    joints_vis[ipt, 1] = t_vis
                    joints_vis[ipt, 2] = 0

                center, scale = self._box2cs(obj['clean_bbox'][:4])
                rec.append({
                    'image_file': os.path.join(self.img_prefix, file_name),
                    'center': center,
                    'scale': scale,
                    'joints': joints,
                    'joints_vis': joints_vis,
                    'im_id': im_id,
                })
            gt_db.extend(rec)

        return gt_db

    def _box2cs(self, box):
        """Convert an [x, y, w, h] box to (center, scale).

        The box is widened/heightened to match the training aspect ratio,
        scale is expressed in units of ``pixel_std``, and real boxes
        (center x != -1) receive a further 25% margin.
        """
        x, y, w, h = box[:4]
        center = np.zeros((2), dtype=np.float32)
        center[0] = x + w * 0.5
        center[1] = y + h * 0.5
        aspect_ratio = self.trainsize[0] * 1.0 / self.trainsize[1]

        if w > aspect_ratio * h:
            h = w * 1.0 / aspect_ratio
        elif w < aspect_ratio * h:
            w = h * aspect_ratio
        scale = np.array(
            [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
            dtype=np.float32)
        if center[0] != -1:
            scale = scale * 1.25

        return center, scale

    def _load_coco_person_detection_results(self):
        """Build records from an external person-detector bbox JSON file.

        Returns:
            list|None: records above ``image_thre``, or None when the bbox
                file is empty/unreadable.
        """
        all_boxes = None
        bbox_file_path = os.path.join(self.dataset_dir, self.bbox_file)
        with open(bbox_file_path, 'r') as f:
            all_boxes = json.load(f)

        if not all_boxes:
            print('=> Load %s fail!' % bbox_file_path)
            return None

        kpt_db = []
        for n_img in range(0, len(all_boxes)):
            det_res = all_boxes[n_img]
            if det_res['category_id'] != 1:
                # Keep only 'person' detections (COCO category id 1).
                continue
            file_name = det_res[
                'filename'] if 'filename' in det_res else '%012d.jpg' % det_res[
                    'image_id']
            img_name = os.path.join(self.img_prefix, file_name)
            box = det_res['bbox']
            score = det_res['score']
            im_id = int(det_res['image_id'])

            if score < self.image_thre:
                continue

            center, scale = self._box2cs(box)
            # BUGFIX: np.float -> np.float64 (alias removed from NumPy).
            joints = np.zeros(
                (self.ann_info['num_joints'], 3), dtype=np.float64)
            joints_vis = np.ones(
                (self.ann_info['num_joints'], 3), dtype=np.float64)
            kpt_db.append({
                'image_file': img_name,
                'im_id': im_id,
                'center': center,
                'scale': scale,
                'score': score,
                'joints': joints,
                'joints_vis': joints_vis,
            })

        return kpt_db
@register
@serializable
class KeypointTopDownMPIIDataset(KeypointTopDownBaseDataset):
    """MPII dataset for topdown pose estimation.

    The dataset loads raw features and apply specified transforms
    to return a dict containing the image tensors and other information.

    MPII keypoint indexes::

        0: 'right_ankle',
        1: 'right_knee',
        2: 'right_hip',
        3: 'left_hip',
        4: 'left_knee',
        5: 'left_ankle',
        6: 'pelvis',
        7: 'thorax',
        8: 'upper_neck',
        9: 'head_top',
        10: 'right_wrist',
        11: 'right_elbow',
        12: 'right_shoulder',
        13: 'left_shoulder',
        14: 'left_elbow',
        15: 'left_wrist',

    Args:
        dataset_dir (str): Root path to the dataset.
        image_dir (str): Path to a directory where images are held.
        anno_path (str): Relative path to the annotation file.
        num_joints (int): Keypoint numbers
        trainsize (list):[w, h] Image target size
        transform (composed(operators)): A sequence of data transforms.
    """

    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 transform=[]):
        super().__init__(dataset_dir, image_dir, anno_path, num_joints,
                         transform)

        self.dataset_name = 'mpii'

    def parse_dataset(self):
        """Load the MPII JSON annotation file into ``self.db``."""
        with open(self.get_anno()) as anno_file:
            anno = json.load(anno_file)

        gt_db = []
        for a in anno:
            image_name = a['image']
            # Fall back to the numeric file stem when no image_id is given.
            im_id = a['image_id'] if 'image_id' in a else int(
                os.path.splitext(image_name)[0])

            # BUGFIX: np.float was deprecated in NumPy 1.20 and removed in
            # 1.24; np.float64 is the exact equivalent alias.
            c = np.array(a['center'], dtype=np.float64)
            s = np.array([a['scale'], a['scale']], dtype=np.float64)

            # Adjust center/scale slightly to avoid cropping limbs
            if c[0] != -1:
                c[1] = c[1] + 15 * s[1]
                s = s * 1.25
            # MPII coordinates are 1-based; convert to 0-based.
            c = c - 1

            joints = np.zeros(
                (self.ann_info['num_joints'], 3), dtype=np.float64)
            joints_vis = np.zeros(
                (self.ann_info['num_joints'], 3), dtype=np.float64)
            if 'joints' in a:
                joints_ = np.array(a['joints'])
                joints_[:, 0:2] = joints_[:, 0:2] - 1
                joints_vis_ = np.array(a['joints_vis'])
                assert len(joints_) == self.ann_info[
                    'num_joints'], 'joint num diff: {} vs {}'.format(
                        len(joints_), self.ann_info['num_joints'])

                joints[:, 0:2] = joints_[:, 0:2]
                joints_vis[:, 0] = joints_vis_[:]
                joints_vis[:, 1] = joints_vis_[:]

            gt_db.append({
                'image_file': os.path.join(self.img_prefix, image_name),
                'im_id': im_id,
                'center': c,
                'scale': s,
                'joints': joints,
                'joints_vis': joints_vis
            })
        self.db = gt_db
|
@ -0,0 +1,360 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import cv2
|
||||
import numpy as np
|
||||
from collections import OrderedDict
|
||||
try:
|
||||
from collections.abc import Sequence
|
||||
except Exception:
|
||||
from collections import Sequence
|
||||
from .dataset import DetDataset, _make_dataset, _is_valid_file
|
||||
from ppdet.core.workspace import register, serializable
|
||||
from ppdet.utils.logger import setup_logger
|
||||
logger = setup_logger(__name__)
|
||||
|
||||
|
||||
@register
@serializable
class MOTDataSet(DetDataset):
    """
    Load dataset with MOT format.
    Args:
        dataset_dir (str): root directory for dataset.
        image_lists (str|list): mot data image lists, muiti-source mot dataset.
        data_fields (list): key name of data dictionary, at least have 'image'.
        sample_num (int): number of samples to load, -1 means all.

    Notes:
        MOT datasets root directory following this:
            dataset/mot
            |——————image_lists
            |        |——————caltech.train
            |        |——————caltech.val
            |        |——————mot16.train
            |        |——————mot17.train
            |        ......
            |——————Caltech
            |——————MOT17
            |——————......

        All the MOT datasets have the following structure:
            Caltech
            |——————images
            |        └——————00001.jpg
            |        |—————— ...
            |        └——————0000N.jpg
            └——————labels_with_ids
                     └——————00001.txt
                     |—————— ...
                     └——————0000N.txt
        or

            MOT17
            |——————images
            |        └——————train
            |        └——————test
            └——————labels_with_ids
                     └——————train
    """

    def __init__(self,
                 dataset_dir=None,
                 image_lists=[],
                 data_fields=['image'],
                 sample_num=-1):
        super(MOTDataSet, self).__init__(
            dataset_dir=dataset_dir,
            data_fields=data_fields,
            sample_num=sample_num)
        self.dataset_dir = dataset_dir
        self.image_lists = image_lists
        # Accept a single list file name as shorthand for a one-element list.
        if isinstance(self.image_lists, str):
            self.image_lists = [self.image_lists]
        self.roidbs = None
        self.cname2cid = None

    def get_anno(self):
        if self.image_lists == []:
            return
        # only used to get categories and metric
        return os.path.join(self.dataset_dir, 'image_lists',
                            self.image_lists[0])

    def parse_dataset(self):
        """Read every image list, index labels, and build training records."""
        self.img_files = OrderedDict()
        self.img_start_index = OrderedDict()
        self.label_files = OrderedDict()
        self.tid_num = OrderedDict()
        self.tid_start_index = OrderedDict()

        img_index = 0
        for data_name in self.image_lists:
            # check every data image list
            image_lists_dir = os.path.join(self.dataset_dir, 'image_lists')
            assert os.path.isdir(image_lists_dir), \
                "The {} is not a directory.".format(image_lists_dir)

            list_path = os.path.join(image_lists_dir, data_name)
            assert os.path.exists(list_path), \
                "The list path {} does not exist.".format(list_path)

            # record img_files, filter out empty ones
            with open(list_path, 'r') as file:
                self.img_files[data_name] = file.readlines()
                self.img_files[data_name] = [
                    os.path.join(self.dataset_dir, x.strip())
                    for x in self.img_files[data_name]
                ]
                self.img_files[data_name] = list(
                    filter(lambda x: len(x) > 0, self.img_files[data_name]))

            # Global image index where this sub-dataset starts.
            self.img_start_index[data_name] = img_index
            img_index += len(self.img_files[data_name])

            # record label_files: label .txt lives next to the image under
            # labels_with_ids/ with the same stem.
            self.label_files[data_name] = [
                x.replace('images', 'labels_with_ids').replace(
                    '.png', '.txt').replace('.jpg', '.txt')
                for x in self.img_files[data_name]
            ]

        # Count identities per sub-dataset: column 1 of each label row is
        # the track id, so max id + 1 is the identity count.
        for data_name, label_paths in self.label_files.items():
            max_index = -1
            for lp in label_paths:
                lb = np.loadtxt(lp)
                if len(lb) < 1:
                    continue
                if len(lb.shape) < 2:
                    # Single-row file loads as a 1-D array.
                    img_max = lb[1]
                else:
                    img_max = np.max(lb[:, 1])
                if img_max > max_index:
                    max_index = img_max
            self.tid_num[data_name] = int(max_index + 1)

        # Offset each sub-dataset's ids so identities are globally unique.
        last_index = 0
        for i, (k, v) in enumerate(self.tid_num.items()):
            self.tid_start_index[k] = last_index
            last_index += v

        self.total_identities = int(last_index + 1)
        self.num_imgs_each_data = [len(x) for x in self.img_files.values()]
        self.total_imgs = sum(self.num_imgs_each_data)

        logger.info('=' * 80)
        logger.info('MOT dataset summary: ')
        logger.info(self.tid_num)
        logger.info('total images: {}'.format(self.total_imgs))
        logger.info('image start index: {}'.format(self.img_start_index))
        logger.info('total identities: {}'.format(self.total_identities))
        logger.info('identity start index: {}'.format(self.tid_start_index))
        logger.info('=' * 80)

        records = []
        cname2cid = mot_label()

        for img_index in range(self.total_imgs):
            # Find the sub-dataset this global index belongs to (the last
            # start index that is <= img_index wins).
            for i, (k, v) in enumerate(self.img_start_index.items()):
                if img_index >= v:
                    data_name = list(self.label_files.keys())[i]
                    start_index = v
            img_file = self.img_files[data_name][img_index - start_index]
            lbl_file = self.label_files[data_name][img_index - start_index]

            if not os.path.exists(img_file):
                logger.warning('Illegal image file: {}, and it will be ignored'.
                               format(img_file))
                continue
            if not os.path.isfile(lbl_file):
                logger.warning('Illegal label file: {}, and it will be ignored'.
                               format(lbl_file))
                continue

            labels = np.loadtxt(lbl_file, dtype=np.float32).reshape(-1, 6)
            # each row in labels (N, 6) is [gt_class, gt_identity, cx, cy, w, h]

            cx, cy = labels[:, 2], labels[:, 3]
            w, h = labels[:, 4], labels[:, 5]
            gt_bbox = np.stack((cx, cy, w, h)).T.astype('float32')
            gt_class = labels[:, 0:1].astype('int32')
            gt_score = np.ones((len(labels), 1)).astype('float32')
            gt_ide = labels[:, 1:2].astype('int32')
            # Shift valid (> -1) identities by the sub-dataset offset so ids
            # never collide across sources.
            for i, _ in enumerate(gt_ide):
                if gt_ide[i] > -1:
                    gt_ide[i] += self.tid_start_index[data_name]

            mot_rec = {
                'im_file': img_file,
                'im_id': img_index,
            } if 'image' in self.data_fields else {}

            gt_rec = {
                'gt_class': gt_class,
                'gt_score': gt_score,
                'gt_bbox': gt_bbox,
                'gt_ide': gt_ide,
            }

            # Only keep the annotation fields the config asked for.
            for k, v in gt_rec.items():
                if k in self.data_fields:
                    mot_rec[k] = v

            records.append(mot_rec)
            if self.sample_num > 0 and img_index >= self.sample_num:
                break
        assert len(records) > 0, 'not found any mot record in %s' % (
            self.image_lists)
        self.roidbs, self.cname2cid = records, cname2cid
||||
|
||||
def mot_label():
    """Return the MOT category-name -> class-id mapping (person only)."""
    return {'person': 0}
@register
@serializable
class MOTImageFolder(DetDataset):
    """Image-folder dataset used for MOT inference.

    Like ``ImageFolder``, but optionally keeps the original image
    (``keep_ori_im``) for saving tracking visualizations.
    """

    def __init__(self,
                 task,
                 dataset_dir=None,
                 data_root=None,
                 image_dir=None,
                 sample_num=-1,
                 keep_ori_im=False,
                 **kwargs):
        super(MOTImageFolder, self).__init__(
            dataset_dir, image_dir, sample_num=sample_num)
        self.task = task
        self.data_root = data_root
        self.keep_ori_im = keep_ori_im
        self._imid2path = {}
        self.roidbs = None

    def check_or_download_dataset(self):
        # Local folders only; nothing to download.
        return

    def parse_dataset(self, ):
        # Records are built lazily, on first use.
        if self.roidbs:
            return
        self.roidbs = self._load_images()

    def _parse(self):
        """Expand ``image_dir`` into a flat list of image file paths."""
        sources = self.image_dir
        if not isinstance(sources, Sequence):
            sources = [sources]
        found = []
        for entry in sources:
            if os.path.isdir(entry):
                entry = os.path.join(self.dataset_dir, entry)
                found.extend(_make_dataset(entry))
            elif os.path.isfile(entry) and _is_valid_file(entry):
                found.append(entry)
        return found

    def _load_images(self):
        """Build one record per image, honoring ``sample_num``."""
        records = []
        count = 0
        for image in self._parse():
            assert image != '' and os.path.isfile(image), \
                "Image {} not found".format(image)
            if self.sample_num > 0 and count >= self.sample_num:
                break
            rec = {'im_id': np.array([count]), 'im_file': image}
            if self.keep_ori_im:
                rec.update({'keep_ori_im': 1})
            self._imid2path[count] = image
            count += 1
            records.append(rec)
        assert len(records) > 0, "No image file found"
        return records

    def get_imid2path(self):
        """Return the image-id -> path mapping built by ``_load_images``."""
        return self._imid2path

    def set_images(self, images):
        """Point the dataset at new images and rebuild records eagerly."""
        self.image_dir = images
        self.roidbs = self._load_images()
|
||||
def _is_valid_video(f, extensions=('.mp4', '.avi', '.mov', '.rmvb', 'flv')):
|
||||
return f.lower().endswith(extensions)
|
||||
|
||||
|
||||
@register
@serializable
class MOTVideoDataset(DetDataset):
    """
    Load MOT dataset with MOT format from video for inference.
    Args:
        video_file (str): path of the video file
        dataset_dir (str): root directory for dataset.
        keep_ori_im (bool): whether to keep original image, default False.
            Set True when used during MOT model inference while saving
            images or video, or used in DeepSORT.
    """

    def __init__(self,
                 video_file='',
                 dataset_dir=None,
                 keep_ori_im=False,
                 **kwargs):
        super(MOTVideoDataset, self).__init__(dataset_dir=dataset_dir)
        self.video_file = video_file
        self.dataset_dir = dataset_dir
        self.keep_ori_im = keep_ori_im
        self.roidbs = None

    def parse_dataset(self, ):
        # Decode the video lazily, on first use.
        if not self.roidbs:
            self.roidbs = self._load_video_images()

    def _load_video_images(self):
        """Decode every frame of ``video_file`` into an inference record."""
        self.cap = cv2.VideoCapture(self.video_file)
        self.vn = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
        self.frame_rate = int(round(self.cap.get(cv2.CAP_PROP_FPS)))
        logger.info('Length of the video: {:d} frames'.format(self.vn))
        res = True
        ct = 0
        records = []
        while res:
            res, img = self.cap.read()
            # NOTE(review): the loop body still runs once after the final
            # failed read (img is None); that trailing record is discarded
            # below via records[:-1] — verify np/cv2 tolerate the None frame.
            image = np.ascontiguousarray(img, dtype=np.float32)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            im_shape = image.shape
            rec = {
                'im_id': np.array([ct]),
                'image': image,
                'h': im_shape[0],
                'w': im_shape[1],
                'im_shape': np.array(
                    im_shape[:2], dtype=np.float32),
                # Frames are not resized here, hence unit scale factor.
                'scale_factor': np.array(
                    [1., 1.], dtype=np.float32),
            }
            if self.keep_ori_im:
                rec.update({'ori_image': image})
            ct += 1
            records.append(rec)
        # Drop the record produced by the failed final read.
        records = records[:-1]
        assert len(records) > 0, "No image file found"
        return records

    def set_video(self, video_file):
        """Switch to a new video and rebuild the frame records eagerly."""
        self.video_file = video_file
        assert os.path.isfile(self.video_file) and _is_valid_video(self.video_file), \
            "wrong or unsupported file format: {}".format(self.video_file)
        self.roidbs = self._load_video_images()
|
@ -0,0 +1,205 @@
|
|||
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
from ppdet.core.workspace import register, serializable
|
||||
|
||||
from .dataset import DetDataset
|
||||
|
||||
from ppdet.utils.logger import setup_logger
|
||||
logger = setup_logger(__name__)
|
||||
|
||||
|
||||
@register
@serializable
class VOCDataSet(DetDataset):
    """
    Load dataset with PascalVOC format.

    Notes:
        `anno_path` must contains xml file and image file path for annotations.

    Args:
        dataset_dir (str): root directory for dataset.
        image_dir (str): directory for images.
        anno_path (str): voc annotation file path.
        data_fields (list): key name of data dictionary, at least have 'image'.
        sample_num (int): number of samples to load, -1 means all.
        label_list (str): if use_default_label is False, will load
            mapping between category and class index.
    """

    def __init__(self,
                 dataset_dir=None,
                 image_dir=None,
                 anno_path=None,
                 data_fields=['image'],
                 sample_num=-1,
                 label_list=None):
        super(VOCDataSet, self).__init__(
            dataset_dir=dataset_dir,
            image_dir=image_dir,
            anno_path=anno_path,
            data_fields=data_fields,
            sample_num=sample_num)
        self.label_list = label_list

    def parse_dataset(self, ):
        """Parse the VOC list file and XML annotations into roidb records.

        Populates ``self.roidbs`` (list of per-image dicts) and
        ``self.cname2cid`` (category name -> class id).
        """
        anno_path = os.path.join(self.dataset_dir, self.anno_path)
        image_dir = os.path.join(self.dataset_dir, self.image_dir)

        # mapping category name to class id
        # first_class:0, second_class:1, ...
        records = []
        ct = 0
        cname2cid = {}
        if self.label_list:
            # Custom label file: one category name per line, ids assigned
            # in file order starting at 0.
            label_path = os.path.join(self.dataset_dir, self.label_list)
            if not os.path.exists(label_path):
                raise ValueError("label_list {} does not exists".format(
                    label_path))
            with open(label_path, 'r') as fr:
                label_id = 0
                for line in fr.readlines():
                    cname2cid[line.strip()] = label_id
                    label_id += 1
        else:
            # Fall back to the standard 20-class PascalVOC mapping.
            cname2cid = pascalvoc_label()

        # The annotation list file has one "<image> <xml>" pair per line.
        with open(anno_path, 'r') as fr:
            while True:
                line = fr.readline()
                if not line:
                    break
                img_file, xml_file = [os.path.join(image_dir, x) \
                        for x in line.strip().split()[:2]]
                if not os.path.exists(img_file):
                    logger.warning(
                        'Illegal image file: {}, and it will be ignored'.format(
                            img_file))
                    continue
                if not os.path.isfile(xml_file):
                    logger.warning(
                        'Illegal xml file: {}, and it will be ignored'.format(
                            xml_file))
                    continue
                tree = ET.parse(xml_file)
                # Prefer an explicit <id> tag when present, otherwise use
                # the running counter as the image id.
                if tree.find('id') is None:
                    im_id = np.array([ct])
                else:
                    im_id = np.array([int(tree.find('id').text)])

                objs = tree.findall('object')
                im_w = float(tree.find('size').find('width').text)
                im_h = float(tree.find('size').find('height').text)
                if im_w < 0 or im_h < 0:
                    logger.warning(
                        'Illegal width: {} or height: {} in annotation, '
                        'and {} will be ignored'.format(im_w, im_h, xml_file))
                    continue
                gt_bbox = []
                gt_class = []
                gt_score = []
                difficult = []
                for i, obj in enumerate(objs):
                    cname = obj.find('name').text

                    # user dataset may not contain difficult field
                    _difficult = obj.find('difficult')
                    _difficult = int(
                        _difficult.text) if _difficult is not None else 0

                    x1 = float(obj.find('bndbox').find('xmin').text)
                    y1 = float(obj.find('bndbox').find('ymin').text)
                    x2 = float(obj.find('bndbox').find('xmax').text)
                    y2 = float(obj.find('bndbox').find('ymax').text)
                    # Clip boxes to the image; boxes that collapse to zero
                    # width/height after clipping are discarded below.
                    x1 = max(0, x1)
                    y1 = max(0, y1)
                    x2 = min(im_w - 1, x2)
                    y2 = min(im_h - 1, y2)
                    if x2 > x1 and y2 > y1:
                        gt_bbox.append([x1, y1, x2, y2])
                        gt_class.append([cname2cid[cname]])
                        gt_score.append([1.])
                        difficult.append([_difficult])
                    else:
                        logger.warning(
                            'Found an invalid bbox in annotations: xml_file: {}'
                            ', x1: {}, y1: {}, x2: {}, y2: {}.'.format(
                                xml_file, x1, y1, x2, y2))
                gt_bbox = np.array(gt_bbox).astype('float32')
                gt_class = np.array(gt_class).astype('int32')
                gt_score = np.array(gt_score).astype('float32')
                difficult = np.array(difficult).astype('int32')

                voc_rec = {
                    'im_file': img_file,
                    'im_id': im_id,
                    'h': im_h,
                    'w': im_w
                } if 'image' in self.data_fields else {}

                # Only copy ground-truth arrays the caller asked for.
                gt_rec = {
                    'gt_class': gt_class,
                    'gt_score': gt_score,
                    'gt_bbox': gt_bbox,
                    'difficult': difficult
                }
                for k, v in gt_rec.items():
                    if k in self.data_fields:
                        voc_rec[k] = v

                # Images with no annotated objects are skipped entirely.
                if len(objs) != 0:
                    records.append(voc_rec)

                ct += 1
                if self.sample_num > 0 and ct >= self.sample_num:
                    break
        assert len(records) > 0, 'not found any voc record in %s' % (
            self.anno_path)
        logger.debug('{} samples in file {}'.format(ct, anno_path))
        self.roidbs, self.cname2cid = records, cname2cid

    def get_label_list(self):
        """Return the absolute path of the custom label-list file."""
        return os.path.join(self.dataset_dir, self.label_list)
|
||||
|
||||
|
||||
def pascalvoc_label():
    """Return the standard PascalVOC category-name -> class-id mapping.

    Ids follow the canonical 20-class ordering, 'aeroplane' == 0 through
    'tvmonitor' == 19.
    """
    names = (
        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
        'bus', 'car', 'cat', 'chair', 'cow',
        'diningtable', 'dog', 'horse', 'motorbike', 'person',
        'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
    )
    return {name: idx for idx, name in enumerate(names)}
|
|
@ -0,0 +1,180 @@
|
|||
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
from ppdet.core.workspace import register, serializable
|
||||
from .dataset import DetDataset
|
||||
|
||||
from ppdet.utils.logger import setup_logger
|
||||
logger = setup_logger(__name__)
|
||||
|
||||
|
||||
@register
@serializable
class WIDERFaceDataSet(DetDataset):
    """
    Load WiderFace records with 'anno_path'

    Args:
        dataset_dir (str): root directory for dataset.
        image_dir (str): directory for images.
        anno_path (str): WiderFace annotation data.
        data_fields (list): key name of data dictionary, at least have 'image'.
        sample_num (int): number of samples to load, -1 means all.
        with_lmk (bool): whether to load face landmark keypoint labels.
    """

    def __init__(self,
                 dataset_dir=None,
                 image_dir=None,
                 anno_path=None,
                 data_fields=['image'],
                 sample_num=-1,
                 with_lmk=False):
        super(WIDERFaceDataSet, self).__init__(
            dataset_dir=dataset_dir,
            image_dir=image_dir,
            anno_path=anno_path,
            data_fields=data_fields,
            sample_num=sample_num,
            with_lmk=with_lmk)
        self.anno_path = anno_path
        self.sample_num = sample_num
        # Populated by parse_dataset().
        self.roidbs = None
        self.cname2cid = None
        self.with_lmk = with_lmk

    def parse_dataset(self):
        """Parse the WIDER FACE annotation txt into roidb records.

        Populates ``self.roidbs`` and ``self.cname2cid`` (single 'face'
        class). Each record holds the image path, id, gt boxes/classes and
        — when ``with_lmk`` is set — 5-point landmarks plus ignore flags.
        """
        anno_path = os.path.join(self.dataset_dir, self.anno_path)
        image_dir = os.path.join(self.dataset_dir, self.image_dir)

        txt_file = anno_path

        records = []
        ct = 0
        # Each item is [image_name, box_entry, box_entry, ...].
        file_lists = self._load_file_list(txt_file)
        cname2cid = widerface_label()

        for item in file_lists:
            im_fname = item[0]
            im_id = np.array([ct])
            # One row per annotated box; gt_class stays all zeros because
            # 'face' is the only category.
            gt_bbox = np.zeros((len(item) - 1, 4), dtype=np.float32)
            gt_class = np.zeros((len(item) - 1, 1), dtype=np.int32)
            gt_lmk_labels = np.zeros((len(item) - 1, 10), dtype=np.float32)
            lmk_ignore_flag = np.zeros((len(item) - 1, 1), dtype=np.int32)
            for index_box in range(len(item)):
                if index_box < 1:
                    # item[0] is the file name, not a box entry.
                    continue
                gt_bbox[index_box - 1] = item[index_box][0]
                if self.with_lmk:
                    gt_lmk_labels[index_box - 1] = item[index_box][1]
                    lmk_ignore_flag[index_box - 1] = item[index_box][2]
            im_fname = os.path.join(image_dir,
                                    im_fname) if image_dir else im_fname
            widerface_rec = {
                'im_file': im_fname,
                'im_id': im_id,
            } if 'image' in self.data_fields else {}
            # Only copy ground-truth arrays the caller asked for.
            gt_rec = {
                'gt_bbox': gt_bbox,
                'gt_class': gt_class,
            }
            for k, v in gt_rec.items():
                if k in self.data_fields:
                    widerface_rec[k] = v
            if self.with_lmk:
                widerface_rec['gt_keypoint'] = gt_lmk_labels
                widerface_rec['keypoint_ignore'] = lmk_ignore_flag

            if len(item) != 0:
                records.append(widerface_rec)

            ct += 1
            if self.sample_num > 0 and ct >= self.sample_num:
                break
        assert len(records) > 0, 'not found any widerface in %s' % (anno_path)
        logger.debug('{} samples in file {}'.format(ct, anno_path))
        self.roidbs, self.cname2cid = records, cname2cid

    def _load_file_list(self, input_txt):
        """Group the flat annotation txt into per-image entries.

        The file alternates an image path line (single token ending in an
        image extension) with box lines ("xmin ymin w h [landmarks...]").
        Returns a list of [image_line, box_entry, ...] lists.
        """
        with open(input_txt, 'r') as f_dir:
            lines_input_txt = f_dir.readlines()

        file_dict = {}
        num_class = 0
        exts = ['jpg', 'jpeg', 'png', 'bmp']
        exts += [ext.upper() for ext in exts]
        for i in range(len(lines_input_txt)):
            line_txt = lines_input_txt[i].strip('\n\t\r')
            split_str = line_txt.split(' ')
            if len(split_str) == 1:
                # Single token: treat it as an image file name line.
                img_file_name = os.path.split(split_str[0])[1]
                split_txt = img_file_name.split('.')
                if len(split_txt) < 2:
                    continue
                elif split_txt[-1] in exts:
                    if i != 0:
                        # Start a new group for every image after the first.
                        num_class += 1
                    file_dict[num_class] = [line_txt]
            else:
                if len(line_txt) <= 6:
                    # Too short to encode a valid "x y w h" box; skip.
                    continue
                result_boxs = []
                xmin = float(split_str[0])
                ymin = float(split_str[1])
                w = float(split_str[2])
                h = float(split_str[3])
                # Filter out wrong labels
                if w < 0 or h < 0:
                    logger.warning('Illegal box with w: {}, h: {} in '
                                   'img: {}, and it will be ignored'.format(
                                       w, h, file_dict[num_class][0]))
                    continue
                xmin = max(0, xmin)
                ymin = max(0, ymin)
                # Convert from (x, y, w, h) to (xmin, ymin, xmax, ymax).
                xmax = xmin + w
                ymax = ymin + h
                gt_bbox = [xmin, ymin, xmax, ymax]
                result_boxs.append(gt_bbox)
                if self.with_lmk:
                    assert len(split_str) > 18, 'When `with_lmk=True`, the number' \
                        'of characters per line in the annotation file should' \
                        'exceed 18.'
                    # Landmark fields sit at fixed offsets; every third
                    # column (7, 10, 13, 16) is skipped by the format.
                    lmk0_x = float(split_str[5])
                    lmk0_y = float(split_str[6])
                    lmk1_x = float(split_str[8])
                    lmk1_y = float(split_str[9])
                    lmk2_x = float(split_str[11])
                    lmk2_y = float(split_str[12])
                    lmk3_x = float(split_str[14])
                    lmk3_y = float(split_str[15])
                    lmk4_x = float(split_str[17])
                    lmk4_y = float(split_str[18])
                    # x == -1 marks annotations without usable landmarks.
                    lmk_ignore_flag = 0 if lmk0_x == -1 else 1
                    gt_lmk_label = [
                        lmk0_x, lmk0_y, lmk1_x, lmk1_y, lmk2_x, lmk2_y, lmk3_x,
                        lmk3_y, lmk4_x, lmk4_y
                    ]
                    result_boxs.append(gt_lmk_label)
                    result_boxs.append(lmk_ignore_flag)
                file_dict[num_class].append(result_boxs)

        return list(file_dict.values())
|
||||
|
||||
|
||||
def widerface_label():
    """Return the WIDER FACE category mapping: the single 'face' class."""
    return {'face': 0}
|
|
@ -0,0 +1,28 @@
|
|||
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from . import operators
|
||||
from . import batch_operators
|
||||
from . import keypoint_operators
|
||||
from . import mot_operators
|
||||
|
||||
from .operators import *
|
||||
from .batch_operators import *
|
||||
from .keypoint_operators import *
|
||||
from .mot_operators import *
|
||||
|
||||
__all__ = []
|
||||
__all__ += registered_ops
|
||||
__all__ += keypoint_operators.__all__
|
||||
__all__ += mot_operators.__all__
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,748 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
try:
|
||||
from collections.abc import Sequence
|
||||
except Exception:
|
||||
from collections import Sequence
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from .operators import register_op, BaseOperator, Resize
|
||||
from .op_helper import jaccard_overlap, gaussian2D
|
||||
from scipy import ndimage
|
||||
|
||||
from ppdet.modeling import bbox_utils
|
||||
from ppdet.utils.logger import setup_logger
|
||||
logger = setup_logger(__name__)
|
||||
|
||||
__all__ = [
|
||||
'PadBatch', 'BatchRandomResize', 'Gt2YoloTarget', 'Gt2FCOSTarget',
|
||||
'Gt2TTFTarget', 'Gt2Solov2Target'
|
||||
]
|
||||
|
||||
|
||||
@register_op
class PadBatch(BaseOperator):
    """
    Pad a batch of samples so they can be divisible by a stride.
    The layout of each image should be 'CHW'.
    Args:
        pad_to_stride (int): If `pad_to_stride > 0`, pad zeros to ensure
            height and width is divisible by `pad_to_stride`.
    """

    def __init__(self, pad_to_stride=0):
        super(PadBatch, self).__init__()
        self.pad_to_stride = pad_to_stride

    def __call__(self, samples, context=None):
        """
        Args:
            samples (list): a batch of sample, each is dict.

        Returns:
            list: the same samples, with 'image' (and, when present,
            'semantic' and 'gt_segm') zero-padded to a common H x W.
        """
        coarsest_stride = self.pad_to_stride

        # Widest/tallest image in the batch sets the padded canvas size.
        max_shape = np.array([data['image'].shape for data in samples]).max(
            axis=0)
        if coarsest_stride > 0:
            # Round H and W up to the next multiple of the stride.
            max_shape[1] = int(
                np.ceil(max_shape[1] / coarsest_stride) * coarsest_stride)
            max_shape[2] = int(
                np.ceil(max_shape[2] / coarsest_stride) * coarsest_stride)

        for data in samples:
            im = data['image']
            im_c, im_h, im_w = im.shape[:]
            # Original pixels go in the top-left corner; the rest is zero.
            padding_im = np.zeros(
                (im_c, max_shape[1], max_shape[2]), dtype=np.float32)
            padding_im[:, :im_h, :im_w] = im
            data['image'] = padding_im
            if 'semantic' in data and data['semantic'] is not None:
                semantic = data['semantic']
                padding_sem = np.zeros(
                    (1, max_shape[1], max_shape[2]), dtype=np.float32)
                padding_sem[:, :im_h, :im_w] = semantic
                data['semantic'] = padding_sem
            if 'gt_segm' in data and data['gt_segm'] is not None:
                gt_segm = data['gt_segm']
                padding_segm = np.zeros(
                    (gt_segm.shape[0], max_shape[1], max_shape[2]),
                    dtype=np.uint8)
                padding_segm[:, :im_h, :im_w] = gt_segm
                data['gt_segm'] = padding_segm

            if 'gt_rbox2poly' in data and data['gt_rbox2poly'] is not None:
                # ploy to rbox
                polys = data['gt_rbox2poly']
                rbox = bbox_utils.poly2rbox(polys)
                data['gt_rbox'] = rbox

        return samples
|
||||
|
||||
|
||||
@register_op
class BatchRandomResize(BaseOperator):
    """
    Resize image to target size randomly. random target_size and interpolation method
    Args:
        target_size (int, list, tuple): image target size, if random size is True, must be list or tuple
        keep_ratio (bool): whether keep_ratio or not, default true
        interp (int): the interpolation method
        random_size (bool): whether random select target size of image
        random_interp (bool): whether random select interpolation method
    """

    def __init__(self,
                 target_size,
                 keep_ratio,
                 interp=cv2.INTER_NEAREST,
                 random_size=True,
                 random_interp=False):
        super(BatchRandomResize, self).__init__()
        self.keep_ratio = keep_ratio
        # Candidate pool used only when random_interp=True.
        self.interps = [
            cv2.INTER_NEAREST,
            cv2.INTER_LINEAR,
            cv2.INTER_AREA,
            cv2.INTER_CUBIC,
            cv2.INTER_LANCZOS4,
        ]
        self.interp = interp
        assert isinstance(target_size, (
            int, Sequence)), "target_size must be int, list or tuple"
        if random_size and not isinstance(target_size, list):
            raise TypeError(
                "Type of target_size is invalid when random_size is True. Must be List, now is {}".
                format(type(target_size)))
        self.target_size = target_size
        self.random_size = random_size
        self.random_interp = random_interp

    def __call__(self, samples, context=None):
        """Resize the whole batch to one (possibly random) target size.

        The size/interpolation draw happens once per batch so every sample
        in the batch is resized identically; the actual work is delegated
        to the `Resize` operator.
        """
        if self.random_size:
            index = np.random.choice(len(self.target_size))
            target_size = self.target_size[index]
        else:
            target_size = self.target_size

        if self.random_interp:
            interp = np.random.choice(self.interps)
        else:
            interp = self.interp

        resizer = Resize(target_size, keep_ratio=self.keep_ratio, interp=interp)
        return resizer(samples, context=context)
|
||||
|
||||
|
||||
@register_op
class Gt2YoloTarget(BaseOperator):
    """
    Generate YOLOv3 targets by ground truth data, this operator is only used in
    fine grained YOLOv3 loss mode.

    Args:
        anchors (list): anchor (w, h) pairs in pixels.
        anchor_masks (list): per-level indices into `anchors`.
        downsample_ratios (list): stride of each output level.
        num_classes (int): number of classes, 80 by default.
        iou_thresh (float): when < 1, anchors (besides the best match) whose
            IoU with the gt exceeds this also receive a target.
    """

    def __init__(self,
                 anchors,
                 anchor_masks,
                 downsample_ratios,
                 num_classes=80,
                 iou_thresh=1.):
        super(Gt2YoloTarget, self).__init__()
        self.anchors = anchors
        self.anchor_masks = anchor_masks
        self.downsample_ratios = downsample_ratios
        self.num_classes = num_classes
        self.iou_thresh = iou_thresh

    def __call__(self, samples, context=None):
        """Attach per-level 'target{i}' tensors to every sample.

        Each target has shape (len(mask), 6 + num_classes, grid_h, grid_w):
        channels are [tx, ty, tw, th, scale, objectness, one-hot classes].
        Assumes gt_bbox is (cx, cy, w, h) normalized to [0, 1] — the
        x/y offsets below only make sense under that convention.
        """
        assert len(self.anchor_masks) == len(self.downsample_ratios), \
            "anchor_masks', and 'downsample_ratios' should have same length."

        h, w = samples[0]['image'].shape[1:3]
        # Anchor sizes normalized by image size, to compare with gt w/h.
        an_hw = np.array(self.anchors) / np.array([[w, h]])
        for sample in samples:
            # im, gt_bbox, gt_class, gt_score = sample
            im = sample['image']
            gt_bbox = sample['gt_bbox']
            gt_class = sample['gt_class']
            if 'gt_score' not in sample:
                # Default every gt box to full confidence.
                sample['gt_score'] = np.ones(
                    (gt_bbox.shape[0], 1), dtype=np.float32)
            gt_score = sample['gt_score']
            for i, (
                    mask, downsample_ratio
            ) in enumerate(zip(self.anchor_masks, self.downsample_ratios)):
                grid_h = int(h / downsample_ratio)
                grid_w = int(w / downsample_ratio)
                target = np.zeros(
                    (len(mask), 6 + self.num_classes, grid_h, grid_w),
                    dtype=np.float32)
                for b in range(gt_bbox.shape[0]):
                    gx, gy, gw, gh = gt_bbox[b, :]
                    cls = gt_class[b]
                    score = gt_score[b]
                    if gw <= 0. or gh <= 0. or score <= 0.:
                        continue

                    # find best match anchor index (shape-only IoU, boxes
                    # anchored at the origin)
                    best_iou = 0.
                    best_idx = -1
                    for an_idx in range(an_hw.shape[0]):
                        iou = jaccard_overlap(
                            [0., 0., gw, gh],
                            [0., 0., an_hw[an_idx, 0], an_hw[an_idx, 1]])
                        if iou > best_iou:
                            best_iou = iou
                            best_idx = an_idx

                    # Grid cell containing the box center at this level.
                    gi = int(gx * grid_w)
                    gj = int(gy * grid_h)

                    # gtbox should be regresed in this layes if best match
                    # anchor index in anchor mask of this layer
                    if best_idx in mask:
                        best_n = mask.index(best_idx)

                        # x, y, w, h, scale
                        target[best_n, 0, gj, gi] = gx * grid_w - gi
                        target[best_n, 1, gj, gi] = gy * grid_h - gj
                        target[best_n, 2, gj, gi] = np.log(
                            gw * w / self.anchors[best_idx][0])
                        target[best_n, 3, gj, gi] = np.log(
                            gh * h / self.anchors[best_idx][1])
                        # Loss weight: small boxes get a weight close to 2.
                        target[best_n, 4, gj, gi] = 2.0 - gw * gh

                        # objectness record gt_score
                        target[best_n, 5, gj, gi] = score

                        # classification
                        target[best_n, 6 + cls, gj, gi] = 1.

                    # For non-matched anchors, calculate the target if the iou
                    # between anchor and gt is larger than iou_thresh
                    if self.iou_thresh < 1:
                        for idx, mask_i in enumerate(mask):
                            if mask_i == best_idx: continue
                            iou = jaccard_overlap(
                                [0., 0., gw, gh],
                                [0., 0., an_hw[mask_i, 0], an_hw[mask_i, 1]])
                            if iou > self.iou_thresh and target[idx, 5, gj,
                                                                gi] == 0.:
                                # x, y, w, h, scale
                                target[idx, 0, gj, gi] = gx * grid_w - gi
                                target[idx, 1, gj, gi] = gy * grid_h - gj
                                target[idx, 2, gj, gi] = np.log(
                                    gw * w / self.anchors[mask_i][0])
                                target[idx, 3, gj, gi] = np.log(
                                    gh * h / self.anchors[mask_i][1])
                                target[idx, 4, gj, gi] = 2.0 - gw * gh

                                # objectness record gt_score
                                target[idx, 5, gj, gi] = score

                                # classification
                                target[idx, 6 + cls, gj, gi] = 1.
                sample['target{}'.format(i)] = target

            # remove useless gt_class and gt_score after target calculated
            sample.pop('gt_class')
            sample.pop('gt_score')

        return samples
|
||||
|
||||
|
||||
@register_op
class Gt2FCOSTarget(BaseOperator):
    """
    Generate FCOS targets by ground truth data.

    Args:
        object_sizes_boundary (list): size thresholds splitting objects
            across feature levels; extended with -1 and +inf internally.
        center_sampling_radius (float): radius (in strides) for center
            sampling; 0 disables it and uses plain inside-box tests.
        downsample_ratios (list): stride of each FPN level.
        norm_reg_targets (bool): whether to divide regression targets by
            the level stride.
    """

    def __init__(self,
                 object_sizes_boundary,
                 center_sampling_radius,
                 downsample_ratios,
                 norm_reg_targets=False):
        super(Gt2FCOSTarget, self).__init__()
        self.center_sampling_radius = center_sampling_radius
        self.downsample_ratios = downsample_ratios
        self.INF = np.inf
        # Build per-level [low, high) size-of-interest intervals, e.g.
        # [-1, 64], [64, 128], ..., [last, inf].
        self.object_sizes_boundary = [-1] + object_sizes_boundary + [self.INF]
        object_sizes_of_interest = []
        for i in range(len(self.object_sizes_boundary) - 1):
            object_sizes_of_interest.append([
                self.object_sizes_boundary[i], self.object_sizes_boundary[i + 1]
            ])
        self.object_sizes_of_interest = object_sizes_of_interest
        self.norm_reg_targets = norm_reg_targets

    def _compute_points(self, w, h):
        """
        compute the corresponding points in each feature map
        :param h: image height
        :param w: image width
        :return: points from all feature map
        """
        locations = []
        for stride in self.downsample_ratios:
            shift_x = np.arange(0, w, stride).astype(np.float32)
            shift_y = np.arange(0, h, stride).astype(np.float32)
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shift_x = shift_x.flatten()
            shift_y = shift_y.flatten()
            # Shift each location to the center of its cell.
            location = np.stack([shift_x, shift_y], axis=1) + stride // 2
            locations.append(location)
        num_points_each_level = [len(location) for location in locations]
        locations = np.concatenate(locations, axis=0)
        return locations, num_points_each_level

    def _convert_xywh2xyxy(self, gt_bbox, w, h):
        """
        convert the bounding box from style xywh to xyxy
        :param gt_bbox: bounding boxes normalized into [0, 1]
        :param w: image width
        :param h: image height
        :return: bounding boxes in xyxy style
        """
        bboxes = gt_bbox.copy()
        bboxes[:, [0, 2]] = bboxes[:, [0, 2]] * w
        bboxes[:, [1, 3]] = bboxes[:, [1, 3]] * h
        bboxes[:, 2] = bboxes[:, 0] + bboxes[:, 2]
        bboxes[:, 3] = bboxes[:, 1] + bboxes[:, 3]
        return bboxes

    def _check_inside_boxes_limited(self, gt_bbox, xs, ys,
                                    num_points_each_level):
        """
        check if points is within the clipped boxes (center sampling):
        each gt box is shrunk to a square of radius
        center_sampling_radius * stride around its center before the
        inside test.
        :param gt_bbox: bounding boxes
        :param xs: horizontal coordinate of points
        :param ys: vertical coordinate of points
        :return: the mask of points is within gt_box or not
        """
        bboxes = np.reshape(
            gt_bbox, newshape=[1, gt_bbox.shape[0], gt_bbox.shape[1]])
        bboxes = np.tile(bboxes, reps=[xs.shape[0], 1, 1])
        ct_x = (bboxes[:, :, 0] + bboxes[:, :, 2]) / 2
        ct_y = (bboxes[:, :, 1] + bboxes[:, :, 3]) / 2
        beg = 0
        clipped_box = bboxes.copy()
        # Rows of `clipped_box` are grouped by level; each level clips with
        # its own stride-scaled radius.
        for lvl, stride in enumerate(self.downsample_ratios):
            end = beg + num_points_each_level[lvl]
            stride_exp = self.center_sampling_radius * stride
            clipped_box[beg:end, :, 0] = np.maximum(
                bboxes[beg:end, :, 0], ct_x[beg:end, :] - stride_exp)
            clipped_box[beg:end, :, 1] = np.maximum(
                bboxes[beg:end, :, 1], ct_y[beg:end, :] - stride_exp)
            clipped_box[beg:end, :, 2] = np.minimum(
                bboxes[beg:end, :, 2], ct_x[beg:end, :] + stride_exp)
            clipped_box[beg:end, :, 3] = np.minimum(
                bboxes[beg:end, :, 3], ct_y[beg:end, :] + stride_exp)
            beg = end
        # A point is inside iff all four signed distances are positive.
        l_res = xs - clipped_box[:, :, 0]
        r_res = clipped_box[:, :, 2] - xs
        t_res = ys - clipped_box[:, :, 1]
        b_res = clipped_box[:, :, 3] - ys
        clipped_box_reg_targets = np.stack([l_res, t_res, r_res, b_res], axis=2)
        inside_gt_box = np.min(clipped_box_reg_targets, axis=2) > 0
        return inside_gt_box

    def __call__(self, samples, context=None):
        """Attach per-level 'reg_target{l}', 'labels{l}' and
        'centerness{l}' tensors to every sample and drop raw gt fields."""
        assert len(self.object_sizes_of_interest) == len(self.downsample_ratios), \
            "object_sizes_of_interest', and 'downsample_ratios' should have same length."

        for sample in samples:
            # im, gt_bbox, gt_class, gt_score = sample
            im = sample['image']
            bboxes = sample['gt_bbox']
            gt_class = sample['gt_class']
            # calculate the locations
            h, w = im.shape[1:3]
            points, num_points_each_level = self._compute_points(w, h)
            # Per-point [low, high) size interval of its feature level.
            object_scale_exp = []
            for i, num_pts in enumerate(num_points_each_level):
                object_scale_exp.append(
                    np.tile(
                        np.array([self.object_sizes_of_interest[i]]),
                        reps=[num_pts, 1]))
            object_scale_exp = np.concatenate(object_scale_exp, axis=0)

            gt_area = (bboxes[:, 2] - bboxes[:, 0]) * (
                bboxes[:, 3] - bboxes[:, 1])
            # Broadcast point coordinates against every gt box:
            # shape (num_points, num_boxes).
            xs, ys = points[:, 0], points[:, 1]
            xs = np.reshape(xs, newshape=[xs.shape[0], 1])
            xs = np.tile(xs, reps=[1, bboxes.shape[0]])
            ys = np.reshape(ys, newshape=[ys.shape[0], 1])
            ys = np.tile(ys, reps=[1, bboxes.shape[0]])

            # Signed distances from each point to each box's four sides.
            l_res = xs - bboxes[:, 0]
            r_res = bboxes[:, 2] - xs
            t_res = ys - bboxes[:, 1]
            b_res = bboxes[:, 3] - ys
            reg_targets = np.stack([l_res, t_res, r_res, b_res], axis=2)
            if self.center_sampling_radius > 0:
                is_inside_box = self._check_inside_boxes_limited(
                    bboxes, xs, ys, num_points_each_level)
            else:
                is_inside_box = np.min(reg_targets, axis=2) > 0
            # check if the targets is inside the corresponding level
            max_reg_targets = np.max(reg_targets, axis=2)
            lower_bound = np.tile(
                np.expand_dims(
                    object_scale_exp[:, 0], axis=1),
                reps=[1, max_reg_targets.shape[1]])
            high_bound = np.tile(
                np.expand_dims(
                    object_scale_exp[:, 1], axis=1),
                reps=[1, max_reg_targets.shape[1]])
            is_match_current_level = \
                (max_reg_targets > lower_bound) & \
                (max_reg_targets < high_bound)
            # Ambiguous points are assigned to the smallest matching box;
            # invalid pairs are pushed to INF so argmin skips them.
            points2gtarea = np.tile(
                np.expand_dims(
                    gt_area, axis=0), reps=[xs.shape[0], 1])
            points2gtarea[is_inside_box == 0] = self.INF
            points2gtarea[is_match_current_level == 0] = self.INF
            points2min_area = points2gtarea.min(axis=1)
            points2min_area_ind = points2gtarea.argmin(axis=1)
            # Labels are 1-based; 0 means background (no matching box).
            labels = gt_class[points2min_area_ind] + 1
            labels[points2min_area == self.INF] = 0
            reg_targets = reg_targets[range(xs.shape[0]), points2min_area_ind]
            # Centerness = sqrt((min_lr / max_lr) * (min_tb / max_tb)).
            ctn_targets = np.sqrt((reg_targets[:, [0, 2]].min(axis=1) / \
                                   reg_targets[:, [0, 2]].max(axis=1)) * \
                                  (reg_targets[:, [1, 3]].min(axis=1) / \
                                   reg_targets[:, [1, 3]].max(axis=1))).astype(np.float32)
            ctn_targets = np.reshape(
                ctn_targets, newshape=[ctn_targets.shape[0], 1])
            ctn_targets[labels <= 0] = 0
            pos_ind = np.nonzero(labels != 0)
            reg_targets_pos = reg_targets[pos_ind[0], :]
            # Split the flat point arrays back into per-level chunks.
            split_sections = []
            beg = 0
            for lvl in range(len(num_points_each_level)):
                end = beg + num_points_each_level[lvl]
                split_sections.append(end)
                beg = end
            labels_by_level = np.split(labels, split_sections, axis=0)
            reg_targets_by_level = np.split(reg_targets, split_sections, axis=0)
            ctn_targets_by_level = np.split(ctn_targets, split_sections, axis=0)
            for lvl in range(len(self.downsample_ratios)):
                grid_w = int(np.ceil(w / self.downsample_ratios[lvl]))
                grid_h = int(np.ceil(h / self.downsample_ratios[lvl]))
                if self.norm_reg_targets:
                    sample['reg_target{}'.format(lvl)] = \
                        np.reshape(
                            reg_targets_by_level[lvl] / \
                            self.downsample_ratios[lvl],
                            newshape=[grid_h, grid_w, 4])
                else:
                    sample['reg_target{}'.format(lvl)] = np.reshape(
                        reg_targets_by_level[lvl],
                        newshape=[grid_h, grid_w, 4])
                sample['labels{}'.format(lvl)] = np.reshape(
                    labels_by_level[lvl], newshape=[grid_h, grid_w, 1])
                sample['centerness{}'.format(lvl)] = np.reshape(
                    ctn_targets_by_level[lvl], newshape=[grid_h, grid_w, 1])

            # Raw annotations are no longer needed after target generation.
            sample.pop('is_crowd', None)
            sample.pop('difficult', None)
            sample.pop('gt_class', None)
            sample.pop('gt_bbox', None)
        return samples
|
||||
|
||||
|
||||
@register_op
class Gt2TTFTarget(BaseOperator):
    __shared__ = ['num_classes']
    """
    Gt2TTFTarget
    Generate TTFNet targets by ground truth data

    Args:
        num_classes(int): the number of classes.
        down_ratio(int): the down ratio from images to heatmap, 4 by default.
        alpha(float): the alpha parameter to generate gaussian target.
            0.54 by default.
    """

    def __init__(self, num_classes=80, down_ratio=4, alpha=0.54):
        super(Gt2TTFTarget, self).__init__()
        self.down_ratio = down_ratio  # image-to-heatmap stride
        self.num_classes = num_classes
        self.alpha = alpha  # shrink factor for the gaussian radii

    def __call__(self, samples, context=None):
        """Attach 'ttf_heatmap', 'ttf_box_target' and 'ttf_reg_weight' to each
        sample, consuming the raw gt annotations."""
        # Only shape[1] of the first image is read: assumes all images in the
        # batch are square and share one size — TODO confirm against the
        # upstream batch transforms.
        output_size = samples[0]['image'].shape[1]
        feat_size = output_size // self.down_ratio
        for sample in samples:
            heatmap = np.zeros(
                (self.num_classes, feat_size, feat_size), dtype='float32')
            # -1 marks feature-map cells that have no box assigned
            box_target = np.ones(
                (4, feat_size, feat_size), dtype='float32') * -1
            reg_weight = np.zeros((1, feat_size, feat_size), dtype='float32')

            gt_bbox = sample['gt_bbox']
            gt_class = sample['gt_class']

            bbox_w = gt_bbox[:, 2] - gt_bbox[:, 0] + 1
            bbox_h = gt_bbox[:, 3] - gt_bbox[:, 1] + 1
            area = bbox_w * bbox_h
            boxes_areas_log = np.log(area)
            # sort boxes largest-area first so that smaller boxes, drawn
            # later, overwrite larger ones where they overlap
            boxes_ind = np.argsort(boxes_areas_log, axis=0)[::-1]
            boxes_area_topk_log = boxes_areas_log[boxes_ind]
            gt_bbox = gt_bbox[boxes_ind]
            gt_class = gt_class[boxes_ind]

            # boxes in feature-map coordinates, clipped to the map
            feat_gt_bbox = gt_bbox / self.down_ratio
            feat_gt_bbox = np.clip(feat_gt_bbox, 0, feat_size - 1)
            feat_hs, feat_ws = (feat_gt_bbox[:, 3] - feat_gt_bbox[:, 1],
                                feat_gt_bbox[:, 2] - feat_gt_bbox[:, 0])

            # box centers (x, y) in feature-map coordinates
            ct_inds = np.stack(
                [(gt_bbox[:, 0] + gt_bbox[:, 2]) / 2,
                 (gt_bbox[:, 1] + gt_bbox[:, 3]) / 2],
                axis=1) / self.down_ratio

            h_radiuses_alpha = (feat_hs / 2. * self.alpha).astype('int32')
            w_radiuses_alpha = (feat_ws / 2. * self.alpha).astype('int32')

            for k in range(len(gt_bbox)):
                cls_id = gt_class[k]
                fake_heatmap = np.zeros((feat_size, feat_size), dtype='float32')
                self.draw_truncate_gaussian(fake_heatmap, ct_inds[k],
                                            h_radiuses_alpha[k],
                                            w_radiuses_alpha[k])

                heatmap[cls_id] = np.maximum(heatmap[cls_id], fake_heatmap)
                box_target_inds = fake_heatmap > 0
                box_target[:, box_target_inds] = gt_bbox[k][:, None]

                # weight each covered cell by its gaussian response,
                # normalized so a box's weights sum to log(area)
                local_heatmap = fake_heatmap[box_target_inds]
                ct_div = np.sum(local_heatmap)
                local_heatmap *= boxes_area_topk_log[k]
                reg_weight[0, box_target_inds] = local_heatmap / ct_div
            sample['ttf_heatmap'] = heatmap
            sample['ttf_box_target'] = box_target
            sample['ttf_reg_weight'] = reg_weight
            # raw annotations are consumed and no longer needed downstream
            sample.pop('is_crowd', None)
            sample.pop('difficult', None)
            sample.pop('gt_class', None)
            sample.pop('gt_bbox', None)
            sample.pop('gt_score', None)
        return samples

    def draw_truncate_gaussian(self, heatmap, center, h_radius, w_radius):
        """Draw a truncated 2-D gaussian with the given radii onto ``heatmap``
        around ``center`` (x, y), taking the element-wise maximum with values
        already present. Modifies ``heatmap`` in place and also returns it."""
        h, w = 2 * h_radius + 1, 2 * w_radius + 1
        sigma_x = w / 6
        sigma_y = h / 6
        gaussian = gaussian2D((h, w), sigma_x, sigma_y)

        x, y = int(center[0]), int(center[1])

        height, width = heatmap.shape[0:2]

        # clip the gaussian window to the heatmap borders
        left, right = min(x, w_radius), min(width - x, w_radius + 1)
        top, bottom = min(y, h_radius), min(height - y, h_radius + 1)

        masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
        masked_gaussian = gaussian[h_radius - top:h_radius + bottom, w_radius -
                                   left:w_radius + right]
        if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:
            heatmap[y - top:y + bottom, x - left:x + right] = np.maximum(
                masked_heatmap, masked_gaussian)
        return heatmap
|
||||
|
||||
|
||||
@register_op
class Gt2Solov2Target(BaseOperator):
    """Assign mask target and labels in SOLOv2 network.

    For each sample, writes per-level 'cate_label{i}', 'ins_label{i}' and
    'grid_order{i}' plus a global 'fg_num', then pads the instance tensors to
    the batch-wide maximum instance count per level.

    Args:
        num_grids (list): The list of feature map grids size.
        scale_ranges (list): The list of mask boundary range.
        coord_sigma (float): The coefficient of coordinate area length.
        sampling_ratio (float): The ratio of down sampling.
    """

    def __init__(self,
                 num_grids=[40, 36, 24, 16, 12],
                 scale_ranges=[[1, 96], [48, 192], [96, 384], [192, 768],
                               [384, 2048]],
                 coord_sigma=0.2,
                 sampling_ratio=4.0):
        super(Gt2Solov2Target, self).__init__()
        self.num_grids = num_grids
        self.scale_ranges = scale_ranges
        self.coord_sigma = coord_sigma
        self.sampling_ratio = sampling_ratio

    def _scale_size(self, im, scale):
        """Resize ``im`` by ``scale`` using bilinear interpolation."""
        resized_img = cv2.resize(
            im, None, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
        return resized_img

    def __call__(self, samples, context=None):
        sample_id = 0
        # batch-wide max instance count per FPN level, used for padding below
        max_ins_num = [0] * len(self.num_grids)
        for sample in samples:
            gt_bboxes_raw = sample['gt_bbox']
            gt_labels_raw = sample['gt_class'] + 1  # shift so 0 = background
            im_c, im_h, im_w = sample['image'].shape[:]
            gt_masks_raw = sample['gt_segm'].astype(np.uint8)
            mask_feat_size = [
                int(im_h / self.sampling_ratio), int(im_w / self.sampling_ratio)
            ]
            # sqrt of box area decides which level(s) own each instance
            gt_areas = np.sqrt((gt_bboxes_raw[:, 2] - gt_bboxes_raw[:, 0]) *
                               (gt_bboxes_raw[:, 3] - gt_bboxes_raw[:, 1]))
            ins_ind_label_list = []
            idx = 0
            for (lower_bound, upper_bound), num_grid \
                    in zip(self.scale_ranges, self.num_grids):

                hit_indices = ((gt_areas >= lower_bound) &
                               (gt_areas <= upper_bound)).nonzero()[0]
                num_ins = len(hit_indices)

                ins_label = []
                grid_order = []
                cate_label = np.zeros([num_grid, num_grid], dtype=np.int64)
                # np.bool was removed in NumPy 1.24; the builtin is equivalent
                ins_ind_label = np.zeros([num_grid**2], dtype=bool)

                if num_ins == 0:
                    # no instance at this level: emit one dummy mask so the
                    # downstream gather still has a tensor to index.
                    # NOTE(review): max_ins_num is not updated on this path —
                    # if a level is empty for EVERY sample in the batch the
                    # padding below produces zero-length tensors; confirm the
                    # dataloader guarantees at least one hit per level.
                    ins_label = np.zeros(
                        [1, mask_feat_size[0], mask_feat_size[1]],
                        dtype=np.uint8)
                    ins_ind_label_list.append(ins_ind_label)
                    sample['cate_label{}'.format(idx)] = cate_label.flatten()
                    sample['ins_label{}'.format(idx)] = ins_label
                    sample['grid_order{}'.format(idx)] = np.asarray(
                        [sample_id * num_grid * num_grid + 0], dtype=np.int32)
                    idx += 1
                    continue
                gt_bboxes = gt_bboxes_raw[hit_indices]
                gt_labels = gt_labels_raw[hit_indices]
                gt_masks = gt_masks_raw[hit_indices, ...]

                # half extents of the "center region" of each box
                half_ws = 0.5 * (
                    gt_bboxes[:, 2] - gt_bboxes[:, 0]) * self.coord_sigma
                half_hs = 0.5 * (
                    gt_bboxes[:, 3] - gt_bboxes[:, 1]) * self.coord_sigma

                for seg_mask, gt_label, half_h, half_w in zip(
                        gt_masks, gt_labels, half_hs, half_ws):
                    if seg_mask.sum() == 0:
                        continue
                    # mass center of the instance mask
                    upsampled_size = (mask_feat_size[0] * 4,
                                      mask_feat_size[1] * 4)
                    # ndimage.measurements is a deprecated (and since removed)
                    # alias; the top-level function is the supported spelling
                    center_h, center_w = ndimage.center_of_mass(seg_mask)
                    coord_w = int(
                        (center_w / upsampled_size[1]) // (1. / num_grid))
                    coord_h = int(
                        (center_h / upsampled_size[0]) // (1. / num_grid))

                    # grid cells spanned by the center region (left, top,
                    # right, down), clamped to the grid
                    top_box = max(0,
                                  int(((center_h - half_h) / upsampled_size[0])
                                      // (1. / num_grid)))
                    down_box = min(num_grid - 1,
                                   int(((center_h + half_h) / upsampled_size[0])
                                       // (1. / num_grid)))
                    left_box = max(0,
                                   int(((center_w - half_w) / upsampled_size[1])
                                       // (1. / num_grid)))
                    right_box = min(num_grid - 1,
                                    int(((center_w + half_w) /
                                         upsampled_size[1]) // (1. / num_grid)))

                    # additionally keep the region within one cell of the
                    # mass-center cell
                    top = max(top_box, coord_h - 1)
                    down = min(down_box, coord_h + 1)
                    left = max(coord_w - 1, left_box)
                    right = min(right_box, coord_w + 1)

                    cate_label[top:(down + 1), left:(right + 1)] = gt_label
                    seg_mask = self._scale_size(
                        seg_mask, scale=1. / self.sampling_ratio)
                    # one instance-mask copy per positive grid cell
                    for i in range(top, down + 1):
                        for j in range(left, right + 1):
                            label = int(i * num_grid + j)
                            cur_ins_label = np.zeros(
                                [mask_feat_size[0], mask_feat_size[1]],
                                dtype=np.uint8)
                            cur_ins_label[:seg_mask.shape[0], :seg_mask.shape[
                                1]] = seg_mask
                            ins_label.append(cur_ins_label)
                            ins_ind_label[label] = True
                            grid_order.append(sample_id * num_grid * num_grid +
                                              label)
                if ins_label == []:
                    # every candidate mask was empty: fall back to the dummy
                    ins_label = np.zeros(
                        [1, mask_feat_size[0], mask_feat_size[1]],
                        dtype=np.uint8)
                    ins_ind_label_list.append(ins_ind_label)
                    sample['cate_label{}'.format(idx)] = cate_label.flatten()
                    sample['ins_label{}'.format(idx)] = ins_label
                    sample['grid_order{}'.format(idx)] = np.asarray(
                        [sample_id * num_grid * num_grid + 0], dtype=np.int32)
                else:
                    ins_label = np.stack(ins_label, axis=0)
                    ins_ind_label_list.append(ins_ind_label)
                    sample['cate_label{}'.format(idx)] = cate_label.flatten()
                    sample['ins_label{}'.format(idx)] = ins_label
                    sample['grid_order{}'.format(idx)] = np.asarray(
                        grid_order, dtype=np.int32)
                    assert len(grid_order) > 0
                max_ins_num[idx] = max(
                    max_ins_num[idx],
                    sample['ins_label{}'.format(idx)].shape[0])
                idx += 1
            # total number of positive grid cells across all levels
            ins_ind_labels = np.concatenate(ins_ind_label_list)
            fg_num = np.sum(ins_ind_labels)
            sample['fg_num'] = fg_num
            sample_id += 1

            sample.pop('is_crowd')
            sample.pop('gt_class')
            sample.pop('gt_bbox')
            sample.pop('gt_poly')
            sample.pop('gt_segm')

        # padding batch: pad every level to the batch-wide max instance count
        for data in samples:
            for idx in range(len(self.num_grids)):
                gt_ins_data = np.zeros(
                    [
                        max_ins_num[idx],
                        data['ins_label{}'.format(idx)].shape[1],
                        data['ins_label{}'.format(idx)].shape[2]
                    ],
                    dtype=np.uint8)
                gt_ins_data[0:data['ins_label{}'.format(idx)].shape[
                    0], :, :] = data['ins_label{}'.format(idx)]
                gt_grid_order = np.zeros([max_ins_num[idx]], dtype=np.int32)
                gt_grid_order[0:data['grid_order{}'.format(idx)].shape[
                    0]] = data['grid_order{}'.format(idx)]
                data['ins_label{}'.format(idx)] = gt_ins_data
                data['grid_order{}'.format(idx)] = gt_grid_order

        return samples
|
|
@ -0,0 +1,83 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
from __future__ import division
|
||||
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
|
||||
class Gridmask(object):
    """GridMask-style data augmentation: drop (or, with ``offset``, replace
    with noise) a regular grid of stripes from the image.

    The apply probability ramps up linearly from 0 to ``prob`` over
    ``upper_iter`` training iterations.

    Args:
        use_h (bool): mask horizontal stripes.
        use_w (bool): mask vertical stripes.
        rotate (int): exclusive upper bound (degrees) for the random mask rotation.
        offset (bool): if True, fill masked pixels with random values instead of zeros.
        ratio (float): stripe-width ratio of the grid period.
        mode (int): 1 inverts the mask before applying it.
        prob (float): final probability of applying the augmentation.
        upper_iter (int): iterations over which the probability ramps up.
    """

    def __init__(self,
                 use_h=True,
                 use_w=True,
                 rotate=1,
                 offset=False,
                 ratio=0.5,
                 mode=1,
                 prob=0.7,
                 upper_iter=360000):
        super(Gridmask, self).__init__()
        self.use_h = use_h
        self.use_w = use_w
        self.rotate = rotate
        self.offset = offset
        self.ratio = ratio
        self.mode = mode
        self.prob = prob
        self.st_prob = prob  # target probability; self.prob is the warmed-up value
        self.upper_iter = upper_iter

    def __call__(self, x, curr_iter):
        """Apply gridmask to image ``x`` (HWC ndarray) at iteration
        ``curr_iter``; returns the (possibly unchanged) image.

        NOTE: behavior depends on the exact order of np.random calls below —
        do not reorder them.
        """
        # linear warm-up of the apply probability
        self.prob = self.st_prob * min(1, 1.0 * curr_iter / self.upper_iter)
        if np.random.rand() > self.prob:
            return x
        h, w, _ = x.shape
        # build the mask on a 1.5x canvas so a rotated crop still covers the image
        hh = int(1.5 * h)
        ww = int(1.5 * w)
        d = np.random.randint(2, h)  # grid period in pixels
        # stripe width, clamped to [1, d - 1]
        self.l = min(max(int(d * self.ratio + 0.5), 1), d - 1)
        mask = np.ones((hh, ww), np.float32)
        st_h = np.random.randint(d)  # random phase of the grid
        st_w = np.random.randint(d)
        if self.use_h:
            for i in range(hh // d):
                s = d * i + st_h
                t = min(s + self.l, hh)
                mask[s:t, :] *= 0
        if self.use_w:
            for i in range(ww // d):
                s = d * i + st_w
                t = min(s + self.l, ww)
                mask[:, s:t] *= 0

        # randomly rotate the mask, then crop the central h x w window
        r = np.random.randint(self.rotate)
        mask = Image.fromarray(np.uint8(mask))
        mask = mask.rotate(r)
        mask = np.asarray(mask)
        mask = mask[(hh - h) // 2:(hh - h) // 2 + h, (ww - w) // 2:(ww - w) // 2
                    + w].astype(np.float32)

        if self.mode == 1:
            # invert: keep the stripes, drop the rest
            mask = 1 - mask
        mask = np.expand_dims(mask, axis=-1)
        if self.offset:
            # fill dropped pixels with uniform noise in (-1, 1)
            offset = (2 * (np.random.rand(h, w) - 0.5)).astype(np.float32)
            x = (x * mask + offset * (1 - mask)).astype(x.dtype)
        else:
            x = (x * mask).astype(x.dtype)

        return x
|
|
@ -0,0 +1,663 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# function:
|
||||
# operators to process sample,
|
||||
# eg: decode/resize/crop image
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
try:
|
||||
from collections.abc import Sequence
|
||||
except Exception:
|
||||
from collections import Sequence
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
import math
|
||||
import copy
|
||||
|
||||
from ...modeling.keypoint_utils import get_affine_mat_kernel, warp_affine_joints, get_affine_transform, affine_transform
|
||||
from ppdet.core.workspace import serializable
|
||||
from ppdet.utils.logger import setup_logger
|
||||
logger = setup_logger(__name__)
|
||||
|
||||
# NOTE(review): defined but never appended to in this chunk — presumably
# filled by a registration helper elsewhere; confirm before removing.
registered_ops = []

# public keypoint transform ops exported by this module
__all__ = [
    'RandomAffine', 'KeyPointFlip', 'TagGenerate', 'ToHeatmaps',
    'NormalizePermute', 'EvalAffine', 'RandomFlipHalfBodyTransform',
    'TopDownAffine', 'ToHeatmapsTopDown', 'TopDownEvalAffine'
]
|
||||
|
||||
|
||||
def register_keypointop(cls):
    """Register a keypoint transform op by making it config-serializable."""
    return serializable(cls)
|
||||
|
||||
|
||||
@register_keypointop
class KeyPointFlip(object):
    """Randomly flip the image horizontally with probability ``flip_prob``,
    flipping masks and keypoint coordinates to match.

    Left and right keypoints are exchanged via ``flip_permutation`` because a
    right-side joint becomes a left-side joint after the image is flipped.

    Args:
        flip_permutation (list[17]): the left-right exchange order list corresponding to [0,1,2,...,16]
        hmsize (list[2]): output heatmap's shape list of different scale outputs of higherhrnet
        flip_prob (float): the probability of flipping the image
        records(dict): the dict contained the image, mask and coords

    Returns:
        records(dict): contain the image, mask and coords after transformed

    """

    def __init__(self, flip_permutation, hmsize, flip_prob=0.5):
        super(KeyPointFlip, self).__init__()
        assert isinstance(flip_permutation, Sequence)
        self.flip_permutation = flip_permutation
        self.flip_prob = flip_prob
        self.hmsize = hmsize

    def __call__(self, records):
        image = records['image']
        kpts_lst = records['joints']
        mask_lst = records['mask']
        flip = np.random.random() < self.flip_prob
        if flip:
            image = image[:, ::-1]  # horizontal flip (width axis)
            # one joint/mask set per heatmap scale; lists are mutated in place
            for idx, hmsize in enumerate(self.hmsize):
                if len(mask_lst) > idx:
                    mask_lst[idx] = mask_lst[idx][:, ::-1]
                # reorder joints so left/right labels stay correct
                if kpts_lst[idx].ndim == 3:
                    kpts_lst[idx] = kpts_lst[idx][:, self.flip_permutation]
                else:
                    kpts_lst[idx] = kpts_lst[idx][self.flip_permutation]
                # mirror x coordinates within the heatmap frame
                kpts_lst[idx][..., 0] = hmsize - kpts_lst[idx][..., 0]
                kpts_lst[idx] = kpts_lst[idx].astype(np.int64)
                # zero the visibility flag of joints that fell off the heatmap
                kpts_lst[idx][kpts_lst[idx][..., 0] >= hmsize, 2] = 0
                kpts_lst[idx][kpts_lst[idx][..., 1] >= hmsize, 2] = 0
                kpts_lst[idx][kpts_lst[idx][..., 0] < 0, 2] = 0
                kpts_lst[idx][kpts_lst[idx][..., 1] < 0, 2] = 0
        records['image'] = image
        records['joints'] = kpts_lst
        records['mask'] = mask_lst
        return records
|
||||
|
||||
|
||||
def get_warp_matrix(theta, size_input, size_dst, size_target):
    """Calculate the transformation matrix under the constraint of unbiased.
    Paper ref: Huang et al. The Devil is in the Details: Delving into Unbiased
    Data Processing for Human Pose Estimation (CVPR 2020).

    Args:
        theta (float): Rotation angle in degrees.
        size_input (np.ndarray): Size of input image [w, h].
        size_dst (np.ndarray): Size of output image [w, h].
        size_target (np.ndarray): Size of ROI in input plane [w, h].

    Returns:
        matrix (np.ndarray): A 2x3 float32 affine transformation matrix.
    """
    angle = np.deg2rad(theta)
    cos_t = math.cos(angle)
    sin_t = math.sin(angle)
    # per-axis scale from ROI size to destination size
    sx = size_dst[0] / size_target[0]
    sy = size_dst[1] / size_target[1]
    # translation terms recenter the rotated input onto the target ROI
    tx = sx * (-0.5 * size_input[0] * cos_t + 0.5 * size_input[1] * sin_t +
               0.5 * size_target[0])
    ty = sy * (-0.5 * size_input[0] * sin_t - 0.5 * size_input[1] * cos_t +
               0.5 * size_target[1])
    matrix = np.zeros((2, 3), dtype=np.float32)
    matrix[0] = [cos_t * sx, -sin_t * sx, tx]
    matrix[1] = [sin_t * sy, cos_t * sy, ty]
    return matrix
|
||||
|
||||
|
||||
@register_keypointop
class RandomAffine(object):
    """apply affine transform to image, mask and coords
    to achieve the rotate, scale and shift effect for training image

    Args:
        max_degree (float): the max absolute rotate degree to apply, transform range is [-max_degree, max_degree]
        scale (list[2]): the scale range to apply, transform range is [min, max]
        max_shift (float): the max absolute shift ratio to apply, transform range is [-max_shift*imagesize, max_shift*imagesize]
        hmsize (list[2]): output heatmap's shape list of different scale outputs of higherhrnet
        trainsize (int): the standard length used to train, the 'scale_type' side of [h,w] will be resized to trainsize
        scale_type (str): which side of [h,w] to match to trainsize, chosen between 'short' and 'long'
        records(dict): the dict contained the image, mask and coords

    Returns:
        records(dict): contain the image, mask and coords after transformed

    """

    def __init__(self,
                 max_degree=30,
                 scale=[0.75, 1.5],
                 max_shift=0.2,
                 hmsize=[128, 256],
                 trainsize=512,
                 scale_type='short'):
        super(RandomAffine, self).__init__()
        self.max_degree = max_degree
        self.min_scale = scale[0]
        self.max_scale = scale[1]
        self.max_shift = max_shift
        self.hmsize = hmsize
        self.trainsize = trainsize
        self.scale_type = scale_type

    def _get_affine_matrix(self, center, scale, res, rot=0):
        """Generate a 3x3 matrix mapping a ``scale``-sized square region
        around ``center`` onto an output of size ``res``, optionally rotated
        by ``rot`` degrees around the output center."""
        h = scale
        t = np.zeros((3, 3), dtype=np.float32)
        t[0, 0] = float(res[1]) / h
        t[1, 1] = float(res[0]) / h
        t[0, 2] = res[1] * (-float(center[0]) / h + .5)
        t[1, 2] = res[0] * (-float(center[1]) / h + .5)
        t[2, 2] = 1
        if rot != 0:
            rot = -rot  # To match direction of rotation from cropping
            rot_mat = np.zeros((3, 3), dtype=np.float32)
            rot_rad = rot * np.pi / 180
            sn, cs = np.sin(rot_rad), np.cos(rot_rad)
            rot_mat[0, :2] = [cs, -sn]
            rot_mat[1, :2] = [sn, cs]
            rot_mat[2, 2] = 1
            # Need to rotate around center
            t_mat = np.eye(3)
            t_mat[0, 2] = -res[1] / 2
            t_mat[1, 2] = -res[0] / 2
            t_inv = t_mat.copy()
            t_inv[:2, 2] *= -1
            t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t)))
        return t

    def __call__(self, records):
        image = records['image']
        keypoints = records['joints']
        heatmap_mask = records['mask']

        # sample rotation degree uniformly in [-max_degree, max_degree]
        degree = (np.random.random() * 2 - 1) * self.max_degree
        shape = np.array(image.shape[:2][::-1])  # (w, h)
        # (duplicated "center =" kept byte-identical; harmless no-op)
        center = center = np.array((np.array(shape) / 2))

        # sample scale uniformly in [min_scale, max_scale]
        aug_scale = np.random.random() * (self.max_scale - self.min_scale
                                          ) + self.min_scale
        if self.scale_type == 'long':
            scale = max(shape[0], shape[1]) / 1.0
        elif self.scale_type == 'short':
            scale = min(shape[0], shape[1]) / 1.0
        else:
            raise ValueError('Unknown scale type: {}'.format(self.scale_type))
        roi_size = aug_scale * scale
        dx = int(0)
        dy = int(0)
        if self.max_shift > 0:
            # random center shift, proportional to the sampled roi size
            dx = np.random.randint(-self.max_shift * roi_size,
                                   self.max_shift * roi_size)
            dy = np.random.randint(-self.max_shift * roi_size,
                                   self.max_shift * roi_size)

        center += np.array([dx, dy])
        input_size = 2 * center

        # scale joint coords by the image size — assumes they arrive
        # normalized to [0, 1]; TODO confirm against upstream transforms
        keypoints[..., :2] *= shape
        heatmap_mask *= 255
        kpts_lst = []
        mask_lst = []

        image_affine_mat = self._get_affine_matrix(
            center, roi_size, (self.trainsize, self.trainsize), degree)[:2]
        image = cv2.warpAffine(
            image,
            image_affine_mat, (self.trainsize, self.trainsize),
            flags=cv2.INTER_LINEAR)
        for hmsize in self.hmsize:
            kpts = copy.deepcopy(keypoints)
            mask_affine_mat = self._get_affine_matrix(
                center, roi_size, (hmsize, hmsize), degree)[:2]
            # NOTE(review): if heatmap_mask is None, ``mask`` is unbound when
            # appended below (NameError) — confirm callers always provide
            # a non-None records['mask'].
            if heatmap_mask is not None:
                mask = cv2.warpAffine(heatmap_mask, mask_affine_mat,
                                      (hmsize, hmsize))
                mask = ((mask / 255) > 0.5).astype(np.float32)
            kpts[..., 0:2] = warp_affine_joints(kpts[..., 0:2].copy(),
                                                mask_affine_mat)
            # zero visibility for joints warped outside the heatmap
            kpts[np.trunc(kpts[..., 0]) >= hmsize, 2] = 0
            kpts[np.trunc(kpts[..., 1]) >= hmsize, 2] = 0
            kpts[np.trunc(kpts[..., 0]) < 0, 2] = 0
            kpts[np.trunc(kpts[..., 1]) < 0, 2] = 0
            kpts_lst.append(kpts)
            mask_lst.append(mask)
        records['image'] = image
        records['joints'] = kpts_lst
        records['mask'] = mask_lst
        return records
|
||||
|
||||
|
||||
@register_keypointop
class EvalAffine(object):
    """apply affine transform to image
    resize the short of [h,w] to standard size for eval

    Args:
        size (int): the standard length used to train, the 'short' of [h,w] will be resized to this size
        stride (int): alignment stride (stored; not read in this transform)
        records(dict): the dict contained the image, mask and coords

    Returns:
        records(dict): contain the image, mask and coords after transformed

    """

    def __init__(self, size, stride=64):
        super(EvalAffine, self).__init__()
        self.size = size
        self.stride = stride

    def __call__(self, records):
        img = records['image']
        heat_mask = records.get('mask')
        rows, cols, _ = img.shape
        # affine matrix + output size come from the shared keypoint helper
        trans, resized_wh = get_affine_mat_kernel(rows, cols, self.size,
                                                  inv=False)
        records['image'] = cv2.warpAffine(img, trans, resized_wh)
        if heat_mask is not None:
            records['mask'] = cv2.warpAffine(heat_mask, trans, resized_wh)
        # joints are not used during eval; drop them if present
        if 'joints' in records:
            records.pop('joints')
        return records
|
||||
|
||||
|
||||
@register_keypointop
class NormalizePermute(object):
    """Normalize an HWC image per channel and permute it to CHW float32.

    Args:
        mean (list): per-channel mean (0-255 scale by default).
        std (list): per-channel std (0-255 scale by default).
        is_scale (bool): if True, divide the image by 255 before normalizing.
            NOTE(review): the default mean/std are on the 0-255 scale while
            is_scale divides by 255 first — kept as-is to preserve the
            original behavior; confirm this pairing is intended.

    Returns:
        records (dict): record with 'image' replaced by the CHW tensor.
    """

    def __init__(self,
                 mean=[123.675, 116.28, 103.53],
                 std=[58.395, 57.120, 57.375],
                 is_scale=True):
        super(NormalizePermute, self).__init__()
        self.mean = mean
        self.std = std
        self.is_scale = is_scale

    def __call__(self, records):
        image = records['image'].astype(np.float32)
        if self.is_scale:
            image /= 255.
        # HWC -> CHW
        image = image.transpose((2, 0, 1))
        mean = np.array(self.mean, dtype=np.float32)
        invstd = 1. / np.array(self.std, dtype=np.float32)
        # vectorized per-channel (x - mean) / std; replaces the original
        # per-channel __isub__/__imul__ loop. A channel-count mismatch now
        # raises instead of silently normalizing only the first channels.
        image = (image - mean[:, None, None]) * invstd[:, None, None]
        records['image'] = image
        return records
|
||||
|
||||
|
||||
@register_keypointop
class TagGenerate(object):
    """record gt coords for aeloss to sample coords value in tagmaps

    Args:
        num_joints (int): the keypoint numbers of dataset to train
        max_people (int): maximum number of people supported when sampling
            for the AE loss
        records(dict): the dict contained the image, mask and coords

    Returns:
        records(dict): contain the gt coords used in tagmap

    """

    def __init__(self, num_joints, max_people=30):
        super(TagGenerate, self).__init__()
        self.max_people = max_people
        self.num_joints = num_joints

    def __call__(self, records):
        kpts_lst = records['joints']
        # only the first (highest-resolution) joint set feeds the tagmap
        kpts = kpts_lst[0]
        tagmap = np.zeros((self.max_people, self.num_joints, 4), dtype=np.int64)
        inds = np.where(kpts[..., 2] > 0)
        p, j = inds[0], inds[1]
        visible = kpts[inds]
        # tagmap is [max_people, num_joints, 4]; per visible (person, joint)
        # the last dim holds [joint id, y, x, valid flag]
        tagmap[p, j, 0] = j
        tagmap[p, j, 1] = visible[..., 1]  # y
        tagmap[p, j, 2] = visible[..., 0]  # x
        tagmap[p, j, 3] = 1
        records['tagmap'] = tagmap
        # joints are consumed here; downstream reads the tagmap instead
        del records['joints']
        return records
|
||||
|
||||
|
||||
@register_keypointop
class ToHeatmaps(object):
    """to generate the gaussian heatmaps of keypoint for heatmap loss

    Args:
        num_joints (int): the keypoint numbers of dataset to train
        hmsize (list[2]): output heatmap's shape list of different scale outputs of higherhrnet
        sigma (float): the std of the gaussian kernel; defaults to hmsize[0] // 64
        records(dict): the dict contained the image, mask and coords

    Returns:
        records(dict): contain the heatmaps used to heatmaploss

    """

    def __init__(self, num_joints, hmsize, sigma=None):
        super(ToHeatmaps, self).__init__()
        self.num_joints = num_joints
        self.hmsize = np.array(hmsize)
        if sigma is None:
            sigma = hmsize[0] // 64
        self.sigma = sigma

        # precompute one (6*sigma+3)^2 gaussian patch centered at 3*sigma+1
        r = 6 * sigma + 3
        x = np.arange(0, r, 1, np.float32)
        y = x[:, None]
        x0, y0 = 3 * sigma + 1, 3 * sigma + 1
        self.gaussian = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * sigma**2))

    def __call__(self, records):
        kpts_lst = records['joints']
        mask_lst = records['mask']
        for idx, hmsize in enumerate(self.hmsize):
            mask = mask_lst[idx]
            kpts = kpts_lst[idx]
            heatmaps = np.zeros((self.num_joints, hmsize, hmsize))
            inds = np.where(kpts[..., 2] > 0)
            visible = kpts[inds].astype(np.int64)[..., :2]
            # ul/br: paste-window corners in heatmap coords; sul/sbr: the
            # matching corners inside the precomputed gaussian patch;
            # dul/dbr: the window clipped to the heatmap.
            # NOTE(review): slicing below needs these to stay integer arrays,
            # which holds only for integer sigma — confirm sigma is int.
            ul = np.round(visible - 3 * self.sigma - 1)
            br = np.round(visible + 3 * self.sigma + 2)
            sul = np.maximum(0, -ul)
            sbr = np.minimum(hmsize, br) - ul
            dul = np.clip(ul, 0, hmsize - 1)
            dbr = np.clip(br, 0, hmsize)
            for i in range(len(visible)):
                dx1, dy1 = dul[i]
                dx2, dy2 = dbr[i]
                sx1, sy1 = sul[i]
                sx2, sy2 = sbr[i]
                # paste with max so overlapping keypoints keep the stronger
                # response
                heatmaps[inds[1][i], dy1:dy2, dx1:dx2] = np.maximum(
                    self.gaussian[sy1:sy2, sx1:sx2],
                    heatmaps[inds[1][i], dy1:dy2, dx1:dx2])
            records['heatmap_gt{}x'.format(idx + 1)] = heatmaps
            records['mask_{}x'.format(idx + 1)] = mask
        del records['mask']
        return records
|
||||
|
||||
|
||||
@register_keypointop
|
||||
class RandomFlipHalfBodyTransform(object):
|
||||
"""apply data augment to image and coords
|
||||
to achieve the flip, scale, rotate and half body transform effect for training image
|
||||
|
||||
Args:
|
||||
trainsize (list):[w, h], Image target size
|
||||
upper_body_ids (list): The upper body joint ids
|
||||
flip_pairs (list): The left-right joints exchange order list
|
||||
pixel_std (int): The pixel std of the scale
|
||||
scale (float): The scale factor to transform the image
|
||||
rot (int): The rotate factor to transform the image
|
||||
num_joints_half_body (int): The joints threshold of the half body transform
|
||||
prob_half_body (float): The threshold of the half body transform
|
||||
flip (bool): Whether to flip the image
|
||||
|
||||
Returns:
|
||||
records(dict): contain the image and coords after tranformed
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
trainsize,
|
||||
upper_body_ids,
|
||||
flip_pairs,
|
||||
pixel_std,
|
||||
scale=0.35,
|
||||
rot=40,
|
||||
num_joints_half_body=8,
|
||||
prob_half_body=0.3,
|
||||
flip=True,
|
||||
rot_prob=0.6):
|
||||
super(RandomFlipHalfBodyTransform, self).__init__()
|
||||
self.trainsize = trainsize
|
||||
self.upper_body_ids = upper_body_ids
|
||||
self.flip_pairs = flip_pairs
|
||||
self.pixel_std = pixel_std
|
||||
self.scale = scale
|
||||
self.rot = rot
|
||||
self.num_joints_half_body = num_joints_half_body
|
||||
self.prob_half_body = prob_half_body
|
||||
self.flip = flip
|
||||
self.aspect_ratio = trainsize[0] * 1.0 / trainsize[1]
|
||||
self.rot_prob = rot_prob
|
||||
|
||||
def halfbody_transform(self, joints, joints_vis):
|
||||
upper_joints = []
|
||||
lower_joints = []
|
||||
for joint_id in range(joints.shape[0]):
|
||||
if joints_vis[joint_id][0] > 0:
|
||||
if joint_id in self.upper_body_ids:
|
||||
upper_joints.append(joints[joint_id])
|
||||
else:
|
||||
lower_joints.append(joints[joint_id])
|
||||
if np.random.randn() < 0.5 and len(upper_joints) > 2:
|
||||
selected_joints = upper_joints
|
||||
else:
|
||||
selected_joints = lower_joints if len(
|
||||
lower_joints) > 2 else upper_joints
|
||||
if len(selected_joints) < 2:
|
||||
return None, None
|
||||
selected_joints = np.array(selected_joints, dtype=np.float32)
|
||||
center = selected_joints.mean(axis=0)[:2]
|
||||
left_top = np.amin(selected_joints, axis=0)
|
||||
right_bottom = np.amax(selected_joints, axis=0)
|
||||
w = right_bottom[0] - left_top[0]
|
||||
h = right_bottom[1] - left_top[1]
|
||||
if w > self.aspect_ratio * h:
|
||||
h = w * 1.0 / self.aspect_ratio
|
||||
elif w < self.aspect_ratio * h:
|
||||
w = h * self.aspect_ratio
|
||||
scale = np.array(
|
||||
[w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
|
||||
dtype=np.float32)
|
||||
scale = scale * 1.5
|
||||
|
||||
return center, scale
|
||||
|
||||
def flip_joints(self, joints, joints_vis, width, matched_parts):
|
||||
joints[:, 0] = width - joints[:, 0] - 1
|
||||
for pair in matched_parts:
|
||||
joints[pair[0], :], joints[pair[1], :] = \
|
||||
joints[pair[1], :], joints[pair[0], :].copy()
|
||||
joints_vis[pair[0], :], joints_vis[pair[1], :] = \
|
||||
joints_vis[pair[1], :], joints_vis[pair[0], :].copy()
|
||||
|
||||
return joints * joints_vis, joints_vis
|
||||
|
||||
    def __call__(self, records):
        """Randomly perturb center/scale/rotation (and flip) for training.

        Applies, in order: optional half-body crop, random rescale, random
        rotation, and random horizontal flip; writes the results back into
        `records` ('center', 'scale', 'rotate', plus flipped image/joints).
        """
        image = records['image']
        joints = records['joints']
        joints_vis = records['joints_vis']
        c = records['center']
        s = records['scale']
        r = 0
        # Half-body augmentation: only when enough joints are visible and
        # the probability gate fires.
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body and
                np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.halfbody_transform(joints,
                                                               joints_vis)
            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body
        sf = self.scale
        rf = self.rot
        # Random rescale: normal draw around 1, clipped to [1-sf, 1+sf].
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        # Random rotation in [-2*rf, 2*rf], applied with probability rot_prob.
        r = np.clip(np.random.randn() * rf, -rf * 2,
                    rf * 2) if np.random.random() <= self.rot_prob else 0

        if self.flip and np.random.random() <= 0.5:
            # Mirror the image horizontally and remap joints and center.
            image = image[:, ::-1, :]
            joints, joints_vis = self.flip_joints(
                joints, joints_vis, image.shape[1], self.flip_pairs)
            c[0] = image.shape[1] - c[0] - 1
        records['image'] = image
        records['joints'] = joints
        records['joints_vis'] = joints_vis
        records['center'] = c
        records['scale'] = s
        records['rotate'] = r

        return records
|
||||
|
||||
|
||||
@register_keypointop
class TopDownAffine(object):
    """Apply an affine transform to the image and keypoint coords.

    Warps the image to the fixed training size from the record's
    center/scale (and optional rotation), and maps the visible joints
    through the same transform.

    Args:
        trainsize (list): [w, h], the standard size used to train
        records (dict): the dict contained the image and coords

    Returns:
        records (dict): contain the image and coords after transformed
    """

    def __init__(self, trainsize):
        # [w, h] output canvas size.
        self.trainsize = trainsize

    def __call__(self, records):
        image = records['image']
        joints = records['joints']
        joints_vis = records['joints_vis']
        # 'rotate' is only present when a random rotate/flip op ran earlier.
        rot = records['rotate'] if "rotate" in records else 0
        # NOTE(review): scale is multiplied by 200 — presumably scale is
        # stored in 200-pixel units (COCO keypoint convention); confirm.
        trans = get_affine_transform(records['center'], records['scale'] * 200,
                                     rot, self.trainsize)
        image = cv2.warpAffine(
            image,
            trans, (int(self.trainsize[0]), int(self.trainsize[1])),
            flags=cv2.INTER_LINEAR)
        # Only visible joints are remapped; invisible ones keep stale coords
        # (they are masked out by target_weight downstream).
        for i in range(joints.shape[0]):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
        records['image'] = image
        records['joints'] = joints

        return records
|
||||
|
||||
|
||||
@register_keypointop
class TopDownEvalAffine(object):
    """Warp the input image onto the fixed training canvas for evaluation.

    Args:
        trainsize (list): [w, h], the standard size used to train
        records (dict): the dict contained the image and coords

    Returns:
        records (dict): contain the image and coords after transformed
    """

    def __init__(self, trainsize):
        self.trainsize = trainsize

    def __call__(self, records):
        # im_shape is stored as [h, w]; reverse it to [w, h] so it matches
        # the (center, scale) convention of the affine helper.
        wh = records['im_shape'][::-1]
        # No rotation at eval time; the whole image is the crop.
        matrix = get_affine_transform(wh / 2., wh, 0, self.trainsize)
        dst_w = int(self.trainsize[0])
        dst_h = int(self.trainsize[1])
        records['image'] = cv2.warpAffine(
            records['image'],
            matrix, (dst_w, dst_h),
            flags=cv2.INTER_LINEAR)
        return records
|
||||
|
||||
|
||||
@register_keypointop
|
||||
class ToHeatmapsTopDown(object):
    """Generate Gaussian keypoint heatmaps for the heatmap loss.

    Args:
        hmsize (list): [w, h] output heatmap's size
        sigma (float): the std of the generated gaussian kernel

    Returns:
        records (dict): gains 'target' (heatmaps, [num_joints, h, w]) and
            'target_weight' ([num_joints, 1]); 'joints' and 'joints_vis'
            are consumed and removed.
    """

    def __init__(self, hmsize, sigma):
        super(ToHeatmapsTopDown, self).__init__()
        self.hmsize = np.array(hmsize)
        self.sigma = sigma

    def __call__(self, records):
        joints = records['joints']
        joints_vis = records['joints_vis']
        num_joints = joints.shape[0]
        # Image size as [w, h] to match the hmsize ordering.
        image_size = np.array(
            [records['image'].shape[1], records['image'].shape[0]])
        target_weight = np.ones((num_joints, 1), dtype=np.float32)
        target_weight[:, 0] = joints_vis[:, 0]
        target = np.zeros(
            (num_joints, self.hmsize[1], self.hmsize[0]), dtype=np.float32)
        tmp_size = self.sigma * 3
        # Hoisted out of the per-joint loop: both quantities are identical
        # for every joint (they depend only on image/heatmap size and sigma).
        feat_stride = image_size / self.hmsize
        # The gaussian is not normalized; its center value equals 1.
        size = 2 * tmp_size + 1
        x = np.arange(0, size, 1, np.float32)
        y = x[:, np.newaxis]
        x0 = y0 = size // 2
        g = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * self.sigma**2))
        for joint_id in range(num_joints):
            # Joint center in heatmap coordinates (rounded to nearest cell).
            mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5)
            mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5)
            # Check that any part of the gaussian is in-bounds.
            ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
            br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
            if ul[0] >= self.hmsize[0] or ul[1] >= self.hmsize[1] or br[
                    0] < 0 or br[1] < 0:
                # Gaussian entirely outside the heatmap: mask this joint out.
                target_weight[joint_id] = 0
                continue
            # Usable gaussian range, clipped to the heatmap bounds.
            g_x = max(0, -ul[0]), min(br[0], self.hmsize[0]) - ul[0]
            g_y = max(0, -ul[1]), min(br[1], self.hmsize[1]) - ul[1]
            # Destination (heatmap) range.
            img_x = max(0, ul[0]), min(br[0], self.hmsize[0])
            img_y = max(0, ul[1]), min(br[1], self.hmsize[1])

            v = target_weight[joint_id]
            if v > 0.5:
                target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[
                    0]:g_y[1], g_x[0]:g_x[1]]
        records['target'] = target
        records['target_weight'] = target_weight
        del records['joints'], records['joints_vis']

        return records
|
|
@ -0,0 +1,634 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
try:
|
||||
from collections.abc import Sequence
|
||||
except Exception:
|
||||
from collections import Sequence
|
||||
from numbers import Integral
|
||||
|
||||
import cv2
|
||||
import copy
|
||||
import numpy as np
|
||||
import random
|
||||
import math
|
||||
|
||||
from .operators import BaseOperator, register_op
|
||||
from .batch_operators import Gt2TTFTarget
|
||||
from ppdet.modeling.bbox_utils import bbox_iou_np_expand
|
||||
from ppdet.utils.logger import setup_logger
|
||||
logger = setup_logger(__name__)
|
||||
|
||||
__all__ = [
|
||||
'RGBReverse', 'LetterBoxResize', 'MOTRandomAffine', 'Gt2JDETargetThres',
|
||||
'Gt2JDETargetMax', 'Gt2FairMOTTarget'
|
||||
]
|
||||
|
||||
|
||||
@register_op
class RGBReverse(BaseOperator):
    """Swap the channel order of 'image' (RGB <-> BGR).

    Sensitive to MOTRandomAffine: apply it so the affine op sees the
    channel order it expects.
    """

    def __init__(self):
        super(RGBReverse, self).__init__()

    def apply(self, sample, context=None):
        # Reverse the last (channel) axis; make the view contiguous so
        # later cv2 calls can consume it.
        flipped = sample['image'][:, :, ::-1]
        sample['image'] = np.ascontiguousarray(flipped)
        return sample
|
||||
|
||||
|
||||
@register_op
class LetterBoxResize(BaseOperator):
    def __init__(self, target_size):
        """
        Resize image to target size, convert normalized xywh to pixel xyxy
        format ([x_center, y_center, width, height] -> [x0, y0, x1, y1]).
        Args:
            target_size (int|list): image target size.
        """
        super(LetterBoxResize, self).__init__()
        if not isinstance(target_size, (Integral, Sequence)):
            raise TypeError(
                "Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
                format(type(target_size)))
        if isinstance(target_size, Integral):
            target_size = [target_size, target_size]
        self.target_size = target_size

    def apply_image(self, img, height, width, color=(127.5, 127.5, 127.5)):
        """Letterbox: resize keeping aspect ratio, then pad to (height, width).

        Returns:
            tuple: (padded image, resize ratio, x padding, y padding).
        """
        shape = img.shape[:2]  # [height, width]
        ratio_h = float(height) / shape[0]
        ratio_w = float(width) / shape[1]
        # Use the smaller ratio so the whole image fits in the target box.
        ratio = min(ratio_h, ratio_w)
        new_shape = (round(shape[1] * ratio),
                     round(shape[0] * ratio))  # [width, height]
        padw = (width - new_shape[0]) / 2
        padh = (height - new_shape[1]) / 2
        # +-0.1 makes rounding split odd padding across the two sides.
        top, bottom = round(padh - 0.1), round(padh + 0.1)
        left, right = round(padw - 0.1), round(padw + 0.1)

        img = cv2.resize(
            img, new_shape, interpolation=cv2.INTER_AREA)  # resized, no border
        img = cv2.copyMakeBorder(
            img, top, bottom, left, right, cv2.BORDER_CONSTANT,
            value=color)  # padded rectangular
        return img, ratio, padw, padh

    def apply_bbox(self, bbox0, h, w, ratio, padw, padh):
        """Map normalized xywh boxes to pixel xyxy in the letterboxed image."""
        bboxes = bbox0.copy()
        bboxes[:, 0] = ratio * w * (bbox0[:, 0] - bbox0[:, 2] / 2) + padw
        bboxes[:, 1] = ratio * h * (bbox0[:, 1] - bbox0[:, 3] / 2) + padh
        bboxes[:, 2] = ratio * w * (bbox0[:, 0] + bbox0[:, 2] / 2) + padw
        bboxes[:, 3] = ratio * h * (bbox0[:, 1] + bbox0[:, 3] / 2) + padh
        return bboxes

    def apply(self, sample, context=None):
        """ Resize the image numpy.
        """
        im = sample['image']
        h, w = sample['im_shape']
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image type is not numpy.".format(self))
        if len(im.shape) != 3:
            # Fixed: the original raised `ImageError`, which is neither
            # defined nor imported in this module, so the error path itself
            # crashed with NameError. ValueError is the appropriate type.
            raise ValueError('{}: image is not 3-dimensional.'.format(self))

        # apply image
        height, width = self.target_size
        img, ratio, padw, padh = self.apply_image(
            im, height=height, width=width)

        sample['image'] = img
        new_shape = (round(h * ratio), round(w * ratio))
        sample['im_shape'] = np.asarray(new_shape, dtype=np.float32)
        sample['scale_factor'] = np.asarray([ratio, ratio], dtype=np.float32)

        # apply bbox
        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], h, w, ratio,
                                                padw, padh)
        return sample
|
||||
|
||||
|
||||
@register_op
class MOTRandomAffine(BaseOperator):
    """
    Affine transform to image and coords to achieve the rotate, scale and
    shift effect for training image.

    Args:
        degrees (list[2]): the rotate range to apply, transform range is [min, max]
        translate (list[2]): the translate range to apply, transform range is [min, max]
        scale (list[2]): the scale range to apply, transform range is [min, max]
        shear (list[2]): the shear range to apply, transform range is [min, max]
        borderValue (list[3]): value used in case of a constant border when applying
            the perspective transformation
        reject_outside (bool): reject warped bounding bboxes outside of image

    Returns:
        records(dict): contain the image and coords after transformed

    """

    def __init__(self,
                 degrees=(-5, 5),
                 translate=(0.10, 0.10),
                 scale=(0.50, 1.20),
                 shear=(-2, 2),
                 borderValue=(127.5, 127.5, 127.5),
                 reject_outside=True):
        super(MOTRandomAffine, self).__init__()
        self.degrees = degrees
        self.translate = translate
        self.scale = scale
        self.shear = shear
        self.borderValue = borderValue
        self.reject_outside = reject_outside

    def apply(self, sample, context=None):
        # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
        border = 0  # width of added border (optional)

        img = sample['image']
        height, width = img.shape[0], img.shape[1]

        # Rotation and Scale: random angle/scale sampled uniformly from the
        # configured ranges, pivoting around the image center.
        R = np.eye(3)
        a = random.random() * (self.degrees[1] - self.degrees[0]
                               ) + self.degrees[0]
        s = random.random() * (self.scale[1] - self.scale[0]) + self.scale[0]
        R[:2] = cv2.getRotationMatrix2D(
            angle=a, center=(width / 2, height / 2), scale=s)

        # Translation
        T = np.eye(3)
        T[0, 2] = (
            random.random() * 2 - 1
        ) * self.translate[0] * height + border  # x translation (pixels)
        T[1, 2] = (
            random.random() * 2 - 1
        ) * self.translate[1] * width + border  # y translation (pixels)

        # Shear
        S = np.eye(3)
        S[0, 1] = math.tan((random.random() *
                            (self.shear[1] - self.shear[0]) + self.shear[0]) *
                           math.pi / 180)  # x shear (deg)
        S[1, 0] = math.tan((random.random() *
                            (self.shear[1] - self.shear[0]) + self.shear[0]) *
                           math.pi / 180)  # y shear (deg)

        M = S @T @R  # Combined rotation matrix. ORDER IS IMPORTANT HERE!!
        imw = cv2.warpPerspective(
            img,
            M,
            dsize=(width, height),
            flags=cv2.INTER_LINEAR,
            borderValue=self.borderValue)  # BGR order borderValue

        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            targets = sample['gt_bbox']
            n = targets.shape[0]
            points = targets.copy()
            area0 = (points[:, 2] - points[:, 0]) * (
                points[:, 3] - points[:, 1])

            # warp points: all four corners of every box through M.
            xy = np.ones((n * 4, 3))
            xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
                n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
            xy = (xy @M.T)[:, :2].reshape(n, 8)

            # create new boxes: axis-aligned hull of the warped corners.
            x = xy[:, [0, 2, 4, 6]]
            y = xy[:, [1, 3, 5, 7]]
            xy = np.concatenate(
                (x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

            # apply angle-based reduction: shrink the hull to compensate for
            # the inflation a rotated box suffers when re-axis-aligned.
            radians = a * math.pi / 180
            reduction = max(abs(math.sin(radians)), abs(math.cos(radians)))**0.5
            x = (xy[:, 2] + xy[:, 0]) / 2
            y = (xy[:, 3] + xy[:, 1]) / 2
            w = (xy[:, 2] - xy[:, 0]) * reduction
            h = (xy[:, 3] - xy[:, 1]) * reduction
            xy = np.concatenate(
                (x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T

            # reject warped points outside of image
            if self.reject_outside:
                np.clip(xy[:, 0], 0, width, out=xy[:, 0])
                np.clip(xy[:, 2], 0, width, out=xy[:, 2])
                np.clip(xy[:, 1], 0, height, out=xy[:, 1])
                np.clip(xy[:, 3], 0, height, out=xy[:, 3])
            w = xy[:, 2] - xy[:, 0]
            h = xy[:, 3] - xy[:, 1]
            area = w * h
            ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
            # Keep boxes that stay reasonably large, retain >10% of their
            # original area, and are not degenerately thin (aspect < 10).
            i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)

            if sum(i) > 0:
                # At least one box survived: commit the warp and the filtered
                # ground truth. Companion fields are filtered with the same mask.
                sample['gt_bbox'] = xy[i].astype(sample['gt_bbox'].dtype)
                sample['gt_class'] = sample['gt_class'][i]
                if 'difficult' in sample:
                    sample['difficult'] = sample['difficult'][i]
                if 'gt_ide' in sample:
                    sample['gt_ide'] = sample['gt_ide'][i]
                if 'is_crowd' in sample:
                    sample['is_crowd'] = sample['is_crowd'][i]
                sample['image'] = imw
                return sample
            else:
                # All boxes rejected: drop the warp entirely and return the
                # sample unchanged.
                # NOTE(review): when 'gt_bbox' is absent/empty this method
                # falls through and implicitly returns None — confirm callers
                # tolerate that (the warped image is discarded in that case).
                return sample
|
||||
|
||||
|
||||
@register_op
class Gt2JDETargetThres(BaseOperator):
    __shared__ = ['num_classes']
    """
    Generate JDE targets by ground truth data when training
    Args:
        anchors (list): anchors of JDE model
        anchor_masks (list): anchor_masks of JDE model
        downsample_ratios (list): downsample ratios of JDE model
        ide_thresh (float): thresh of identity, higher is ground truth
        fg_thresh (float): thresh of foreground, higher is foreground
        bg_thresh (float): thresh of background, lower is background
        num_classes (int): number of classes
    """

    def __init__(self,
                 anchors,
                 anchor_masks,
                 downsample_ratios,
                 ide_thresh=0.5,
                 fg_thresh=0.5,
                 bg_thresh=0.4,
                 num_classes=1):
        super(Gt2JDETargetThres, self).__init__()
        self.anchors = anchors
        self.anchor_masks = anchor_masks
        self.downsample_ratios = downsample_ratios
        self.ide_thresh = ide_thresh
        self.fg_thresh = fg_thresh
        self.bg_thresh = bg_thresh
        self.num_classes = num_classes

    def generate_anchor(self, nGh, nGw, anchor_hw):
        """Build a dense [nA, 4, nGh, nGw] grid of (cx, cy, w, h) anchors."""
        nA = len(anchor_hw)
        yy, xx = np.meshgrid(np.arange(nGh), np.arange(nGw))

        # Per-cell center coordinates, replicated once per anchor shape.
        mesh = np.stack([xx.T, yy.T], axis=0)  # [2, nGh, nGw]
        mesh = np.repeat(mesh[None, :], nA, axis=0)  # [nA, 2, nGh, nGw]

        # Broadcast each anchor's (w, h) over the whole grid.
        anchor_offset_mesh = anchor_hw[:, :, None][:, :, :, None]
        anchor_offset_mesh = np.repeat(anchor_offset_mesh, nGh, axis=-2)
        anchor_offset_mesh = np.repeat(anchor_offset_mesh, nGw, axis=-1)

        anchor_mesh = np.concatenate(
            [mesh, anchor_offset_mesh], axis=1)  # [nA, 4, nGh, nGw]
        return anchor_mesh

    def encode_delta(self, gt_box_list, fg_anchor_list):
        """Encode gt boxes as (dx, dy, log dw, log dh) deltas vs anchors."""
        px, py, pw, ph = fg_anchor_list[:, 0], fg_anchor_list[:,1], \
                         fg_anchor_list[:, 2], fg_anchor_list[:,3]
        gx, gy, gw, gh = gt_box_list[:, 0], gt_box_list[:, 1], \
                         gt_box_list[:, 2], gt_box_list[:, 3]
        dx = (gx - px) / pw
        dy = (gy - py) / ph
        dw = np.log(gw / pw)
        dh = np.log(gh / ph)
        return np.stack([dx, dy, dw, dh], axis=1)

    def pad_box(self, sample, num_max):
        """Zero-pad per-sample gt fields to a fixed length `num_max`.

        Flattens 'gt_score'/'difficult'/'is_crowd'/'gt_ide' from [N, 1] to
        padded 1-D arrays so samples can be batched.
        """
        assert 'gt_bbox' in sample
        bbox = sample['gt_bbox']
        gt_num = len(bbox)
        pad_bbox = np.zeros((num_max, 4), dtype=np.float32)
        if gt_num > 0:
            pad_bbox[:gt_num, :] = bbox[:gt_num, :]
        sample['gt_bbox'] = pad_bbox
        if 'gt_score' in sample:
            pad_score = np.zeros((num_max, ), dtype=np.float32)
            if gt_num > 0:
                pad_score[:gt_num] = sample['gt_score'][:gt_num, 0]
            sample['gt_score'] = pad_score
        if 'difficult' in sample:
            pad_diff = np.zeros((num_max, ), dtype=np.int32)
            if gt_num > 0:
                pad_diff[:gt_num] = sample['difficult'][:gt_num, 0]
            sample['difficult'] = pad_diff
        if 'is_crowd' in sample:
            pad_crowd = np.zeros((num_max, ), dtype=np.int32)
            if gt_num > 0:
                pad_crowd[:gt_num] = sample['is_crowd'][:gt_num, 0]
            sample['is_crowd'] = pad_crowd
        if 'gt_ide' in sample:
            pad_ide = np.zeros((num_max, ), dtype=np.int32)
            if gt_num > 0:
                pad_ide[:gt_num] = sample['gt_ide'][:gt_num, 0]
            sample['gt_ide'] = pad_ide
        return sample

    def __call__(self, samples, context=None):
        assert len(self.anchor_masks) == len(self.downsample_ratios), \
            "anchor_masks', and 'downsample_ratios' should have same length."
        # Image is CHW here: shape[1:3] gives (h, w).
        h, w = samples[0]['image'].shape[1:3]

        # Pad every sample's gt fields to the batch-wide maximum.
        num_max = 0
        for sample in samples:
            num_max = max(num_max, len(sample['gt_bbox']))

        for sample in samples:
            gt_bbox = sample['gt_bbox']
            gt_ide = sample['gt_ide']
            for i, (anchor_hw, downsample_ratio
                    ) in enumerate(zip(self.anchors, self.downsample_ratios)):
                # Anchors are expressed in feature-map cells at this level.
                anchor_hw = np.array(
                    anchor_hw, dtype=np.float32) / downsample_ratio
                nA = len(anchor_hw)
                nGh, nGw = int(h / downsample_ratio), int(w / downsample_ratio)
                tbox = np.zeros((nA, nGh, nGw, 4), dtype=np.float32)
                tconf = np.zeros((nA, nGh, nGw), dtype=np.float32)
                tid = -np.ones((nA, nGh, nGw, 1), dtype=np.float32)

                # Ground truth is normalized xywh; scale to grid units and
                # clamp centers inside the grid.
                gxy, gwh = gt_bbox[:, 0:2].copy(), gt_bbox[:, 2:4].copy()
                gxy[:, 0] = gxy[:, 0] * nGw
                gxy[:, 1] = gxy[:, 1] * nGh
                gwh[:, 0] = gwh[:, 0] * nGw
                gwh[:, 1] = gwh[:, 1] * nGh
                gxy[:, 0] = np.clip(gxy[:, 0], 0, nGw - 1)
                gxy[:, 1] = np.clip(gxy[:, 1], 0, nGh - 1)
                tboxes = np.concatenate([gxy, gwh], axis=1)

                anchor_mesh = self.generate_anchor(nGh, nGw, anchor_hw)

                # IoU between every anchor cell and every gt box.
                anchor_list = np.transpose(anchor_mesh,
                                           (0, 2, 3, 1)).reshape(-1, 4)
                iou_pdist = bbox_iou_np_expand(
                    anchor_list, tboxes, x1y1x2y2=False)

                iou_max = np.max(iou_pdist, axis=1)
                max_gt_index = np.argmax(iou_pdist, axis=1)

                iou_map = iou_max.reshape(nA, nGh, nGw)
                gt_index_map = max_gt_index.reshape(nA, nGh, nGw)

                # Threshold bands: foreground / background / ignored.
                id_index = iou_map > self.ide_thresh
                fg_index = iou_map > self.fg_thresh
                bg_index = iou_map < self.bg_thresh
                ign_index = (iou_map < self.fg_thresh) * (
                    iou_map > self.bg_thresh)
                tconf[fg_index] = 1
                tconf[bg_index] = 0
                tconf[ign_index] = -1

                gt_index = gt_index_map[fg_index]
                gt_box_list = tboxes[gt_index]
                gt_id_list = gt_ide[gt_index_map[id_index]]

                if np.sum(fg_index) > 0:
                    # Identity targets for anchors above ide_thresh, box
                    # regression deltas for anchors above fg_thresh.
                    tid[id_index] = gt_id_list

                    fg_anchor_list = anchor_list.reshape(nA, nGh, nGw,
                                                         4)[fg_index]
                    delta_target = self.encode_delta(gt_box_list,
                                                     fg_anchor_list)
                    tbox[fg_index] = delta_target

                sample['tbox{}'.format(i)] = tbox
                sample['tconf{}'.format(i)] = tconf
                sample['tide{}'.format(i)] = tid
            sample.pop('gt_class')
            sample = self.pad_box(sample, num_max)
        return samples
|
||||
|
||||
|
||||
@register_op
class Gt2JDETargetMax(BaseOperator):
    __shared__ = ['num_classes']
    """
    Generate JDE targets by ground truth data when evaluating
    Args:
        anchors (list): anchors of JDE model
        anchor_masks (list): anchor_masks of JDE model
        downsample_ratios (list): downsample ratios of JDE model
        max_iou_thresh (float): iou thresh for high quality anchor
        num_classes (int): number of classes
    """

    def __init__(self,
                 anchors,
                 anchor_masks,
                 downsample_ratios,
                 max_iou_thresh=0.60,
                 num_classes=1):
        super(Gt2JDETargetMax, self).__init__()
        self.anchors = anchors
        self.anchor_masks = anchor_masks
        self.downsample_ratios = downsample_ratios
        self.max_iou_thresh = max_iou_thresh
        self.num_classes = num_classes

    def __call__(self, samples, context=None):
        assert len(self.anchor_masks) == len(self.downsample_ratios), \
            "anchor_masks', and 'downsample_ratios' should have same length."
        # Image is CHW here: shape[1:3] gives (h, w).
        h, w = samples[0]['image'].shape[1:3]
        for sample in samples:
            gt_bbox = sample['gt_bbox']
            gt_ide = sample['gt_ide']
            for i, (anchor_hw, downsample_ratio
                    ) in enumerate(zip(self.anchors, self.downsample_ratios)):
                # Anchors are expressed in feature-map cells at this level.
                anchor_hw = np.array(
                    anchor_hw, dtype=np.float32) / downsample_ratio
                nA = len(anchor_hw)
                nGh, nGw = int(h / downsample_ratio), int(w / downsample_ratio)
                tbox = np.zeros((nA, nGh, nGw, 4), dtype=np.float32)
                tconf = np.zeros((nA, nGh, nGw), dtype=np.float32)
                tid = -np.ones((nA, nGh, nGw, 1), dtype=np.float32)

                # Normalized xywh ground truth scaled to grid units; cell
                # indices of each gt center, clamped inside the grid.
                gxy, gwh = gt_bbox[:, 0:2].copy(), gt_bbox[:, 2:4].copy()
                gxy[:, 0] = gxy[:, 0] * nGw
                gxy[:, 1] = gxy[:, 1] * nGh
                gwh[:, 0] = gwh[:, 0] * nGw
                gwh[:, 1] = gwh[:, 1] * nGh
                gi = np.clip(gxy[:, 0], 0, nGw - 1).astype(int)
                gj = np.clip(gxy[:, 1], 0, nGh - 1).astype(int)

                # iou of targets-anchors (using wh only)
                box1 = gwh
                box2 = anchor_hw[:, None, :]
                inter_area = np.minimum(box1, box2).prod(2)
                iou = inter_area / (
                    box1.prod(1) + box2.prod(2) - inter_area + 1e-16)

                # Select best iou_pred and anchor
                iou_best = iou.max(0)  # best anchor [0-2] for each target
                a = np.argmax(iou, axis=0)

                # Select best unique target-anchor combinations
                iou_order = np.argsort(-iou_best)  # best to worst

                # Unique anchor selection: keep only the highest-IoU target
                # per (cell, anchor) triple.
                u = np.stack((gi, gj, a), 0)[:, iou_order]
                _, first_unique = np.unique(u, axis=1, return_index=True)
                mask = iou_order[first_unique]
                # best anchor must share significant commonality (iou) with target
                # TODO: examine arbitrary threshold
                idx = mask[iou_best[mask] > self.max_iou_thresh]

                if len(idx) > 0:
                    a_i, gj_i, gi_i = a[idx], gj[idx], gi[idx]
                    t_box = gt_bbox[idx]
                    t_id = gt_ide[idx]
                    if len(t_box.shape) == 1:
                        t_box = t_box.reshape(1, 4)

                    gxy, gwh = t_box[:, 0:2].copy(), t_box[:, 2:4].copy()
                    gxy[:, 0] = gxy[:, 0] * nGw
                    gxy[:, 1] = gxy[:, 1] * nGh
                    gwh[:, 0] = gwh[:, 0] * nGw
                    gwh[:, 1] = gwh[:, 1] * nGh

                    # XY coordinates: fractional offset inside the cell.
                    tbox[:, :, :, 0:2][a_i, gj_i, gi_i] = gxy - gxy.astype(int)
                    # Width and height in yolo method
                    tbox[:, :, :, 2:4][a_i, gj_i, gi_i] = np.log(gwh /
                                                                 anchor_hw[a_i])
                    tconf[a_i, gj_i, gi_i] = 1
                    tid[a_i, gj_i, gi_i] = t_id

                sample['tbox{}'.format(i)] = tbox
                sample['tconf{}'.format(i)] = tconf
                sample['tide{}'.format(i)] = tid
        # Fixed: previously this method fell through and implicitly returned
        # None; batch operators must return the transformed samples (the
        # sibling Gt2JDETargetThres already does).
        return samples
|
||||
|
||||
|
||||
class Gt2FairMOTTarget(Gt2TTFTarget):
    __shared__ = ['num_classes']
    """
    Generate FairMOT targets by ground truth data.
    Difference between Gt2FairMOTTarget and Gt2TTFTarget are:
        1. the gaussian kernal radius to generate a heatmap.
        2. the targets needed during traing.

    Args:
        num_classes(int): the number of classes.
        down_ratio(int): the down ratio from images to heatmap, 4 by default.
        max_objs(int): the maximum number of ground truth objects in a image, 500 by default.
    """

    def __init__(self, num_classes=1, down_ratio=4, max_objs=500):
        # NOTE(review): super(Gt2TTFTarget, self) deliberately skips
        # Gt2TTFTarget.__init__ and calls its parent instead — presumably
        # to avoid Gt2TTFTarget's own attribute setup; confirm intended.
        super(Gt2TTFTarget, self).__init__()
        self.down_ratio = down_ratio
        self.num_classes = num_classes
        self.max_objs = max_objs

    def __call__(self, samples, context=None):
        for b_id, sample in enumerate(samples):
            # Image is CHW; heatmap resolution is image size / down_ratio.
            output_h = sample['image'].shape[1] // self.down_ratio
            output_w = sample['image'].shape[2] // self.down_ratio

            heatmap = np.zeros(
                (self.num_classes, output_h, output_w), dtype='float32')
            bbox_size = np.zeros((self.max_objs, 4), dtype=np.float32)
            center_offset = np.zeros((self.max_objs, 2), dtype=np.float32)
            index = np.zeros((self.max_objs, ), dtype=np.int64)
            index_mask = np.zeros((self.max_objs, ), dtype=np.int32)
            reid = np.zeros((self.max_objs, ), dtype=np.int64)
            bbox_xys = np.zeros((self.max_objs, 4), dtype=np.float32)

            gt_bbox = sample['gt_bbox']
            gt_class = sample['gt_class']
            gt_ide = sample['gt_ide']

            for k in range(len(gt_bbox)):
                cls_id = gt_class[k][0]
                bbox = gt_bbox[k]
                ide = gt_ide[k][0]
                # Normalized xywh -> heatmap-pixel xywh (mutates gt_bbox row).
                bbox[[0, 2]] = bbox[[0, 2]] * output_w
                bbox[[1, 3]] = bbox[[1, 3]] * output_h
                # Amodal box in xyxy, before the center is clipped.
                bbox_amodal = copy.deepcopy(bbox)
                bbox_amodal[0] = bbox_amodal[0] - bbox_amodal[2] / 2.
                bbox_amodal[1] = bbox_amodal[1] - bbox_amodal[3] / 2.
                bbox_amodal[2] = bbox_amodal[0] + bbox_amodal[2]
                bbox_amodal[3] = bbox_amodal[1] + bbox_amodal[3]
                # Clamp the center inside the heatmap.
                bbox[0] = np.clip(bbox[0], 0, output_w - 1)
                bbox[1] = np.clip(bbox[1], 0, output_h - 1)
                h = bbox[3]
                w = bbox[2]

                # xyxy box using the (possibly clipped) center.
                bbox_xy = copy.deepcopy(bbox)
                bbox_xy[0] = bbox_xy[0] - bbox_xy[2] / 2
                bbox_xy[1] = bbox_xy[1] - bbox_xy[3] / 2
                bbox_xy[2] = bbox_xy[0] + bbox_xy[2]
                bbox_xy[3] = bbox_xy[1] + bbox_xy[3]

                if h > 0 and w > 0:
                    # CenterNet-style radius (see gaussian_radius below),
                    # unlike Gt2TTFTarget's alpha-scaled kernel.
                    radius = self.gaussian_radius((math.ceil(h), math.ceil(w)))
                    radius = max(0, int(radius))
                    ct = np.array([bbox[0], bbox[1]], dtype=np.float32)
                    ct_int = ct.astype(np.int32)
                    self.draw_truncate_gaussian(heatmap[cls_id], ct_int, radius,
                                                radius)
                    # Distances from the center to the amodal box edges
                    # (left, top, right, bottom).
                    bbox_size[k] = ct[0] - bbox_amodal[0], ct[1] - bbox_amodal[1], \
                        bbox_amodal[2] - ct[0], bbox_amodal[3] - ct[1]

                    # Flattened heatmap cell index plus sub-cell offset.
                    index[k] = ct_int[1] * output_w + ct_int[0]
                    center_offset[k] = ct - ct_int
                    index_mask[k] = 1
                    reid[k] = ide
                    bbox_xys[k] = bbox_xy

            sample['heatmap'] = heatmap
            sample['index'] = index
            sample['offset'] = center_offset
            sample['size'] = bbox_size
            sample['index_mask'] = index_mask
            sample['reid'] = reid
            sample['bbox_xys'] = bbox_xys
            # Raw gt fields are consumed; drop them from the batch.
            sample.pop('is_crowd', None)
            sample.pop('difficult', None)
            sample.pop('gt_class', None)
            sample.pop('gt_bbox', None)
            sample.pop('gt_score', None)
            sample.pop('gt_ide', None)
        return samples

    def gaussian_radius(self, det_size, min_overlap=0.7):
        """Smallest gaussian radius keeping IoU >= min_overlap (CenterNet).

        Solves the three quadratic cases (both corners inside, both
        outside, one in/one out) and returns the most conservative radius.
        """
        height, width = det_size

        a1 = 1
        b1 = (height + width)
        c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
        sq1 = np.sqrt(b1**2 - 4 * a1 * c1)
        r1 = (b1 + sq1) / 2

        a2 = 4
        b2 = 2 * (height + width)
        c2 = (1 - min_overlap) * width * height
        sq2 = np.sqrt(b2**2 - 4 * a2 * c2)
        r2 = (b2 + sq2) / 2

        a3 = 4 * min_overlap
        b3 = -2 * min_overlap * (height + width)
        c3 = (min_overlap - 1) * width * height
        sq3 = np.sqrt(b3**2 - 4 * a3 * c3)
        r3 = (b3 + sq3) / 2
        return min(r1, r2, r3)
|
|
@ -0,0 +1,523 @@
|
|||
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# this file contains helper methods for BBOX processing
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import random
|
||||
import math
|
||||
import cv2
|
||||
|
||||
|
||||
def meet_emit_constraint(src_bbox, sample_bbox):
    """Return True if the center of src_bbox lies inside sample_bbox.

    Both boxes are xyxy; boundaries are inclusive.
    """
    cx = (src_bbox[0] + src_bbox[2]) / 2
    cy = (src_bbox[1] + src_bbox[3]) / 2
    return (sample_bbox[0] <= cx <= sample_bbox[2] and
            sample_bbox[1] <= cy <= sample_bbox[3])
|
||||
|
||||
|
||||
def clip_bbox(src_bbox):
    """Clamp every coordinate of a normalized xyxy box into [0, 1].

    Mutates `src_bbox` in place and also returns it.
    """
    for k in range(4):
        src_bbox[k] = min(max(src_bbox[k], 0.0), 1.0)
    return src_bbox
|
||||
|
||||
|
||||
def bbox_area(src_bbox):
    """Area of an xyxy box; 0. for an inverted (degenerate) box."""
    if src_bbox[2] < src_bbox[0] or src_bbox[3] < src_bbox[1]:
        return 0.
    return (src_bbox[2] - src_bbox[0]) * (src_bbox[3] - src_bbox[1])
|
||||
|
||||
|
||||
def is_overlap(object_bbox, sample_bbox):
    """Return True if two xyxy boxes overlap with positive area.

    Boxes that only touch at an edge or corner do not count as
    overlapping.
    """
    separated = (object_bbox[0] >= sample_bbox[2] or
                 object_bbox[2] <= sample_bbox[0] or
                 object_bbox[1] >= sample_bbox[3] or
                 object_bbox[3] <= sample_bbox[1])
    return not separated
|
||||
|
||||
|
||||
def filter_and_process(sample_bbox, bboxes, labels, scores=None,
                       keypoints=None):
    """Keep gt boxes whose centers fall inside ``sample_bbox`` and
    re-express them in the crop's normalized coordinate frame.

    Args:
        sample_bbox: crop window [xmin, ymin, xmax, ymax], normalized.
        bboxes: gt boxes, one [xmin, ymin, xmax, ymax] row each.
        labels: per-box class ids, indexed as labels[i][0].
        scores: optional per-box scores, indexed as scores[i][0].
        keypoints: optional pair (coords, ignore_flags); coordinates are
            rescaled into the crop's frame.

    Returns:
        (bboxes, labels, scores) as numpy arrays, plus a
        (keypoints, keypoint_ignore) pair when ``keypoints`` is given.
    """
    new_bboxes = []
    new_labels = []
    new_scores = []
    new_keypoints = []
    new_kp_ignore = []
    for i in range(len(bboxes)):
        new_bbox = [0, 0, 0, 0]
        obj_bbox = [bboxes[i][0], bboxes[i][1], bboxes[i][2], bboxes[i][3]]
        # Drop boxes whose center lies outside the crop ...
        if not meet_emit_constraint(obj_bbox, sample_bbox):
            continue
        # ... and boxes that do not intersect the crop at all.
        if not is_overlap(obj_bbox, sample_bbox):
            continue
        sample_width = sample_bbox[2] - sample_bbox[0]
        sample_height = sample_bbox[3] - sample_bbox[1]
        # Translate/scale into the crop's own [0, 1] coordinate frame.
        new_bbox[0] = (obj_bbox[0] - sample_bbox[0]) / sample_width
        new_bbox[1] = (obj_bbox[1] - sample_bbox[1]) / sample_height
        new_bbox[2] = (obj_bbox[2] - sample_bbox[0]) / sample_width
        new_bbox[3] = (obj_bbox[3] - sample_bbox[1]) / sample_height
        new_bbox = clip_bbox(new_bbox)
        if bbox_area(new_bbox) > 0:
            new_bboxes.append(new_bbox)
            new_labels.append([labels[i][0]])
            if scores is not None:
                new_scores.append([scores[i][0]])
            if keypoints is not None:
                # NOTE(review): keypoints[0][i] is rescaled in place, so
                # the caller's array is mutated — confirm callers expect it.
                sample_keypoint = keypoints[0][i]
                for j in range(len(sample_keypoint)):
                    # Even indices are x (scaled by width), odd are y (height).
                    kp_len = sample_height if j % 2 else sample_width
                    sample_coord = sample_bbox[1] if j % 2 else sample_bbox[0]
                    sample_keypoint[j] = (
                        sample_keypoint[j] - sample_coord) / kp_len
                    sample_keypoint[j] = max(min(sample_keypoint[j], 1.0), 0.0)
                new_keypoints.append(sample_keypoint)
                new_kp_ignore.append(keypoints[1][i])

    bboxes = np.array(new_bboxes)
    labels = np.array(new_labels)
    scores = np.array(new_scores)
    if keypoints is not None:
        keypoints = np.array(new_keypoints)
        new_kp_ignore = np.array(new_kp_ignore)
        return bboxes, labels, scores, (keypoints, new_kp_ignore)
    return bboxes, labels, scores
|
||||
|
||||
|
||||
def bbox_area_sampling(bboxes, labels, scores, target_size, min_size):
    """Discard boxes whose area, measured after scaling to
    ``target_size`` pixels, falls below ``min_size`` squared.

    Returns the surviving (bboxes, labels, scores) as numpy arrays;
    ``scores`` may be None or an empty array, in which case an empty
    score array is returned.
    """
    kept_boxes, kept_labels, kept_scores = [], [], []
    min_area = float(min_size * min_size)
    # Hoist the scores-validity check out of the loop.
    keep_scores = scores is not None and scores.size != 0
    for idx, box in enumerate(bboxes):
        pixel_w = float((box[2] - box[0]) * target_size)
        pixel_h = float((box[3] - box[1]) * target_size)
        if pixel_w * pixel_h >= min_area:
            kept_boxes.append(box)
            kept_labels.append(labels[idx])
            if keep_scores:
                kept_scores.append(scores[idx])
    return np.array(kept_boxes), np.array(kept_labels), np.array(kept_scores)
|
||||
|
||||
|
||||
def generate_sample_bbox(sampler):
    """Draw one random crop box as normalized [xmin, ymin, xmax, ymax].

    sampler[2:4] bound the scale draw and sampler[4:6] the aspect-ratio
    draw; the ratio is clamped so the box fits in the unit square.
    (The np.random.uniform call order is part of the contract for
    seeded reproducibility.)
    """
    scale = np.random.uniform(sampler[2], sampler[3])
    aspect_ratio = np.random.uniform(sampler[4], sampler[5])
    # Clamp the ratio so neither side of the box exceeds 1.0.
    aspect_ratio = min(max(aspect_ratio, scale**2.0), 1 / (scale**2.0))
    box_w = scale * (aspect_ratio**0.5)
    box_h = scale / (aspect_ratio**0.5)
    xmin = np.random.uniform(0, 1 - box_w)
    ymin = np.random.uniform(0, 1 - box_h)
    return [xmin, ymin, xmin + box_w, ymin + box_h]
|
||||
|
||||
|
||||
def generate_sample_bbox_square(sampler, image_width, image_height):
    """Draw one random crop box that is square in pixel space.

    Sampling matches generate_sample_bbox; afterwards one edge is
    rescaled by the image aspect ratio so width*W equals height*H.
    (The np.random.uniform call order is preserved for seeded runs.)
    """
    scale = np.random.uniform(sampler[2], sampler[3])
    aspect_ratio = np.random.uniform(sampler[4], sampler[5])
    aspect_ratio = min(max(aspect_ratio, scale**2.0), 1 / (scale**2.0))
    box_w = scale * (aspect_ratio**0.5)
    box_h = scale / (aspect_ratio**0.5)
    # Shrink one normalized edge so the crop is square in pixels.
    if image_height < image_width:
        box_w = box_h * image_height / image_width
    else:
        box_h = box_w * image_width / image_height
    xmin = np.random.uniform(0, 1 - box_w)
    ymin = np.random.uniform(0, 1 - box_h)
    return [xmin, ymin, xmin + box_w, ymin + box_h]
|
||||
|
||||
|
||||
def data_anchor_sampling(bbox_labels, image_width, image_height, scale_array,
                         resize_width):
    """Data-anchor sampling: pick one gt box at random, choose a nearby
    anchor scale for it, and return a crop window that would resize the
    box to roughly that scale (presumably PyramidBox-style face
    sampling — confirm against the reader config that uses it).

    Args:
        bbox_labels: normalized gt rows [xmin, ymin, xmax, ymax, ...].
        image_width / image_height: source image size in pixels.
        scale_array: ascending anchor scales.
        resize_width: width the crop will later be resized to.

    Returns:
        [xmin, ymin, xmax, ymax] crop in normalized coordinates, or the
        integer 0 when there are no gt boxes.
    """
    num_gt = len(bbox_labels)
    # np.random.randint range: [low, high)
    rand_idx = np.random.randint(0, num_gt) if num_gt != 0 else 0

    if num_gt != 0:
        # Chosen gt box, normalized coordinates.
        norm_xmin = bbox_labels[rand_idx][0]
        norm_ymin = bbox_labels[rand_idx][1]
        norm_xmax = bbox_labels[rand_idx][2]
        norm_ymax = bbox_labels[rand_idx][3]

        # Same box in pixel coordinates.
        xmin = norm_xmin * image_width
        ymin = norm_ymin * image_height
        wid = image_width * (norm_xmax - norm_xmin)
        hei = image_height * (norm_ymax - norm_ymin)
        range_size = 0

        # Index of the smallest anchor-scale bucket the box area exceeds.
        area = wid * hei
        for scale_ind in range(0, len(scale_array) - 1):
            if area > scale_array[scale_ind] ** 2 and area < \
                    scale_array[scale_ind + 1] ** 2:
                range_size = scale_ind + 1
                break

        # Boxes larger than the second-to-last scale cap at that bucket.
        if area > scale_array[len(scale_array) - 2]**2:
            range_size = len(scale_array) - 2

        scale_choose = 0.0
        if range_size == 0:
            rand_idx_size = 0
        else:
            # np.random.randint range: [low, high)
            rng_rand_size = np.random.randint(0, range_size + 1)
            rand_idx_size = rng_rand_size % (range_size + 1)

        # Pick a concrete target scale inside the chosen bucket; the top
        # bucket is additionally capped by twice the box's own size.
        if rand_idx_size == range_size:
            min_resize_val = scale_array[rand_idx_size] / 2.0
            max_resize_val = min(2.0 * scale_array[rand_idx_size],
                                 2 * math.sqrt(wid * hei))
            scale_choose = random.uniform(min_resize_val, max_resize_val)
        else:
            min_resize_val = scale_array[rand_idx_size] / 2.0
            max_resize_val = 2.0 * scale_array[rand_idx_size]
            scale_choose = random.uniform(min_resize_val, max_resize_val)

        # Crop side length (pixels) that maps the box to scale_choose
        # after resizing to resize_width.
        sample_bbox_size = wid * resize_width / scale_choose

        # Randomly place the crop so it still covers the chosen box.
        w_off_orig = 0.0
        h_off_orig = 0.0
        if sample_bbox_size < max(image_height, image_width):
            if wid <= sample_bbox_size:
                w_off_orig = np.random.uniform(xmin + wid - sample_bbox_size,
                                               xmin)
            else:
                w_off_orig = np.random.uniform(xmin,
                                               xmin + wid - sample_bbox_size)

            if hei <= sample_bbox_size:
                h_off_orig = np.random.uniform(ymin + hei - sample_bbox_size,
                                               ymin)
            else:
                h_off_orig = np.random.uniform(ymin,
                                               ymin + hei - sample_bbox_size)

        else:
            # Crop bigger than the image: offset becomes non-positive.
            w_off_orig = np.random.uniform(image_width - sample_bbox_size, 0.0)
            h_off_orig = np.random.uniform(image_height - sample_bbox_size, 0.0)

        w_off_orig = math.floor(w_off_orig)
        h_off_orig = math.floor(h_off_orig)

        # Figure out top left coordinates.
        w_off = float(w_off_orig / image_width)
        h_off = float(h_off_orig / image_height)

        sampled_bbox = [
            w_off, h_off, w_off + float(sample_bbox_size / image_width),
            h_off + float(sample_bbox_size / image_height)
        ]
        return sampled_bbox
    else:
        return 0
|
||||
|
||||
|
||||
def jaccard_overlap(sample_bbox, object_bbox):
    """IoU of two [xmin, ymin, xmax, ymax] boxes; 0 when disjoint
    (touching edges count as disjoint)."""
    disjoint = (sample_bbox[0] >= object_bbox[2] or
                sample_bbox[2] <= object_bbox[0] or
                sample_bbox[1] >= object_bbox[3] or
                sample_bbox[3] <= object_bbox[1])
    if disjoint:
        return 0
    inter_w = min(sample_bbox[2], object_bbox[2]) - \
        max(sample_bbox[0], object_bbox[0])
    inter_h = min(sample_bbox[3], object_bbox[3]) - \
        max(sample_bbox[1], object_bbox[1])
    intersect_size = inter_w * inter_h
    union = bbox_area(sample_bbox) + bbox_area(object_bbox) - intersect_size
    return intersect_size / union
|
||||
|
||||
|
||||
def intersect_bbox(bbox1, bbox2):
    """Intersection box of two [xmin, ymin, xmax, ymax] boxes; the
    all-zero box when they do not meet (touching edges still meet)."""
    separated = (bbox2[0] > bbox1[2] or bbox2[2] < bbox1[0] or
                 bbox2[1] > bbox1[3] or bbox2[3] < bbox1[1])
    if separated:
        return [0.0, 0.0, 0.0, 0.0]
    return [
        max(bbox1[0], bbox2[0]), max(bbox1[1], bbox2[1]),
        min(bbox1[2], bbox2[2]), min(bbox1[3], bbox2[3])
    ]
|
||||
|
||||
|
||||
def bbox_coverage(bbox1, bbox2):
    """Fraction of bbox1's area covered by bbox2 (0. when disjoint)."""
    overlap_area = bbox_area(intersect_bbox(bbox1, bbox2))
    if overlap_area > 0:
        return overlap_area / bbox_area(bbox1)
    return 0.
|
||||
|
||||
|
||||
def satisfy_sample_constraint(sampler,
                              sample_bbox,
                              gt_bboxes,
                              satisfy_all=False):
    """Check a candidate crop against the sampler's overlap constraints.

    sampler[6] is the minimum and sampler[7] the maximum allowed jaccard
    overlap with a gt box (a value of 0 disables that bound).  With
    ``satisfy_all`` False (default) a single conforming gt box accepts
    the crop; with True every gt box must conform.
    """
    # No overlap constraint configured: any crop is acceptable.
    if sampler[6] == 0 and sampler[7] == 0:
        return True
    satisfied = []
    for i in range(len(gt_bboxes)):
        object_bbox = [
            gt_bboxes[i][0], gt_bboxes[i][1], gt_bboxes[i][2], gt_bboxes[i][3]
        ]
        overlap = jaccard_overlap(sample_bbox, object_bbox)
        if sampler[6] != 0 and \
                overlap < sampler[6]:
            satisfied.append(False)
            continue
        if sampler[7] != 0 and \
                overlap > sampler[7]:
            satisfied.append(False)
            continue
        satisfied.append(True)
        # Early accept: one conforming gt box is enough unless
        # satisfy_all was requested.
        if not satisfy_all:
            return True

    if satisfy_all:
        return np.all(satisfied)
    else:
        # No gt box conformed (or there were none at all).
        return False
|
||||
|
||||
|
||||
def satisfy_sample_constraint_coverage(sampler, sample_bbox, gt_bboxes):
    """Check a candidate crop against jaccard-overlap (sampler[6:8]) and
    object-coverage (sampler[8:10]) constraints; a bound of 0 disables
    that side.  Accepts on the first gt box that passes every enabled
    check.

    NOTE(review): when both constraint families are enabled, a gt box
    that passes the jaccard check sets ``found`` but a failing coverage
    check ``continue``s before the final ``if found`` — so acceptance
    effectively requires the same gt box to pass both.  Confirm this is
    the intended semantics before changing.
    """
    if sampler[6] == 0 and sampler[7] == 0:
        has_jaccard_overlap = False
    else:
        has_jaccard_overlap = True
    if sampler[8] == 0 and sampler[9] == 0:
        has_object_coverage = False
    else:
        has_object_coverage = True

    # Nothing enabled: every crop is acceptable.
    if not has_jaccard_overlap and not has_object_coverage:
        return True
    found = False
    for i in range(len(gt_bboxes)):
        object_bbox = [
            gt_bboxes[i][0], gt_bboxes[i][1], gt_bboxes[i][2], gt_bboxes[i][3]
        ]
        if has_jaccard_overlap:
            overlap = jaccard_overlap(sample_bbox, object_bbox)
            if sampler[6] != 0 and \
                    overlap < sampler[6]:
                continue
            if sampler[7] != 0 and \
                    overlap > sampler[7]:
                continue
            found = True
        if has_object_coverage:
            object_coverage = bbox_coverage(object_bbox, sample_bbox)
            if sampler[8] != 0 and \
                    object_coverage < sampler[8]:
                continue
            if sampler[9] != 0 and \
                    object_coverage > sampler[9]:
                continue
            found = True
        if found:
            return True
    return found
|
||||
|
||||
|
||||
def crop_image_sampling(img, sample_bbox, image_width, image_height,
                        target_size):
    """Crop ``sample_bbox`` (normalized, possibly extending outside the
    image) from ``img``, zero-padding the out-of-bounds region, and
    resize the result to target_size x target_size.

    Returns a float64 array of shape (target_size, target_size, 3).
    """
    # no clipping here
    xmin = int(sample_bbox[0] * image_width)
    xmax = int(sample_bbox[2] * image_width)
    ymin = int(sample_bbox[1] * image_height)
    ymax = int(sample_bbox[3] * image_height)

    w_off = xmin
    h_off = ymin
    width = xmax - xmin
    height = ymax - ymin
    # Intersection of the requested crop with the actual image.
    cross_xmin = max(0.0, float(w_off))
    cross_ymin = max(0.0, float(h_off))
    cross_xmax = min(float(w_off + width - 1.0), float(image_width))
    cross_ymax = min(float(h_off + height - 1.0), float(image_height))
    cross_width = cross_xmax - cross_xmin
    cross_height = cross_ymax - cross_ymin

    # Where that intersection lands inside the (padded) output canvas.
    roi_xmin = 0 if w_off >= 0 else abs(w_off)
    roi_ymin = 0 if h_off >= 0 else abs(h_off)
    roi_width = cross_width
    roi_height = cross_height

    roi_y1 = int(roi_ymin)
    roi_y2 = int(roi_ymin + roi_height)
    roi_x1 = int(roi_xmin)
    roi_x2 = int(roi_xmin + roi_width)

    cross_y1 = int(cross_ymin)
    cross_y2 = int(cross_ymin + cross_height)
    cross_x1 = int(cross_xmin)
    cross_x2 = int(cross_xmin + cross_width)

    # Zero canvas of the full requested crop size, then paste the valid
    # image region into it.
    sample_img = np.zeros((height, width, 3))
    sample_img[roi_y1: roi_y2, roi_x1: roi_x2] = \
        img[cross_y1: cross_y2, cross_x1: cross_x2]

    sample_img = cv2.resize(
        sample_img, (target_size, target_size), interpolation=cv2.INTER_AREA)

    return sample_img
|
||||
|
||||
|
||||
def is_poly(segm):
    """True when ``segm`` is a polygon segmentation (a list); False for
    RLE (a dict).  Any other type fails the assertion."""
    assert isinstance(segm, (list, dict)), \
        "Invalid segm type: {}".format(type(segm))
    return isinstance(segm, list)
|
||||
|
||||
|
||||
def gaussian_radius(bbox_size, min_overlap):
    """Largest gaussian radius for a (height, width) box that still
    keeps at least ``min_overlap`` IoU under the three corner-shift
    quadratic bounds; the tightest of the three radii wins."""
    height, width = bbox_size

    # First quadratic bound.
    a1 = 1
    b1 = (height + width)
    c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
    disc1 = np.sqrt(b1**2 - 4 * a1 * c1)
    r1 = (b1 - disc1) / (2 * a1)

    # Second quadratic bound.
    a2 = 4
    b2 = 2 * (height + width)
    c2 = (1 - min_overlap) * width * height
    disc2 = np.sqrt(b2**2 - 4 * a2 * c2)
    r2 = (b2 - disc2) / (2 * a2)

    # Third quadratic bound (note the + root here).
    a3 = 4 * min_overlap
    b3 = -2 * min_overlap * (height + width)
    c3 = (min_overlap - 1) * width * height
    disc3 = np.sqrt(b3**2 - 4 * a3 * c3)
    r3 = (b3 + disc3) / (2 * a3)

    return min(r1, r2, r3)
|
||||
|
||||
|
||||
def draw_gaussian(heatmap, center, radius, k=1, delte=6):
    """Splat a 2-D gaussian onto ``heatmap`` at ``center`` in place,
    keeping the element-wise maximum with existing values.

    Args:
        heatmap: 2-D array, mutated in place.
        center: (x, y) peak position — assumes integer pixel coords,
            TODO confirm callers pass ints.
        radius: gaussian radius in pixels (assumed int).
        k: peak amplitude scale factor.
        delte: sigma divisor (sigma = diameter / delte); parameter name
            kept as-is for backward compatibility (sic, likely "delta").
    """
    diameter = 2 * radius + 1
    sigma = diameter / delte
    gaussian = gaussian2D((diameter, diameter), sigma_x=sigma, sigma_y=sigma)

    x, y = center

    height, width = heatmap.shape[0:2]

    # Clip the stamp so it never indexes outside the heatmap borders.
    left, right = min(x, radius), min(width - x, radius + 1)
    top, bottom = min(y, radius), min(height - y, radius + 1)

    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
    masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:
                               radius + right]
    # Writing through ``out=`` updates heatmap via the slice view.
    np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
|
||||
|
||||
|
||||
def gaussian2D(shape, sigma_x=1, sigma_y=1):
    """Unnormalized 2-D gaussian grid of the given (h, w) shape with
    peak value 1 at the center; negligible tail values are zeroed."""
    half_h, half_w = [(ss - 1.) / 2. for ss in shape]
    ys, xs = np.ogrid[-half_h:half_h + 1, -half_w:half_w + 1]

    g = np.exp(-(xs * xs / (2 * sigma_x * sigma_x) +
                 ys * ys / (2 * sigma_y * sigma_y)))
    # Zero out values below numeric significance relative to the peak.
    g[g < np.finfo(g.dtype).eps * g.max()] = 0
    return g
|
||||
|
||||
|
||||
def transform_bbox(sample,
                   M,
                   w,
                   h,
                   area_thr=0.25,
                   wh_thr=2,
                   ar_thr=20,
                   perspective=False):
    """Transform the sample's gt boxes by the 3x3 matrix ``M`` and drop
    degenerate results.

    Refer to https://github.com/ultralytics/yolov5/blob/develop/utils/datasets.py

    Args:
        sample: dict holding 'gt_bbox' (n x 4) and 'gt_class'
            (optionally 'is_crowd' / 'difficult'); filtered in place.
        M: affine or perspective transform matrix.
        w, h: output canvas width/height used for clipping.
        area_thr: minimum surviving-area ratio after clipping.
        wh_thr: minimum box side length in pixels.
        ar_thr: maximum allowed aspect ratio.
        perspective: divide by the homogeneous coordinate when True.

    Returns:
        The same ``sample`` dict with transformed, filtered boxes.
    """
    bbox = sample['gt_bbox']
    # Transform all 4 corners of every box (n*4 homogeneous points).
    n = len(bbox)
    xy = np.ones((n * 4, 3), dtype=np.float32)
    xy[:, :2] = bbox[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)
    xy = np.matmul(xy, M.T)
    if perspective:
        xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8)  # homogeneous divide
    else:
        xy = xy[:, :2].reshape(n, 8)
    # Axis-aligned hull of the transformed corners becomes the new box.
    x = xy[:, [0, 2, 4, 6]]
    y = xy[:, [1, 3, 5, 7]]
    bbox = np.concatenate(
        (x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
    # Clip to the canvas and drop degenerate boxes.
    # BUGFIX: wh_thr and ar_thr were accepted by this function but never
    # forwarded, so filter_bbox silently used its own defaults; forward
    # all three thresholds explicitly.
    mask = filter_bbox(bbox, w, h, area_thr, wh_thr, ar_thr)
    sample['gt_bbox'] = bbox[mask]
    sample['gt_class'] = sample['gt_class'][mask]
    if 'is_crowd' in sample:
        sample['is_crowd'] = sample['is_crowd'][mask]
    if 'difficult' in sample:
        sample['difficult'] = sample['difficult'][mask]
    return sample
|
||||
|
||||
|
||||
def filter_bbox(bbox, w, h, area_thr=0.25, wh_thr=2, ar_thr=20):
    """Boolean mask of boxes that survive clipping to a (w, h) canvas.

    A box is kept when, after clipping in place, it retains more than
    ``area_thr`` of its original area, both sides exceed ``wh_thr``
    pixels, and its aspect ratio stays below ``ar_thr``.
    Refer to https://github.com/ultralytics/yolov5/blob/develop/utils/datasets.py
    """
    # Area before clipping (per row: width * height).
    area_before = (bbox[:, 2:4] - bbox[:, 0:2]).prod(1)
    # Clip coordinates into the canvas (mutates the caller's array).
    bbox[:, [0, 2]] = bbox[:, [0, 2]].clip(0, w)
    bbox[:, [1, 3]] = bbox[:, [1, 3]].clip(0, h)
    area_after = (bbox[:, 2:4] - bbox[:, 0:2]).prod(1)
    area_ratio = area_after / (area_before + 1e-16)
    wh = bbox[:, 2:4] - bbox[:, 0:2]
    ar_ratio = np.maximum(wh[:, 1] / (wh[:, 0] + 1e-16),
                          wh[:, 0] / (wh[:, 1] + 1e-16))
    return (area_ratio > area_thr) & ((wh > wh_thr).all(1)) & \
        (ar_ratio < ar_thr)
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,29 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from . import trainer
|
||||
from .trainer import *
|
||||
from . import tracker
|
||||
from .tracker import *
|
||||
|
||||
from . import callbacks
|
||||
from .callbacks import *
|
||||
|
||||
from . import env
|
||||
from .env import *
|
||||
|
||||
__all__ = trainer.__all__ \
|
||||
+ tracker.__all__ \
|
||||
+ callbacks.__all__ \
|
||||
+ env.__all__
|
|
@ -0,0 +1,258 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import sys
|
||||
import datetime
|
||||
import six
|
||||
|
||||
import paddle.distributed as dist
|
||||
|
||||
from ppdet.utils.checkpoint import save_model
|
||||
|
||||
from ppdet.utils.logger import setup_logger
|
||||
logger = setup_logger('ppdet.engine')
|
||||
|
||||
__all__ = ['Callback', 'ComposeCallback', 'LogPrinter', 'Checkpointer']
|
||||
|
||||
|
||||
class Callback(object):
    """Base training/eval hook; subclasses override the events they need.

    Every hook receives ``status``, a mutable dict shared with the
    trainer carrying the current mode, step/epoch counters, meters, etc.
    """

    def __init__(self, model):
        # The trainer (or tracker) that owns this callback.
        self.model = model

    def on_step_begin(self, status):
        pass

    def on_step_end(self, status):
        pass

    def on_epoch_begin(self, status):
        pass

    def on_epoch_end(self, status):
        pass
|
||||
|
||||
|
||||
class ComposeCallback(object):
    """Fans each lifecycle event out to an ordered list of callbacks."""

    def __init__(self, callbacks):
        # None entries are allowed so optional callbacks can be passed
        # straight through; they are dropped here.
        kept = [c for c in list(callbacks) if c is not None]
        for cb in kept:
            assert isinstance(
                cb, Callback), "callback should be subclass of Callback"
        self._callbacks = kept

    def _broadcast(self, event, status):
        # Dispatch the named event to every registered callback in order.
        for cb in self._callbacks:
            getattr(cb, event)(status)

    def on_step_begin(self, status):
        self._broadcast('on_step_begin', status)

    def on_step_end(self, status):
        self._broadcast('on_step_end', status)

    def on_epoch_begin(self, status):
        self._broadcast('on_epoch_begin', status)

    def on_epoch_end(self, status):
        self._broadcast('on_epoch_end', status)
|
||||
|
||||
|
||||
class LogPrinter(Callback):
    """Logs training progress (loss meters, lr, ETA, ips) every
    ``log_iter`` steps and eval progress every 100 steps; only the
    master process prints in distributed runs."""

    def __init__(self, model):
        super(LogPrinter, self).__init__(model)

    def on_step_end(self, status):
        # Only log from rank 0 (or in single-process runs).
        if dist.get_world_size() < 2 or dist.get_rank() == 0:
            mode = status['mode']
            if mode == 'train':
                epoch_id = status['epoch_id']
                step_id = status['step_id']
                steps_per_epoch = status['steps_per_epoch']
                training_staus = status['training_staus']
                batch_time = status['batch_time']
                data_time = status['data_time']

                epoches = self.model.cfg.epoch
                # Reader section name follows the '<Mode>Reader' convention.
                batch_size = self.model.cfg['{}Reader'.format(mode.capitalize(
                ))]['batch_size']

                logs = training_staus.log()
                # Pad the step counter to the width of steps_per_epoch.
                space_fmt = ':' + str(len(str(steps_per_epoch))) + 'd'
                if step_id % self.model.cfg.log_iter == 0:
                    # Remaining steps drive the ETA estimate.
                    eta_steps = (epoches - epoch_id) * steps_per_epoch - step_id
                    eta_sec = eta_steps * batch_time.global_avg
                    eta_str = str(datetime.timedelta(seconds=int(eta_sec)))
                    ips = float(batch_size) / batch_time.avg
                    fmt = ' '.join([
                        'Epoch: [{}]',
                        '[{' + space_fmt + '}/{}]',
                        'learning_rate: {lr:.6f}',
                        '{meters}',
                        'eta: {eta}',
                        'batch_cost: {btime}',
                        'data_cost: {dtime}',
                        'ips: {ips:.4f} images/s',
                    ])
                    fmt = fmt.format(
                        epoch_id,
                        step_id,
                        steps_per_epoch,
                        lr=status['learning_rate'],
                        meters=logs,
                        eta=eta_str,
                        btime=str(batch_time),
                        dtime=str(data_time),
                        ips=ips)
                    logger.info(fmt)
            if mode == 'eval':
                step_id = status['step_id']
                if step_id % 100 == 0:
                    logger.info("Eval iter: {}".format(step_id))

    def on_epoch_end(self, status):
        if dist.get_world_size() < 2 or dist.get_rank() == 0:
            mode = status['mode']
            if mode == 'eval':
                sample_num = status['sample_num']
                cost_time = status['cost_time']
                logger.info('Total sample number: {}, averge FPS: {}'.format(
                    sample_num, sample_num / cost_time))
|
||||
|
||||
|
||||
class Checkpointer(Callback):
    """Saves model snapshots every ``snapshot_epoch`` epochs during
    training, and the best-AP weights ("best_model") after evaluation.
    Only rank 0 saves in distributed runs."""

    def __init__(self, model):
        super(Checkpointer, self).__init__(model)
        # NOTE(review): ``cfg`` is assigned but never used below.
        cfg = self.model.cfg
        # Best mAP observed so far across evaluations.
        self.best_ap = 0.
        self.save_dir = os.path.join(self.model.cfg.save_dir,
                                     self.model.cfg.filename)
        # For distillation-style setups, save only the student weights.
        if hasattr(self.model.model, 'student_model'):
            self.weight = self.model.model.student_model
        else:
            self.weight = self.model.model

    def on_epoch_end(self, status):
        # Checkpointer only performed during training
        mode = status['mode']
        epoch_id = status['epoch_id']
        weight = None
        save_name = None
        if dist.get_world_size() < 2 or dist.get_rank() == 0:
            if mode == 'train':
                end_epoch = self.model.cfg.epoch
                # Snapshot on schedule, and always on the final epoch.
                if (
                        epoch_id + 1
                ) % self.model.cfg.snapshot_epoch == 0 or epoch_id == end_epoch - 1:
                    save_name = str(
                        epoch_id) if epoch_id != end_epoch - 1 else "model_final"
                    weight = self.weight
            elif mode == 'eval':
                if 'save_best_model' in status and status['save_best_model']:
                    for metric in self.model._metrics:
                        map_res = metric.get_results()
                        # Pick whichever result key this metric produced.
                        if 'bbox' in map_res:
                            key = 'bbox'
                        elif 'keypoint' in map_res:
                            key = 'keypoint'
                        else:
                            key = 'mask'
                        if key not in map_res:
                            logger.warning("Evaluation results empty, this may be due to " \
                                           "training iterations being too few or not " \
                                           "loading the correct weights.")
                            return
                        if map_res[key][0] > self.best_ap:
                            self.best_ap = map_res[key][0]
                            save_name = 'best_model'
                            weight = self.weight
                        logger.info("Best test {} ap is {:0.3f}.".format(
                            key, self.best_ap))
            if weight:
                save_model(weight, self.model.optimizer, self.save_dir,
                           save_name, epoch_id + 1)
|
||||
|
||||
|
||||
class WiferFaceEval(Callback):
    """WIDER FACE evaluation hook (class name keeps the original
    "Wifer" spelling — renaming would break existing configs).

    Runs each metric's ``update`` over the model at epoch begin and then
    exits the process — presumably the metric drives inference itself;
    confirm against the WIDER FACE metric implementation.
    """

    def __init__(self, model):
        super(WiferFaceEval, self).__init__(model)

    def on_epoch_begin(self, status):
        # This callback is only meaningful in evaluation mode.
        assert self.model.mode == 'eval', \
            "WiferFaceEval can only be set during evaluation"
        for metric in self.model._metrics:
            metric.update(self.model.model)
        sys.exit()
|
||||
|
||||
|
||||
class VisualDLWriter(Callback):
    """Streams training losses, test images, and eval mAP values to a
    VisualDL log directory (``vdl_log_dir`` in the config).  Requires
    the optional ``visualdl`` package; only rank 0 writes."""

    def __init__(self, model):
        super(VisualDLWriter, self).__init__(model)

        assert six.PY3, "VisualDL requires Python >= 3.5"
        try:
            from visualdl import LogWriter
        except Exception as e:
            logger.error('visualdl not found, plaese install visualdl. '
                         'for example: `pip install visualdl`.')
            raise e
        self.vdl_writer = LogWriter(
            model.cfg.get('vdl_log_dir', 'vdl_log_dir/scalar'))
        # Independent step counters, one per chart family.
        self.vdl_loss_step = 0
        self.vdl_mAP_step = 0
        self.vdl_image_step = 0
        self.vdl_image_frame = 0

    def on_step_end(self, status):
        mode = status['mode']
        if dist.get_world_size() < 2 or dist.get_rank() == 0:
            if mode == 'train':
                training_staus = status['training_staus']
                for loss_name, loss_value in training_staus.get().items():
                    self.vdl_writer.add_scalar(loss_name, loss_value,
                                               self.vdl_loss_step)
                    self.vdl_loss_step += 1
            elif mode == 'test':
                # Log the input frame and the rendered detection result
                # side by side under per-frame tags.
                ori_image = status['original_image']
                result_image = status['result_image']
                self.vdl_writer.add_image(
                    "original/frame_{}".format(self.vdl_image_frame), ori_image,
                    self.vdl_image_step)
                self.vdl_writer.add_image(
                    "result/frame_{}".format(self.vdl_image_frame),
                    result_image, self.vdl_image_step)
                self.vdl_image_step += 1
                # each frame can display ten pictures at most.
                if self.vdl_image_step % 10 == 0:
                    self.vdl_image_step = 0
                    self.vdl_image_frame += 1

    def on_epoch_end(self, status):
        mode = status['mode']
        if dist.get_world_size() < 2 or dist.get_rank() == 0:
            if mode == 'eval':
                for metric in self.model._metrics:
                    for key, map_value in metric.get_results().items():
                        self.vdl_writer.add_scalar("{}-mAP".format(key),
                                                   map_value[0],
                                                   self.vdl_mAP_step)
                self.vdl_mAP_step += 1
|
|
@ -0,0 +1,47 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import random
|
||||
import numpy as np
|
||||
|
||||
import paddle
|
||||
from paddle.distributed import fleet
|
||||
|
||||
__all__ = ['init_parallel_env', 'set_random_seed', 'init_fleet_env']
|
||||
|
||||
|
||||
def init_fleet_env():
    """Initialize paddle fleet for collective (multi-device) training."""
    fleet.init(is_collective=True)
|
||||
|
||||
|
||||
def init_parallel_env():
    """Seed each trainer process distinctly, then start paddle's
    parallel environment.

    The PADDLE_TRAINER_* variables are set by the paddle distributed
    launcher; in a single-process run the seeding step is skipped.
    """
    env = os.environ
    dist = 'PADDLE_TRAINER_ID' in env and 'PADDLE_TRAINERS_NUM' in env
    if dist:
        trainer_id = int(env['PADDLE_TRAINER_ID'])
        # Distinct but deterministic seed per trainer rank.
        local_seed = (99 + trainer_id)
        random.seed(local_seed)
        np.random.seed(local_seed)

    paddle.distributed.init_parallel_env()
|
||||
|
||||
|
||||
def set_random_seed(seed):
    """Seed both NumPy's and Python's global RNGs for reproducibility."""
    np.random.seed(seed)
    random.seed(seed)
|
|
@ -0,0 +1,117 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import yaml
|
||||
from collections import OrderedDict
|
||||
|
||||
from ppdet.data.source.category import get_categories
|
||||
|
||||
from ppdet.utils.logger import setup_logger
|
||||
logger = setup_logger('ppdet.engine')
|
||||
|
||||
# Global dictionary
# Minimum TensorRT subgraph size to set per exported architecture —
# NOTE(review): values presumably tuned per arch; confirm before editing.
TRT_MIN_SUBGRAPH = {
    'YOLO': 3,
    'SSD': 60,
    'RCNN': 40,
    'RetinaNet': 40,
    'S2ANet': 80,
    'EfficientDet': 40,
    'Face': 3,
    'TTFNet': 60,
    'FCOS': 16,
    'SOLOv2': 60,
    'HigherHRNet': 3,
    'HRNet': 3,
}

# Architectures whose exported label list uses keypoint conventions
# (see _dump_infer_config).
KEYPOINT_ARCH = ['HigherHRNet', 'TopDownHRNet']
|
||||
|
||||
|
||||
def _parse_reader(reader_cfg, dataset_cfg, metric, arch, image_shape):
    """Turn a reader config into the deploy-time preprocess pipeline and
    the label list for the inference YAML.

    Returns:
        (preprocess_list, label_list): list of {'type': ..., **params}
        preprocess op dicts, and the category names as strings.
    """
    preprocess_list = []

    anno_file = dataset_cfg.get_anno()

    clsid2catid, catid2name = get_categories(metric, anno_file, arch)

    label_list = [str(cat) for cat in catid2name.values()]

    sample_transforms = reader_cfg['sample_transforms']
    # NOTE(review): the first sample transform is skipped — presumably
    # an image-decode op not needed at deploy time; confirm.
    for st in sample_transforms[1:]:
        for key, value in st.items():
            p = {'type': key}
            if key == 'Resize':
                # Pin Resize to the export image shape when it is fixed.
                if int(image_shape[1]) != -1:
                    value['target_size'] = image_shape[1:]
            p.update(value)
            preprocess_list.append(p)
    batch_transforms = reader_cfg.get('batch_transforms', None)
    if batch_transforms:
        for bt in batch_transforms:
            for key, value in bt.items():
                # for deploy/infer, use PadStride(stride) instead PadBatch(pad_to_stride)
                if key == 'PadBatch':
                    preprocess_list.append({
                        'type': 'PadStride',
                        'stride': value['pad_to_stride']
                    })
                    break

    return preprocess_list, label_list
|
||||
|
||||
|
||||
def _dump_infer_config(config, path, image_shape, model):
    """Write the deploy-time inference YAML (arch, TRT min-subgraph
    size, preprocess pipeline, label list) to ``path``.

    Exits the process when the architecture is not listed in
    TRT_MIN_SUBGRAPH.  NOTE(review): the ``model`` argument is unused.
    """
    arch_state = False
    from ppdet.core.config.yaml_helpers import setup_orderdict
    setup_orderdict()
    infer_cfg = OrderedDict({
        'mode': 'fluid',
        'draw_threshold': 0.5,
        'metric': config['metric'],
    })
    infer_arch = config['architecture']

    # Substring-match the architecture against the exportable table.
    for arch, min_subgraph_size in TRT_MIN_SUBGRAPH.items():
        if arch in infer_arch:
            infer_cfg['arch'] = arch
            infer_cfg['min_subgraph_size'] = min_subgraph_size
            arch_state = True
            break
    if not arch_state:
        logger.error(
            'Architecture: {} is not supported for exporting model now'.format(
                infer_arch))
        os._exit(0)
    if 'Mask' in infer_arch:
        infer_cfg['mask'] = True
    label_arch = 'detection_arch'
    if infer_arch in KEYPOINT_ARCH:
        label_arch = 'keypoint_arch'
    infer_cfg['Preprocess'], infer_cfg['label_list'] = _parse_reader(
        config['TestReader'], config['TestDataset'], config['metric'],
        label_arch, image_shape)

    if infer_arch == 'S2ANet':
        # TODO: move background to num_classes
        if infer_cfg['label_list'][0] != 'background':
            infer_cfg['label_list'].insert(0, 'background')

    yaml.dump(infer_cfg, open(path, 'w'))
    logger.info("Export inference config file to {}".format(os.path.join(path)))
|
|
@ -0,0 +1,421 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import cv2
|
||||
import glob
|
||||
import paddle
|
||||
import numpy as np
|
||||
|
||||
from ppdet.core.workspace import create
|
||||
from ppdet.utils.checkpoint import load_weight
|
||||
|
||||
from ppdet.modeling.mot.utils import Timer, load_det_results
|
||||
from ppdet.modeling.mot import visualization as mot_vis
|
||||
|
||||
from ppdet.metrics import Metric, MOTMetric
|
||||
|
||||
from .callbacks import Callback, ComposeCallback
|
||||
|
||||
from ppdet.utils.logger import setup_logger
|
||||
logger = setup_logger(__name__)
|
||||
|
||||
__all__ = ['Tracker']
|
||||
|
||||
|
||||
class Tracker(object):
    """Multi-object tracking (MOT) runner.

    Wraps a ppdet MOT architecture (JDE / FairMOT / DeepSORT) and drives
    per-sequence tracking, either for benchmark evaluation ('eval' mode)
    or plain inference on a video ('test' mode).
    """

    def __init__(self, cfg, mode='eval'):
        # cfg: ppdet global config; mode selects which *MOTDataset is used.
        self.cfg = cfg
        assert mode.lower() in ['test', 'eval'], \
                "mode should be 'test' or 'eval'"
        self.mode = mode.lower()
        self.optimizer = None

        # build MOT data loader
        self.dataset = cfg['{}MOTDataset'.format(self.mode.capitalize())]

        # build model
        self.model = create(cfg.architecture)

        # shared mutable state read by callbacks (mode/step ids, etc.)
        self.status = {}
        self.start_epoch = 0

        # initial default callbacks
        self._init_callbacks()

        # initial default metrics
        self._init_metrics()
        self._reset_metrics()

    def _init_callbacks(self):
        """Tracker installs no default callbacks; users may register some."""
        self._callbacks = []
        self._compose_callback = None

    def _init_metrics(self):
        """Install MOTMetric for eval mode; test mode tracks without metrics."""
        if self.mode in ['test']:
            self._metrics = []
            return

        if self.cfg.metric == 'MOT':
            self._metrics = [MOTMetric(), ]
        else:
            logger.warning("Metric not support for metric type {}".format(
                self.cfg.metric))
            self._metrics = []

    def _reset_metrics(self):
        """Clear accumulated state on every registered metric."""
        for metric in self._metrics:
            metric.reset()

    def register_callbacks(self, callbacks):
        """Append extra Callback instances (None entries are skipped)."""
        callbacks = [h for h in list(callbacks) if h is not None]
        for c in callbacks:
            # NOTE(review): message mentions Metric but this checks Callback —
            # looks like a copy-paste slip; left unchanged here.
            assert isinstance(c, Callback), \
                    "metrics shoule be instances of subclass of Metric"
        self._callbacks.extend(callbacks)
        self._compose_callback = ComposeCallback(self._callbacks)

    def register_metrics(self, metrics):
        """Append extra Metric instances (None entries are skipped)."""
        metrics = [m for m in list(metrics) if m is not None]
        for m in metrics:
            assert isinstance(m, Metric), \
                    "metrics shoule be instances of subclass of Metric"
        self._metrics.extend(metrics)

    def load_weights_jde(self, weights):
        """Load weights for single-network (JDE/FairMOT) models."""
        load_weight(self.model, weights, self.optimizer)

    def load_weights_sde(self, det_weights, reid_weights):
        """Load detector and ReID weights for two-stage (DeepSORT) models.

        The detector may be absent when external detection results are used.
        """
        if self.model.detector:
            load_weight(self.model.detector, det_weights, self.optimizer)
        load_weight(self.model.reid, reid_weights, self.optimizer)

    def _eval_seq_jde(self,
                      dataloader,
                      save_dir=None,
                      show_image=False,
                      frame_rate=30):
        """Track one sequence with a JDE-style model.

        Returns (results, frame count, average time per frame, timer calls),
        where results is a list of (frame_id, tlwh boxes, track ids).
        """
        if save_dir:
            if not os.path.exists(save_dir): os.makedirs(save_dir)
        tracker = self.model.tracker
        # scale the track buffer with the sequence frame rate (30 fps baseline)
        tracker.max_time_lost = int(frame_rate / 30.0 * tracker.track_buffer)

        timer = Timer()
        results = []
        frame_id = 0
        self.status['mode'] = 'track'
        self.model.eval()
        for step_id, data in enumerate(dataloader):
            self.status['step_id'] = step_id
            if frame_id % 40 == 0:
                logger.info('Processing frame {} ({:.2f} fps)'.format(
                    frame_id, 1. / max(1e-5, timer.average_time)))

            # forward
            timer.tic()
            online_targets = self.model(data)

            online_tlwhs, online_ids = [], []
            for t in online_targets:
                tlwh = t.tlwh
                tid = t.track_id
                # drop implausibly tall-and-narrow boxes (aspect w/h > 1.6
                # in tlwh order means wide; threshold copied from JDE practice)
                vertical = tlwh[2] / tlwh[3] > 1.6
                if tlwh[2] * tlwh[3] > tracker.min_box_area and not vertical:
                    online_tlwhs.append(tlwh)
                    online_ids.append(tid)
            timer.toc()

            # save results
            results.append((frame_id + 1, online_tlwhs, online_ids))
            self.save_results(data, frame_id, online_ids, online_tlwhs,
                              timer.average_time, show_image, save_dir)
            frame_id += 1

        return results, frame_id, timer.average_time, timer.calls

    def _eval_seq_sde(self,
                      dataloader,
                      save_dir=None,
                      show_image=False,
                      frame_rate=30,
                      det_file=''):
        """Track one sequence with a DeepSORT-style (detector + ReID) model.

        When the model carries no detector, per-frame detections are read
        from ``det_file`` instead. Returns the same tuple as _eval_seq_jde.
        """
        if save_dir:
            if not os.path.exists(save_dir): os.makedirs(save_dir)
        tracker = self.model.tracker
        use_detector = False if not self.model.detector else True

        timer = Timer()
        results = []
        frame_id = 0
        self.status['mode'] = 'track'
        self.model.eval()
        self.model.reid.eval()
        if not use_detector:
            dets_list = load_det_results(det_file, len(dataloader))
            logger.info('Finish loading detection results file {}.'.format(
                det_file))

        for step_id, data in enumerate(dataloader):
            self.status['step_id'] = step_id
            if frame_id % 40 == 0:
                logger.info('Processing frame {} ({:.2f} fps)'.format(
                    frame_id, 1. / max(1e-5, timer.average_time)))

            # NOTE(review): timer.tic() is called again below (and once more
            # before the forward pass); each call resets the start time, so
            # the reported time excludes the earlier stages — confirm intent.
            timer.tic()
            if not use_detector:
                timer.tic()
                dets = dets_list[frame_id]
                bbox_tlwh = paddle.to_tensor(dets['bbox'], dtype='float32')
                pred_scores = paddle.to_tensor(dets['score'], dtype='float32')
                if bbox_tlwh.shape[0] > 0:
                    # convert tlwh -> tlbr expected downstream
                    pred_bboxes = paddle.concat(
                        (bbox_tlwh[:, 0:2],
                         bbox_tlwh[:, 2:4] + bbox_tlwh[:, 0:2]),
                        axis=1)
                else:
                    pred_bboxes = []
                    pred_scores = []
                data.update({
                    'pred_bboxes': pred_bboxes,
                    'pred_scores': pred_scores
                })

            # forward
            timer.tic()
            online_targets = self.model(data)

            online_tlwhs = []
            online_ids = []
            for track in online_targets:
                # skip tentative tracks and tracks not updated this frame
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                tlwh = track.to_tlwh()
                track_id = track.track_id
                online_tlwhs.append(tlwh)
                online_ids.append(track_id)
            timer.toc()

            # save results
            results.append((frame_id + 1, online_tlwhs, online_ids))
            self.save_results(data, frame_id, online_ids, online_tlwhs,
                              timer.average_time, show_image, save_dir)
            frame_id += 1

        return results, frame_id, timer.average_time, timer.calls

    def mot_evaluate(self,
                     data_root,
                     seqs,
                     output_dir,
                     data_type='mot',
                     model_type='JDE',
                     save_images=False,
                     save_videos=False,
                     show_image=False,
                     det_results_dir=''):
        """Run tracking over benchmark sequences and evaluate MOT metrics.

        Args:
            data_root: directory containing one sub-directory per sequence.
            seqs: sequence names to process.
            output_dir: where result txt files / visualizations are written.
            data_type: result file format, 'mot' or 'kitti'.
            model_type: 'JDE', 'FairMOT' (single-network) or 'DeepSORT'.
            save_images / save_videos: dump per-frame images / an mp4 per seq.
            show_image: display frames interactively via OpenCV.
            det_results_dir: external detections for detector-less DeepSORT.
        """
        if not os.path.exists(output_dir): os.makedirs(output_dir)
        result_root = os.path.join(output_dir, 'mot_results')
        if not os.path.exists(result_root): os.makedirs(result_root)
        assert data_type in ['mot', 'kitti'], \
            "data_type should be 'mot' or 'kitti'"
        assert model_type in ['JDE', 'DeepSORT', 'FairMOT'], \
            "model_type should be 'JDE', 'DeepSORT' or 'FairMOT'"

        # run tracking
        n_frame = 0
        timer_avgs, timer_calls = [], []
        for seq in seqs:
            save_dir = os.path.join(output_dir, 'mot_outputs',
                                    seq) if save_images or save_videos else None
            logger.info('start seq: {}'.format(seq))

            infer_dir = os.path.join(data_root, seq, 'img1')
            images = self.get_infer_images(infer_dir)
            self.dataset.set_images(images)

            dataloader = create('EvalMOTReader')(self.dataset, 0)

            result_filename = os.path.join(result_root, '{}.txt'.format(seq))
            # parse the sequence frame rate out of the MOTChallenge seqinfo.ini
            # ('frameRate=' is 10 chars, value runs until the seqLength line)
            meta_info = open(os.path.join(data_root, seq, 'seqinfo.ini')).read()
            frame_rate = int(meta_info[meta_info.find('frameRate') + 10:
                                       meta_info.find('\nseqLength')])

            if model_type in ['JDE', 'FairMOT']:
                results, nf, ta, tc = self._eval_seq_jde(
                    dataloader,
                    save_dir=save_dir,
                    show_image=show_image,
                    frame_rate=frame_rate)
            elif model_type in ['DeepSORT']:
                results, nf, ta, tc = self._eval_seq_sde(
                    dataloader,
                    save_dir=save_dir,
                    show_image=show_image,
                    frame_rate=frame_rate,
                    det_file=os.path.join(det_results_dir,
                                          '{}.txt'.format(seq)))
            else:
                raise ValueError(model_type)

            self.write_mot_results(result_filename, results, data_type)
            n_frame += nf
            timer_avgs.append(ta)
            timer_calls.append(tc)

            if save_videos:
                output_video_path = os.path.join(save_dir, '..',
                                                 '{}_vis.mp4'.format(seq))
                # stitch the dumped frames into a video with ffmpeg
                cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg {}'.format(
                    save_dir, output_video_path)
                os.system(cmd_str)
                logger.info('Save video in {}.'.format(output_video_path))

            logger.info('Evaluate seq: {}'.format(seq))
            # update metrics
            for metric in self._metrics:
                metric.update(data_root, seq, data_type, result_root,
                              result_filename)

        # overall FPS = total frames / total tracked time across sequences
        timer_avgs = np.asarray(timer_avgs)
        timer_calls = np.asarray(timer_calls)
        all_time = np.dot(timer_avgs, timer_calls)
        avg_time = all_time / np.sum(timer_calls)
        logger.info('Time elapsed: {:.2f} seconds, FPS: {:.2f}'.format(
            all_time, 1.0 / avg_time))

        # accumulate metric to log out
        for metric in self._metrics:
            metric.accumulate()
            metric.log()
        # reset metric states for metric may performed multiple times
        self._reset_metrics()

    def get_infer_images(self, infer_dir):
        """Collect and sort all image files (jpg/jpeg/png/bmp) in a directory."""
        assert infer_dir is None or os.path.isdir(infer_dir), \
            "{} is not a directory".format(infer_dir)
        images = set()
        # NOTE(review): this second isdir assert duplicates the one above.
        assert os.path.isdir(infer_dir), \
            "infer_dir {} is not a directory".format(infer_dir)
        exts = ['jpg', 'jpeg', 'png', 'bmp']
        exts += [ext.upper() for ext in exts]
        for ext in exts:
            images.update(glob.glob('{}/*.{}'.format(infer_dir, ext)))
        images = list(images)
        images.sort()
        assert len(images) > 0, "no image found in {}".format(infer_dir)
        logger.info("Found {} inference images in total.".format(len(images)))
        return images

    def mot_predict(self,
                    video_file,
                    output_dir,
                    data_type='mot',
                    model_type='JDE',
                    save_images=False,
                    save_videos=True,
                    show_image=False,
                    det_results_dir=''):
        """Run tracking on a single video file (no metric evaluation).

        Writes MOT result txt under output_dir/mot_results and optionally
        per-frame images plus an ffmpeg-stitched visualization video.
        """
        if not os.path.exists(output_dir): os.makedirs(output_dir)
        result_root = os.path.join(output_dir, 'mot_results')
        if not os.path.exists(result_root): os.makedirs(result_root)
        assert data_type in ['mot', 'kitti'], \
            "data_type should be 'mot' or 'kitti'"
        assert model_type in ['JDE', 'DeepSORT', 'FairMOT'], \
            "model_type should be 'JDE', 'DeepSORT' or 'FairMOT'"

        # run tracking; sequence name = video file stem
        seq = video_file.split('/')[-1].split('.')[0]
        save_dir = os.path.join(output_dir, 'mot_outputs',
                                seq) if save_images or save_videos else None
        logger.info('Starting tracking {}'.format(video_file))

        self.dataset.set_video(video_file)
        dataloader = create('TestMOTReader')(self.dataset, 0)
        result_filename = os.path.join(result_root, '{}.txt'.format(seq))
        frame_rate = self.dataset.frame_rate

        if model_type in ['JDE', 'FairMOT']:
            results, nf, ta, tc = self._eval_seq_jde(
                dataloader,
                save_dir=save_dir,
                show_image=show_image,
                frame_rate=frame_rate)
        elif model_type in ['DeepSORT']:
            results, nf, ta, tc = self._eval_seq_sde(
                dataloader,
                save_dir=save_dir,
                show_image=show_image,
                frame_rate=frame_rate,
                det_file=os.path.join(det_results_dir, '{}.txt'.format(seq)))
        else:
            raise ValueError(model_type)

        if save_videos:
            output_video_path = os.path.join(save_dir, '..',
                                             '{}_vis.mp4'.format(seq))
            cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg {}'.format(
                save_dir, output_video_path)
            os.system(cmd_str)
            logger.info('Save video in {}'.format(output_video_path))

    def write_mot_results(self, filename, results, data_type='mot'):
        """Serialize tracking results to a text file.

        'mot' uses the MOTChallenge CSV line format; 'kitti' uses the KITTI
        tracking format (0-based frame ids, tlbr boxes).
        """
        if data_type in ['mot', 'mcmot', 'lab']:
            save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n'
        elif data_type == 'kitti':
            save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n'
        else:
            raise ValueError(data_type)

        with open(filename, 'w') as f:
            for frame_id, tlwhs, track_ids in results:
                if data_type == 'kitti':
                    frame_id -= 1
                for tlwh, track_id in zip(tlwhs, track_ids):
                    if track_id < 0:
                        continue
                    x1, y1, w, h = tlwh
                    x2, y2 = x1 + w, y1 + h
                    line = save_format.format(
                        frame=frame_id,
                        id=track_id,
                        x1=x1,
                        y1=y1,
                        x2=x2,
                        y2=y2,
                        w=w,
                        h=h)
                    f.write(line)
        logger.info('MOT results save in {}'.format(filename))

    def save_results(self, data, frame_id, online_ids, online_tlwhs,
                     average_time, show_image, save_dir):
        """Visualize one frame's tracks; optionally display and/or save it.

        No-op unless show_image is set or save_dir is given.
        """
        if show_image or save_dir is not None:
            assert 'ori_image' in data
            img0 = data['ori_image'].numpy()[0]
            online_im = mot_vis.plot_tracking(
                img0,
                online_tlwhs,
                online_ids,
                frame_id=frame_id,
                fps=1. / average_time)
        if show_image:
            cv2.imshow('online_im', online_im)
        if save_dir is not None:
            cv2.imwrite(
                os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)),
                online_im)
|
|
@ -0,0 +1,533 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import copy
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
import paddle
|
||||
import paddle.distributed as dist
|
||||
from paddle.distributed import fleet
|
||||
from paddle import amp
|
||||
from paddle.static import InputSpec
|
||||
from ppdet.optimizer import ModelEMA
|
||||
|
||||
from ppdet.core.workspace import create
|
||||
from ppdet.utils.checkpoint import load_weight, load_pretrain_weight
|
||||
from ppdet.utils.visualizer import visualize_results, save_result
|
||||
from ppdet.metrics import JDEDetMetric, JDEReIDMetric
|
||||
from ppdet.metrics import Metric, COCOMetric, VOCMetric, WiderFaceMetric, get_infer_results, KeyPointTopDownCOCOEval
|
||||
from ppdet.data.source.category import get_categories
|
||||
import ppdet.utils.stats as stats
|
||||
|
||||
from .callbacks import Callback, ComposeCallback, LogPrinter, Checkpointer, WiferFaceEval, VisualDLWriter
|
||||
from .export_utils import _dump_infer_config
|
||||
|
||||
from ppdet.utils.logger import setup_logger
|
||||
logger = setup_logger('ppdet.engine')
|
||||
|
||||
__all__ = ['Trainer']
|
||||
|
||||
|
||||
class Trainer(object):
|
||||
def __init__(self, cfg, mode='train'):
    """Wire up dataset, loader, model, EMA and optimizer for the given mode.

    Args:
        cfg: ppdet global config object (dict-like with attribute access).
        mode: 'train', 'eval' or 'test'; selects which *Dataset/*Reader
            config sections are used.
    """
    self.cfg = cfg
    assert mode.lower() in ['train', 'eval', 'test'], \
            "mode should be 'train', 'eval' or 'test'"
    self.mode = mode.lower()
    self.optimizer = None
    self.is_loaded_weights = False

    # build data loader
    self.dataset = cfg['{}Dataset'.format(self.mode.capitalize())]
    if self.mode == 'train':
        self.loader = create('{}Reader'.format(self.mode.capitalize()))(
            self.dataset, cfg.worker_num)

    # MOT embedding heads need the identity count from the training set,
    # which is only known after the dataset is built
    if cfg.architecture == 'JDE' and self.mode == 'train':
        cfg['JDEEmbeddingHead'][
            'num_identifiers'] = self.dataset.total_identities

    if cfg.architecture == 'FairMOT' and self.mode == 'train':
        cfg['FairMOTEmbeddingHead'][
            'num_identifiers'] = self.dataset.total_identities

    # build model
    if 'model' not in self.cfg:
        self.model = create(cfg.architecture)
    else:
        # a pre-built model was injected via config (e.g. slim/distill);
        # its weights are already loaded, so load_weights() becomes a no-op
        self.model = self.cfg.model
        self.is_loaded_weights = True

    self.use_ema = ('use_ema' in cfg and cfg['use_ema'])
    if self.use_ema:
        self.ema = ModelEMA(
            cfg['ema_decay'], self.model, use_thres_step=True)

    # EvalDataset build with BatchSampler to evaluate in single device
    # TODO: multi-device evaluate
    if self.mode == 'eval':
        self._eval_batch_sampler = paddle.io.BatchSampler(
            self.dataset, batch_size=self.cfg.EvalReader['batch_size'])
        self.loader = create('{}Reader'.format(self.mode.capitalize()))(
            self.dataset, cfg.worker_num, self._eval_batch_sampler)
    # TestDataset build after user set images, skip loader creation here

    # build optimizer in train mode
    if self.mode == 'train':
        steps_per_epoch = len(self.loader)
        self.lr = create('LearningRate')(steps_per_epoch)
        self.optimizer = create('OptimizerBuilder')(self.lr,
                                                    self.model.parameters())

    # distributed context (world size 1 / rank 0 when not distributed)
    self._nranks = dist.get_world_size()
    self._local_rank = dist.get_rank()

    # shared mutable state read by callbacks (epoch/step ids, timings, ...)
    self.status = {}

    self.start_epoch = 0
    self.end_epoch = cfg.epoch

    # initial default callbacks
    self._init_callbacks()

    # initial default metrics
    self._init_metrics()
    self._reset_metrics()
|
||||
|
||||
def _init_callbacks(self):
    """Install the default callbacks for the current mode.

    train: logging + checkpointing (+ VisualDL when 'use_vdl' is set);
    eval: logging (+ WiderFace-specific hook for that metric);
    test: VisualDL only, and only when 'use_vdl' is set;
    otherwise no callbacks and no composed dispatcher.
    """
    selected = []
    if self.mode == 'train':
        selected = [LogPrinter(self), Checkpointer(self)]
        if self.cfg.get('use_vdl', False):
            selected.append(VisualDLWriter(self))
    elif self.mode == 'eval':
        selected = [LogPrinter(self)]
        if self.cfg.metric == 'WiderFace':
            selected.append(WiferFaceEval(self))
    elif self.mode == 'test' and self.cfg.get('use_vdl', False):
        selected = [VisualDLWriter(self)]

    self._callbacks = selected
    self._compose_callback = ComposeCallback(selected) if selected else None
|
||||
|
||||
def _init_metrics(self, validate=False):
    """Build the metric list matching cfg.metric for the current mode.

    Args:
        validate: when True in train mode, metrics are built against the
            EvalDataset so validation during training can be scored.
    """
    # test mode, and train mode without validation, compute no metrics
    if self.mode == 'test' or (self.mode == 'train' and not validate):
        self._metrics = []
        return
    classwise = self.cfg['classwise'] if 'classwise' in self.cfg else False
    if self.cfg.metric == 'COCO':
        # TODO: bias should be unified
        bias = self.cfg['bias'] if 'bias' in self.cfg else 0
        output_eval = self.cfg['output_eval'] \
            if 'output_eval' in self.cfg else None
        save_prediction_only = self.cfg.get('save_prediction_only', False)

        # pass clsid2catid info to metric instance to avoid multiple loading
        # annotation file
        clsid2catid = {v: k for k, v in self.dataset.catid2clsid.items()} \
                        if self.mode == 'eval' else None

        # when do validation in train, annotation file should be get from
        # EvalReader instead of self.dataset(which is TrainReader)
        anno_file = self.dataset.get_anno()
        if self.mode == 'train' and validate:
            eval_dataset = self.cfg['EvalDataset']
            eval_dataset.check_or_download_dataset()
            anno_file = eval_dataset.get_anno()

        IouType = self.cfg['IouType'] if 'IouType' in self.cfg else 'bbox'
        self._metrics = [
            COCOMetric(
                anno_file=anno_file,
                clsid2catid=clsid2catid,
                classwise=classwise,
                output_eval=output_eval,
                bias=bias,
                IouType=IouType,
                save_prediction_only=save_prediction_only)
        ]
    elif self.cfg.metric == 'VOC':
        self._metrics = [
            VOCMetric(
                label_list=self.dataset.get_label_list(),
                class_num=self.cfg.num_classes,
                map_type=self.cfg.map_type,
                classwise=classwise)
        ]
    elif self.cfg.metric == 'WiderFace':
        multi_scale = self.cfg.multi_scale_eval if 'multi_scale_eval' in self.cfg else True
        self._metrics = [
            WiderFaceMetric(
                image_dir=os.path.join(self.dataset.dataset_dir,
                                       self.dataset.image_dir),
                anno_file=self.dataset.get_anno(),
                multi_scale=multi_scale)
        ]
    elif self.cfg.metric == 'KeyPointTopDownCOCOEval':
        # keypoint eval always scores against the EvalDataset annotations
        eval_dataset = self.cfg['EvalDataset']
        eval_dataset.check_or_download_dataset()
        anno_file = eval_dataset.get_anno()
        self._metrics = [
            KeyPointTopDownCOCOEval(anno_file,
                                    len(eval_dataset), self.cfg.num_joints,
                                    self.cfg.save_dir)
        ]
    elif self.cfg.metric == 'MOTDet':
        self._metrics = [JDEDetMetric(), ]
    elif self.cfg.metric == 'ReID':
        self._metrics = [JDEReIDMetric(), ]
    else:
        # unknown metric type: warn and evaluate nothing rather than fail
        logger.warning("Metric not support for metric type {}".format(
            self.cfg.metric))
        self._metrics = []
|
||||
|
||||
def _reset_metrics(self):
|
||||
for metric in self._metrics:
|
||||
metric.reset()
|
||||
|
||||
def register_callbacks(self, callbacks):
    """Append user callbacks and rebuild the composed dispatcher.

    Args:
        callbacks: iterable of Callback instances; None entries are dropped.

    Raises:
        AssertionError: if any non-None entry is not a Callback subclass.
    """
    callbacks = [c for c in list(callbacks) if c is not None]
    for c in callbacks:
        # BUGFIX: the message previously read "metrics shoule be instances of
        # subclass of Metric" — a copy-paste from register_metrics — even
        # though this asserts Callback; message corrected (incl. the typo).
        assert isinstance(c, Callback), \
                "callbacks should be instances of subclass of Callback"
    self._callbacks.extend(callbacks)
    self._compose_callback = ComposeCallback(self._callbacks)
|
||||
|
||||
def register_metrics(self, metrics):
    """Append user metrics to the evaluation pipeline.

    Args:
        metrics: iterable of Metric instances; None entries are dropped.

    Raises:
        AssertionError: if any non-None entry is not a Metric subclass.
    """
    metrics = [m for m in list(metrics) if m is not None]
    for m in metrics:
        # BUGFIX: corrected "shoule" typo in the assertion message.
        assert isinstance(m, Metric), \
                "metrics should be instances of subclass of Metric"
    self._metrics.extend(metrics)
|
||||
|
||||
def load_weights(self, weights):
    """Load pretrained weights to start training from.

    No-op when a pre-loaded model was injected via the config. For
    composite MOT models the detector sub-network is the pretrain target,
    except FairMOT which pretrains the whole model.
    """
    if self.is_loaded_weights:
        return
    self.start_epoch = 0
    target = self.model
    if hasattr(self.model, 'detector') and \
            self.model.__class__.__name__ != 'FairMOT':
        target = self.model.detector
    load_pretrain_weight(target, weights)
    logger.debug("Load weights {} to start training".format(weights))
|
||||
|
||||
def resume_weights(self, weights):
    """Resume model + optimizer state from a checkpoint.

    Sets self.start_epoch from the checkpoint. Distillation models wrap
    the trainable network in `student_model`, which is resumed instead.
    """
    target = self.model.student_model \
        if hasattr(self.model, 'student_model') else self.model
    self.start_epoch = load_weight(target, weights, self.optimizer)
    logger.debug("Resume weights of epoch {}".format(self.start_epoch))
|
||||
|
||||
def train(self, validate=False):
    """Main training loop: epochs over self.loader with optional validation.

    Args:
        validate: run evaluation every cfg.snapshot_epoch epochs (and on
            the final epoch) on rank 0, re-initializing metrics first.
    """
    assert self.mode == 'train', "Model not in 'train' mode"

    # if validation in training is enabled, metrics should be re-init
    if validate:
        self._init_metrics(validate=validate)
        self._reset_metrics()

    # wrap the model for distributed training when configured; note the
    # wrapped `model` is used for forward, while self.model keeps raw weights
    model = self.model
    if self.cfg.get('fleet', False):
        model = fleet.distributed_model(model)
        self.optimizer = fleet.distributed_optimizer(self.optimizer)
    elif self._nranks > 1:
        find_unused_parameters = self.cfg[
            'find_unused_parameters'] if 'find_unused_parameters' in self.cfg else False
        model = paddle.DataParallel(
            self.model, find_unused_parameters=find_unused_parameters)

    # initial fp16
    if self.cfg.get('fp16', False):
        scaler = amp.GradScaler(
            enable=self.cfg.use_gpu, init_loss_scaling=1024)

    self.status.update({
        'epoch_id': self.start_epoch,
        'step_id': 0,
        'steps_per_epoch': len(self.loader)
    })

    self.status['batch_time'] = stats.SmoothedValue(
        self.cfg.log_iter, fmt='{avg:.4f}')
    self.status['data_time'] = stats.SmoothedValue(
        self.cfg.log_iter, fmt='{avg:.4f}')
    self.status['training_staus'] = stats.TrainingStats(self.cfg.log_iter)

    for epoch_id in range(self.start_epoch, self.cfg.epoch):
        self.status['mode'] = 'train'
        self.status['epoch_id'] = epoch_id
        self._compose_callback.on_epoch_begin(self.status)
        self.loader.dataset.set_epoch(epoch_id)
        model.train()
        iter_tic = time.time()
        for step_id, data in enumerate(self.loader):
            self.status['data_time'].update(time.time() - iter_tic)
            self.status['step_id'] = step_id
            self._compose_callback.on_step_begin(self.status)

            if self.cfg.get('fp16', False):
                # mixed-precision path: forward under autocast, then
                # scale the loss before backward to avoid fp16 underflow
                with amp.auto_cast(enable=self.cfg.use_gpu):
                    # model forward
                    outputs = model(data)
                    loss = outputs['loss']

                # model backward
                scaled_loss = scaler.scale(loss)
                scaled_loss.backward()
                # in dygraph mode, optimizer.minimize is equal to optimizer.step
                scaler.minimize(self.optimizer, scaled_loss)
            else:
                # model forward
                outputs = model(data)
                loss = outputs['loss']
                # model backward
                loss.backward()
                self.optimizer.step()

            # read lr before stepping the schedule so logs match this step
            curr_lr = self.optimizer.get_lr()
            self.lr.step()
            self.optimizer.clear_grad()
            self.status['learning_rate'] = curr_lr

            # only rank 0 aggregates training statistics
            if self._nranks < 2 or self._local_rank == 0:
                self.status['training_staus'].update(outputs)

            self.status['batch_time'].update(time.time() - iter_tic)
            self._compose_callback.on_step_end(self.status)
            if self.use_ema:
                self.ema.update(self.model)
            iter_tic = time.time()

        # apply ema weight on model (keep a copy to restore after eval/save)
        if self.use_ema:
            weight = copy.deepcopy(self.model.state_dict())
            self.model.set_dict(self.ema.apply())

        self._compose_callback.on_epoch_end(self.status)

        if validate and (self._nranks < 2 or self._local_rank == 0) \
                and ((epoch_id + 1) % self.cfg.snapshot_epoch == 0 \
                         or epoch_id == self.end_epoch - 1):
            if not hasattr(self, '_eval_loader'):
                # build evaluation dataset and loader (lazily, once)
                self._eval_dataset = self.cfg.EvalDataset
                self._eval_batch_sampler = \
                    paddle.io.BatchSampler(
                        self._eval_dataset,
                        batch_size=self.cfg.EvalReader['batch_size'])
                self._eval_loader = create('EvalReader')(
                    self._eval_dataset,
                    self.cfg.worker_num,
                    batch_sampler=self._eval_batch_sampler)
            with paddle.no_grad():
                self.status['save_best_model'] = True
                self._eval_with_loader(self._eval_loader)

        # restore origin weight on model
        if self.use_ema:
            self.model.set_dict(weight)
|
||||
|
||||
def _eval_with_loader(self, loader):
    """Run one evaluation pass over `loader`, updating and logging metrics.

    Feeds every batch through the model in eval mode, accumulates all
    registered metrics, records sample count / wall time in self.status,
    then logs and resets the metrics.
    """
    sample_num = 0
    tic = time.time()
    self._compose_callback.on_epoch_begin(self.status)
    self.status['mode'] = 'eval'
    self.model.eval()
    for step_id, data in enumerate(loader):
        self.status['step_id'] = step_id
        self._compose_callback.on_step_begin(self.status)
        # forward
        outs = self.model(data)

        # update metrics
        for metric in self._metrics:
            metric.update(data, outs)

        # one im_id per image in the batch
        sample_num += data['im_id'].numpy().shape[0]
        self._compose_callback.on_step_end(self.status)

    self.status['sample_num'] = sample_num
    self.status['cost_time'] = time.time() - tic

    # accumulate metric to log out
    for metric in self._metrics:
        metric.accumulate()
        metric.log()
    self._compose_callback.on_epoch_end(self.status)
    # reset metric states for metric may performed multiple times
    self._reset_metrics()
|
||||
|
||||
def evaluate(self):
    """Evaluate the model over self.loader without building gradients."""
    eval_loader = self.loader
    with paddle.no_grad():
        self._eval_with_loader(eval_loader)
|
||||
|
||||
def predict(self,
            images,
            draw_threshold=0.5,
            output_dir='output',
            save_txt=False):
    """Run inference on a list of images and save visualized results.

    Args:
        images: image paths passed to the TestDataset.
        draw_threshold: minimum score for a detection to be drawn.
        output_dir: directory for the rendered output images.
        save_txt: also dump per-image detection results as .txt files.
    """
    self.dataset.set_images(images)
    loader = create('TestReader')(self.dataset, 0)

    # maps im_id back to the source file path for loading/saving
    imid2path = self.dataset.get_imid2path()

    anno_file = self.dataset.get_anno()
    clsid2catid, catid2name = get_categories(
        self.cfg.metric, anno_file=anno_file)

    # Run Infer
    self.status['mode'] = 'test'
    self.model.eval()
    for step_id, data in enumerate(loader):
        self.status['step_id'] = step_id
        # forward
        outs = self.model(data)

        # carry input meta through, and move everything to numpy for
        # post-processing / visualization
        for key in ['im_shape', 'scale_factor', 'im_id']:
            outs[key] = data[key]
        for key, value in outs.items():
            if hasattr(value, 'numpy'):
                outs[key] = value.numpy()

        batch_res = get_infer_results(outs, clsid2catid)
        bbox_num = outs['bbox_num']

        # results are concatenated per batch; slice out each image's span
        # using bbox_num
        start = 0
        for i, im_id in enumerate(outs['im_id']):
            image_path = imid2path[int(im_id)]
            image = Image.open(image_path).convert('RGB')
            self.status['original_image'] = np.array(image.copy())

            end = start + bbox_num[i]
            bbox_res = batch_res['bbox'][start:end] \
                    if 'bbox' in batch_res else None
            mask_res = batch_res['mask'][start:end] \
                    if 'mask' in batch_res else None
            segm_res = batch_res['segm'][start:end] \
                    if 'segm' in batch_res else None
            keypoint_res = batch_res['keypoint'][start:end] \
                    if 'keypoint' in batch_res else None
            image = visualize_results(
                image, bbox_res, mask_res, segm_res, keypoint_res,
                int(im_id), catid2name, draw_threshold)
            self.status['result_image'] = np.array(image.copy())
            if self._compose_callback:
                self._compose_callback.on_step_end(self.status)
            # save image with detection
            save_name = self._get_save_image_name(output_dir, image_path)
            logger.info("Detection bbox results save in {}".format(
                save_name))
            image.save(save_name, quality=95)
            if save_txt:
                save_path = os.path.splitext(save_name)[0] + '.txt'
                results = {}
                results["im_id"] = im_id
                if bbox_res:
                    results["bbox_res"] = bbox_res
                if keypoint_res:
                    results["keypoint_res"] = keypoint_res
                save_result(save_path, results, catid2name, draw_threshold)
            start = end
|
||||
|
||||
def _get_save_image_name(self, output_dir, image_path):
|
||||
"""
|
||||
Get save image name from source image path.
|
||||
"""
|
||||
if not os.path.exists(output_dir):
|
||||
os.makedirs(output_dir)
|
||||
image_name = os.path.split(image_path)[-1]
|
||||
name, ext = os.path.splitext(image_name)
|
||||
return os.path.join(output_dir, "{}".format(name)) + ext
|
||||
|
||||
def export(self, output_dir='output_inference'):
    """Export the trained dynamic-graph model to static inference format.

    Writes ``infer_cfg.yml`` plus the serialized model files into
    ``<output_dir>/<config_name>/``.

    Args:
        output_dir (str): Root directory for exported artifacts.
    """
    # Fix: the original called self.model.eval() twice; once suffices.
    self.model.eval()
    model_name = os.path.splitext(os.path.split(self.cfg.filename)[-1])[0]
    save_dir = os.path.join(output_dir, model_name)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # Resolve the export input shape from TestReader if configured.
    image_shape = None
    if 'inputs_def' in self.cfg['TestReader']:
        inputs_def = self.cfg['TestReader']['inputs_def']
        image_shape = inputs_def.get('image_shape', None)
    # set image_shape=[3, -1, -1] (fully dynamic H/W) as default
    if image_shape is None:
        image_shape = [3, -1, -1]

    # Switch the architecture to deploy mode when it supports one.
    if hasattr(self.model, 'deploy'):
        self.model.deploy = True

    # Save infer cfg alongside the model so deploy tools can read it.
    _dump_infer_config(self.cfg,
                       os.path.join(save_dir, 'infer_cfg.yml'), image_shape,
                       self.model)

    input_spec = [{
        "image": InputSpec(
            shape=[None] + image_shape, name='image'),
        "im_shape": InputSpec(
            shape=[None, 2], name='im_shape'),
        "scale_factor": InputSpec(
            shape=[None, 2], name='scale_factor')
    }]

    # dy2st and save model
    if 'slim' not in self.cfg or self.cfg['slim_type'] != 'QAT':
        static_model = paddle.jit.to_static(
            self.model, input_spec=input_spec)
        # NOTE: dy2st do not pruned program, but jit.save will prune program
        # input spec, prune input spec here and save with pruned input spec
        pruned_input_spec = self._prune_input_spec(
            input_spec, static_model.forward.main_program,
            static_model.forward.outputs)
        paddle.jit.save(
            static_model,
            os.path.join(save_dir, 'model'),
            input_spec=pruned_input_spec)
        logger.info("Export model and saved in {}".format(save_dir))
    else:
        # QAT models are saved through the slim quantization helper.
        self.cfg.slim.save_quantized_model(
            self.model,
            os.path.join(save_dir, 'model'),
            input_spec=input_spec)
|
||||
|
||||
def _prune_input_spec(self, input_spec, program, targets):
    """Return the subset of ``input_spec`` still used by the pruned program.

    ``paddle.jit.save`` prunes the static program down to ``targets``;
    inputs eliminated by that pruning must not appear in the saved input
    spec.  We clone and prune the program ourselves and keep only specs
    whose variable survives in the pruned global block.
    """
    # try to prune static program to figure out pruned input spec
    # so we perform following operations in static mode.
    # Fix: guarantee we return to dynamic mode even if pruning raises,
    # by moving disable_static() into a finally block.
    paddle.enable_static()
    try:
        pruned_input_spec = [{}]
        program = program.clone()
        program = program._prune(targets=targets)
        global_block = program.global_block()
        for name, spec in input_spec[0].items():
            try:
                global_block.var(name)
                pruned_input_spec[0][name] = spec
            except Exception:
                # var() raises when the input was pruned away -> drop it.
                pass
    finally:
        paddle.disable_static()
    return pruned_input_spec
|
|
@ -0,0 +1,23 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from . import metrics
|
||||
from . import keypoint_metrics
|
||||
from . import mot_metrics
|
||||
|
||||
from .metrics import *
|
||||
from .mot_metrics import *
|
||||
from .keypoint_metrics import *
|
||||
|
||||
__all__ = metrics.__all__ + keypoint_metrics.__all__ + mot_metrics.__all__
|
|
@ -0,0 +1,184 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import sys
|
||||
import numpy as np
|
||||
import itertools
|
||||
|
||||
from ppdet.metrics.json_results import get_det_res, get_det_poly_res, get_seg_res, get_solov2_segm_res, get_keypoint_res
|
||||
from ppdet.metrics.map_utils import draw_pr_curve
|
||||
|
||||
from ppdet.utils.logger import setup_logger
|
||||
logger = setup_logger(__name__)
|
||||
|
||||
|
||||
def get_infer_results(outs, catid, bias=0):
    """
    Get result at the stage of inference.
    The output format is dictionary containing bbox or mask result.

    For example, bbox result is a list and each element contains
    image_id, category_id, bbox and score.

    Args:
        outs (dict): raw prediction outputs; may hold 'bbox', 'mask',
            'segm' and/or 'keypoint' entries plus 'im_id' / 'bbox_num'.
        catid (dict): mapping from contiguous label id to COCO category id.
        bias (int): box width/height bias (0 or 1) used by some datasets.

    Returns:
        dict: per-task lists of COCO-style result records.

    Raises:
        ValueError: if ``outs`` is None or empty.
    """
    if outs is None or len(outs) == 0:
        # Fix: original message read "result if zero" (typo).
        raise ValueError(
            'The number of valid detection results is zero. Please use a reasonable model and check input data.'
        )

    im_id = outs['im_id']

    infer_res = {}
    if 'bbox' in outs:
        # Rows longer than 6 values are polygon (rotated) boxes.
        if len(outs['bbox']) > 0 and len(outs['bbox'][0]) > 6:
            infer_res['bbox'] = get_det_poly_res(
                outs['bbox'], outs['bbox_num'], im_id, catid, bias=bias)
        else:
            infer_res['bbox'] = get_det_res(
                outs['bbox'], outs['bbox_num'], im_id, catid, bias=bias)

    if 'mask' in outs:
        # mask post process
        infer_res['mask'] = get_seg_res(outs['mask'], outs['bbox'],
                                        outs['bbox_num'], im_id, catid)

    if 'segm' in outs:
        infer_res['segm'] = get_solov2_segm_res(outs, im_id, catid)

    if 'keypoint' in outs:
        infer_res['keypoint'] = get_keypoint_res(outs, im_id)
        # Keypoint results collapse bbox_num to one count per batch.
        outs['bbox_num'] = [len(infer_res['keypoint'])]

    return infer_res
|
||||
|
||||
|
||||
def cocoapi_eval(jsonfile,
                 style,
                 coco_gt=None,
                 anno_file=None,
                 max_dets=(100, 300, 1000),
                 classwise=False,
                 sigmas=None,
                 use_area=True):
    """
    Run COCO-style evaluation on a result json and return the stats array.

    Args:
        jsonfile (str): Evaluation json file, eg: bbox.json, mask.json.
        style (str): COCOeval style, can be `bbox` , `segm` , `proposal`, `keypoints` and `keypoints_crowd`.
        coco_gt (str): Whether to load COCOAPI through anno_file,
                 eg: coco_gt = COCO(anno_file)
        anno_file (str): COCO annotations file.
        max_dets (tuple): COCO evaluation maxDets.
        classwise (bool): Whether per-category AP and draw P-R Curve or not.
        sigmas (nparray): keypoint labelling sigmas.
        use_area (bool): If gt annotations (eg. CrowdPose, AIC)
                 do not have 'area', please set use_area=False.
    """
    # Fix: identity comparisons with None use `is` / `is not` (PEP 8).
    assert coco_gt is not None or anno_file is not None
    if style == 'keypoints_crowd':
        # please install xtcocotools==1.6
        from xtcocotools.coco import COCO
        from xtcocotools.cocoeval import COCOeval
    else:
        from pycocotools.coco import COCO
        from pycocotools.cocoeval import COCOeval

    if coco_gt is None:
        coco_gt = COCO(anno_file)
    logger.info("Start evaluate...")
    coco_dt = coco_gt.loadRes(jsonfile)
    if style == 'proposal':
        # Proposal evaluation is class-agnostic bbox evaluation.
        coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
        coco_eval.params.useCats = 0
        coco_eval.params.maxDets = list(max_dets)
    elif style == 'keypoints_crowd':
        coco_eval = COCOeval(coco_gt, coco_dt, style, sigmas, use_area)
    else:
        coco_eval = COCOeval(coco_gt, coco_dt, style)
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
    if classwise:
        # Compute per-category AP and PR curve
        try:
            from terminaltables import AsciiTable
        except Exception as e:
            # Fix: message typo 'plaese' -> 'please'.
            logger.error(
                'terminaltables not found, please install terminaltables. '
                'for example: `pip install terminaltables`.')
            raise e
        precisions = coco_eval.eval['precision']
        cat_ids = coco_gt.getCatIds()
        # precision: (iou, recall, cls, area range, max dets)
        assert len(cat_ids) == precisions.shape[2]
        results_per_category = []
        for idx, catId in enumerate(cat_ids):
            # area range index 0: all area ranges
            # max dets index -1: typically 100 per image
            nm = coco_gt.loadCats(catId)[0]
            precision = precisions[:, :, idx, 0, -1]
            precision = precision[precision > -1]
            if precision.size:
                ap = np.mean(precision)
            else:
                ap = float('nan')
            results_per_category.append(
                (str(nm["name"]), '{:0.3f}'.format(float(ap))))
            pr_array = precisions[0, :, idx, 0, 2]
            recall_array = np.arange(0.0, 1.01, 0.01)
            draw_pr_curve(
                pr_array,
                recall_array,
                out_dir=style + '_pr_curve',
                file_name='{}_precision_recall_curve.jpg'.format(nm["name"]))

        # Render the per-category AP table (up to 3 category/AP pairs per row).
        num_columns = min(6, len(results_per_category) * 2)
        results_flatten = list(itertools.chain(*results_per_category))
        headers = ['category', 'AP'] * (num_columns // 2)
        results_2d = itertools.zip_longest(
            *[results_flatten[i::num_columns] for i in range(num_columns)])
        table_data = [headers]
        table_data += [result for result in results_2d]
        table = AsciiTable(table_data)
        logger.info('Per-category of {} AP: \n{}'.format(style, table.table))
        logger.info("per-category PR curve has output to {} folder.".format(
            style + '_pr_curve'))
    # flush coco evaluation result
    sys.stdout.flush()
    return coco_eval.stats
|
||||
|
||||
|
||||
def json_eval_results(metric, json_directory, dataset):
    """
    cocoapi eval with already exists proposal.json, bbox.json or mask.json
    """
    assert metric == 'COCO'
    anno_file = dataset.get_anno()
    json_file_list = ['proposal.json', 'bbox.json', 'mask.json']
    if json_directory:
        assert os.path.exists(
            json_directory), "The json directory:{} does not exist".format(
                json_directory)
        json_file_list = [
            os.path.join(str(json_directory), name) for name in json_file_list
        ]

    # Evaluate whichever of the three result files actually exists.
    coco_eval_style = ['proposal', 'bbox', 'segm']
    for style, json_path in zip(coco_eval_style, json_file_list):
        if os.path.exists(json_path):
            cocoapi_eval(json_path, style, anno_file=anno_file)
        else:
            logger.info("{} not exists!".format(json_path))
|
|
@ -0,0 +1,149 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import six
|
||||
import numpy as np
|
||||
|
||||
|
||||
def get_det_res(bboxes, bbox_nums, image_id, label_to_cat_id_map, bias=0):
    """Pack per-image detection boxes into COCO-style record dicts.

    Each record holds image_id, category_id, an [x, y, w, h] bbox and
    the detection score.  Rows whose label id is negative (padding) are
    dropped.  ``bias`` widens width/height by that amount (0 or 1).
    """
    records = []
    cursor = 0
    for img_idx, det_count in enumerate(bbox_nums):
        cur_image_id = int(image_id[img_idx][0])
        for dt in bboxes[cursor:cursor + det_count]:
            num_id, score, xmin, ymin, xmax, ymax = dt.tolist()
            if int(num_id) < 0:
                continue
            records.append({
                'image_id': cur_image_id,
                'category_id': label_to_cat_id_map[int(num_id)],
                'bbox': [xmin, ymin, xmax - xmin + bias, ymax - ymin + bias],
                'score': score
            })
        cursor += det_count
    return records
|
||||
|
||||
|
||||
def get_det_poly_res(bboxes, bbox_nums, image_id, label_to_cat_id_map, bias=0):
    """Pack rotated-box (8-point polygon) detections into COCO-style dicts.

    Each input row is [label, score, x1, y1, ..., x4, y4]; negative
    labels (padding) are skipped.  ``bias`` is accepted for interface
    parity with :func:`get_det_res` but unused here.
    """
    records = []
    cursor = 0
    for img_idx, det_count in enumerate(bbox_nums):
        cur_image_id = int(image_id[img_idx][0])
        for dt in bboxes[cursor:cursor + det_count]:
            num_id, score, x1, y1, x2, y2, x3, y3, x4, y4 = dt.tolist()
            if int(num_id) < 0:
                continue
            records.append({
                'image_id': cur_image_id,
                'category_id': label_to_cat_id_map[int(num_id)],
                'bbox': [x1, y1, x2, y2, x3, y3, x4, y4],
                'score': score
            })
        cursor += det_count
    return records
|
||||
|
||||
|
||||
def get_seg_res(masks, bboxes, mask_nums, image_id, label_to_cat_id_map):
    """Encode per-image instance masks as COCO RLE segmentation records.

    ``masks[k]`` pairs with ``bboxes[k]`` ([label, score, ...]); rows
    labelled -1 (padding) are skipped.
    """
    import pycocotools.mask as mask_util
    seg_records = []
    cursor = 0
    for img_idx, det_count in enumerate(mask_nums):
        cur_image_id = int(image_id[img_idx][0])
        for offset in range(det_count):
            idx = cursor + offset
            label = int(bboxes[idx][0])
            if label == -1:
                continue
            mask = masks[idx].astype(np.uint8)
            # COCO RLE wants a Fortran-ordered HxWx1 uint8 array.
            rle = mask_util.encode(
                np.array(
                    mask[:, :, None], order="F", dtype="uint8"))[0]
            if six.PY3 and 'counts' in rle:
                # json cannot serialize bytes; decode the RLE counts.
                rle['counts'] = rle['counts'].decode("utf8")
            seg_records.append({
                'image_id': cur_image_id,
                'category_id': label_to_cat_id_map[label],
                'segmentation': rle,
                'score': float(bboxes[idx][1])
            })
        cursor += det_count
    return seg_records
|
||||
|
||||
|
||||
def get_solov2_segm_res(results, image_id, num_id_to_cat_id_map):
    """Convert SOLOv2 outputs ('segm', 'cate_label', 'cate_score') into
    COCO-style RLE segmentation records for one batch's image."""
    import pycocotools.mask as mask_util
    segm_res = []
    # for each batch
    segms = results['segm'].astype(np.uint8)
    clsid_labels = results['cate_label']
    clsid_scores = results['cate_score']
    lengths = segms.shape[0]
    im_id = int(image_id[0][0])
    # NOTE(review): `segms is None` can never fire here — .astype() above
    # would already have failed on None; the guard order looks inverted.
    if lengths == 0 or segms is None:
        return None
    # for each sample
    # NOTE(review): `lengths - 1` silently drops the last instance; looks
    # like an off-by-one unless the final slot is known padding — TODO
    # confirm against the SOLOv2 post-processing that produces 'segm'.
    for i in range(lengths - 1):
        clsid = int(clsid_labels[i])
        catid = num_id_to_cat_id_map[clsid]
        score = float(clsid_scores[i])
        mask = segms[i]
        # Encode the binary mask as COCO RLE (Fortran order, HxWx1) and
        # decode counts so the record is json-serializable.
        segm = mask_util.encode(np.array(mask[:, :, np.newaxis], order='F'))[0]
        segm['counts'] = segm['counts'].decode('utf8')
        coco_res = {
            'image_id': im_id,
            'category_id': catid,
            'segmentation': segm,
            'score': score
        }
        segm_res.append(coco_res)
    return segm_res
|
||||
|
||||
|
||||
def get_keypoint_res(results, im_id):
    """Turn top-down keypoint predictions into COCO keypoint annotations.

    For every detected person the flattened (x, y, v) keypoints, a tight
    bbox around the joints and its area are recorded; category id is
    fixed to 1 (person).
    """
    annotations = []
    preds = results['keypoint']
    for idx in range(im_id.shape[0]):
        image_id = im_id[idx].item()
        kpts, scores = preds[idx]
        for kpt, score in zip(kpts, scores):
            flat = kpt.flatten()
            xs = flat[0::3]
            ys = flat[1::3]
            x0 = np.min(xs).item()
            x1 = np.max(xs).item()
            y0 = np.min(ys).item()
            y1 = np.max(ys).item()
            annotations.append({
                'image_id': image_id,
                'category_id': 1,  # XXX hard code (person)
                'keypoints': flat.tolist(),
                'score': float(score),
                'area': (x1 - x0) * (y1 - y0),
                'bbox': [x0, y0, x1 - x0, y1 - y0],
            })
    return annotations
|
|
@ -0,0 +1,200 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import json
|
||||
from collections import defaultdict
|
||||
import numpy as np
|
||||
from pycocotools.coco import COCO
|
||||
from pycocotools.cocoeval import COCOeval
|
||||
from ..modeling.keypoint_utils import oks_nms
|
||||
|
||||
__all__ = ['KeyPointTopDownCOCOEval']
|
||||
|
||||
|
||||
class KeyPointTopDownCOCOEval(object):
    """COCO keypoint (OKS-based) evaluator for top-down pose models.

    Accumulates per-batch predictions into pre-sized buffers, applies
    per-person rescoring and OKS NMS, writes the results to
    ``keypoints_results.json`` and scores them with COCOeval.
    """

    def __init__(self,
                 anno_file,
                 num_samples,
                 num_joints,
                 output_eval,
                 iou_type='keypoints',
                 in_vis_thre=0.2,
                 oks_thre=0.9):
        """
        Args:
            anno_file (str): COCO-format ground-truth annotation file.
            num_samples (int): total number of eval samples (pre-sizes buffers).
            num_joints (int): joints predicted per person.
            output_eval (str): directory the result json is written to.
            iou_type (str): COCOeval iou type; default 'keypoints'.
            in_vis_thre (float): joint-visibility threshold used in rescoring.
            oks_thre (float): OKS threshold for keypoint NMS.
        """
        super(KeyPointTopDownCOCOEval, self).__init__()
        self.coco = COCO(anno_file)
        self.num_samples = num_samples
        self.num_joints = num_joints
        self.iou_type = iou_type
        self.in_vis_thre = in_vis_thre
        self.oks_thre = oks_thre
        self.output_eval = output_eval
        self.res_file = os.path.join(output_eval, "keypoints_results.json")
        self.reset()

    def reset(self):
        """Clear accumulated predictions and evaluation results."""
        self.results = {
            # (N, num_joints, 3): x, y, score per joint.
            'all_preds': np.zeros(
                (self.num_samples, self.num_joints, 3), dtype=np.float32),
            # (N, 6): center(2), scale(2), area, box score.
            'all_boxes': np.zeros((self.num_samples, 6)),
            'image_path': []
        }
        self.eval_results = {}
        # Write cursor into the pre-sized buffers above.
        self.idx = 0

    def update(self, inputs, outputs):
        """Append one batch of predictions to the accumulation buffers."""
        kpts, _ = outputs['keypoint'][0]

        num_images = inputs['image'].shape[0]
        self.results['all_preds'][self.idx:self.idx + num_images, :, 0:
                                  3] = kpts[:, :, 0:3]
        self.results['all_boxes'][self.idx:self.idx + num_images, 0:2] = inputs[
            'center'].numpy()[:, 0:2]
        self.results['all_boxes'][self.idx:self.idx + num_images, 2:4] = inputs[
            'scale'].numpy()[:, 0:2]
        # Column 4: approximate person area derived from the scale.
        self.results['all_boxes'][self.idx:self.idx + num_images, 4] = np.prod(
            inputs['scale'].numpy() * 200, 1)
        self.results['all_boxes'][self.idx:self.idx + num_images,
                                  5] = np.squeeze(inputs['score'].numpy())
        # NOTE(review): despite its name, 'image_path' stores im_id values,
        # which get_final_results later casts with int() — confirm intent.
        self.results['image_path'].extend(inputs['im_id'].numpy())

        self.idx += num_images

    def _write_coco_keypoint_results(self, keypoints):
        """Serialize OKS-NMS'd keypoints to ``self.res_file`` as json."""
        data_pack = [{
            'cat_id': 1,
            'cls': 'person',
            'ann_type': 'keypoints',
            'keypoints': keypoints
        }]
        results = self._coco_keypoint_results_one_category_kernel(data_pack[0])
        if not os.path.exists(self.output_eval):
            os.makedirs(self.output_eval)
        with open(self.res_file, 'w') as f:
            json.dump(results, f, sort_keys=True, indent=4)
        try:
            # Sanity-check the file round-trips as valid json.
            json.load(open(self.res_file))
        except Exception:
            # Presumably repairs a truncated dump by forcing a closing
            # bracket on the last line — TODO confirm this recovery path.
            content = []
            with open(self.res_file, 'r') as f:
                for line in f:
                    content.append(line)
            content[-1] = ']'
            with open(self.res_file, 'w') as f:
                for c in content:
                    f.write(c)

    def _coco_keypoint_results_one_category_kernel(self, data_pack):
        """Flatten per-image keypoint dicts into COCO result records."""
        cat_id = data_pack['cat_id']
        keypoints = data_pack['keypoints']
        cat_results = []

        for img_kpts in keypoints:
            if len(img_kpts) == 0:
                continue

            _key_points = np.array(
                [img_kpts[k]['keypoints'] for k in range(len(img_kpts))])
            # One row of 3*num_joints values per person.
            _key_points = _key_points.reshape(_key_points.shape[0], -1)

            result = [{
                'image_id': img_kpts[k]['image'],
                'category_id': cat_id,
                'keypoints': _key_points[k].tolist(),
                'score': img_kpts[k]['score'],
                'center': list(img_kpts[k]['center']),
                'scale': list(img_kpts[k]['scale'])
            } for k in range(len(img_kpts))]
            cat_results.extend(result)

        return cat_results

    def get_final_results(self, preds, all_boxes, img_path):
        """Rescore, OKS-NMS and write all accumulated predictions."""
        _kpts = []
        for idx, kpt in enumerate(preds):
            _kpts.append({
                'keypoints': kpt,
                'center': all_boxes[idx][0:2],
                'scale': all_boxes[idx][2:4],
                'area': all_boxes[idx][4],
                'score': all_boxes[idx][5],
                'image': int(img_path[idx])
            })
        # image x person x (keypoints)
        kpts = defaultdict(list)
        for kpt in _kpts:
            kpts[kpt['image']].append(kpt)

        # rescoring and oks nms
        num_joints = preds.shape[1]
        in_vis_thre = self.in_vis_thre
        oks_thre = self.oks_thre
        oks_nmsed_kpts = []
        for img in kpts.keys():
            img_kpts = kpts[img]
            for n_p in img_kpts:
                box_score = n_p['score']
                kpt_score = 0
                valid_num = 0
                # Average joint confidence over sufficiently visible joints.
                for n_jt in range(0, num_joints):
                    t_s = n_p['keypoints'][n_jt][2]
                    if t_s > in_vis_thre:
                        kpt_score = kpt_score + t_s
                        valid_num = valid_num + 1
                if valid_num != 0:
                    kpt_score = kpt_score / valid_num
                # rescoring: combine box and keypoint confidence.
                n_p['score'] = kpt_score * box_score

            keep = oks_nms([img_kpts[i] for i in range(len(img_kpts))],
                           oks_thre)

            if len(keep) == 0:
                # NMS kept nothing -> fall back to all candidates.
                oks_nmsed_kpts.append(img_kpts)
            else:
                oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep])

        self._write_coco_keypoint_results(oks_nmsed_kpts)

    def accumulate(self):
        """Run COCOeval on the written results and store the stats."""
        self.get_final_results(self.results['all_preds'],
                               self.results['all_boxes'],
                               self.results['image_path'])
        coco_dt = self.coco.loadRes(self.res_file)
        coco_eval = COCOeval(self.coco, coco_dt, 'keypoints')
        coco_eval.params.useSegm = None
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        keypoint_stats = []
        for ind in range(len(coco_eval.stats)):
            keypoint_stats.append((coco_eval.stats[ind]))
        self.eval_results['keypoint'] = keypoint_stats

    def log(self):
        """Print the COCO keypoint stats as a markdown-style table."""
        stats_names = [
            'AP', 'Ap .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
            'AR .75', 'AR (M)', 'AR (L)'
        ]
        num_values = len(stats_names)
        print(' '.join(['| {}'.format(name) for name in stats_names]) + ' |')
        print('|---' * (num_values + 1) + '|')

        print(' '.join([
            '| {:.3f}'.format(value) for value in self.eval_results['keypoint']
        ]) + ' |')

    def get_results(self):
        """Return the accumulated evaluation results dict."""
        return self.eval_results
|
|
@ -0,0 +1,395 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import sys
|
||||
import numpy as np
|
||||
import itertools
|
||||
|
||||
from ppdet.utils.logger import setup_logger
|
||||
logger = setup_logger(__name__)
|
||||
|
||||
__all__ = [
|
||||
'draw_pr_curve',
|
||||
'bbox_area',
|
||||
'jaccard_overlap',
|
||||
'prune_zero_padding',
|
||||
'DetectionMAP',
|
||||
'ap_per_class',
|
||||
'compute_ap',
|
||||
]
|
||||
|
||||
|
||||
def draw_pr_curve(precision,
                  recall,
                  iou=0.5,
                  out_dir='pr_curve',
                  file_name='precision_recall_curve.jpg'):
    """Render a Precision/Recall curve and save it as an image.

    The plot is written to ``out_dir/file_name``; the directory is
    created on demand.  Requires matplotlib at call time.
    """
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    save_path = os.path.join(out_dir, file_name)
    try:
        import matplotlib.pyplot as plt
    except Exception as e:
        logger.error('Matplotlib not found, plaese install matplotlib.'
                     'for example: `pip install matplotlib`.')
        raise e
    plt.cla()
    plt.figure('P-R Curve')
    plt.title('Precision/Recall Curve(IoU={})'.format(iou))
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.grid(True)
    plt.plot(recall, precision)
    plt.savefig(save_path)
|
||||
|
||||
|
||||
def bbox_area(bbox, is_bbox_normalized):
    """
    Calculate area of a bounding box
    """
    # Pixel-coordinate (unnormalized) boxes are inclusive, so each side
    # gains a +1 correction; normalized boxes get none.
    edge = 1. - float(is_bbox_normalized)
    return (bbox[2] - bbox[0] + edge) * (bbox[3] - bbox[1] + edge)
|
||||
|
||||
|
||||
def jaccard_overlap(pred, gt, is_bbox_normalized=False):
    """
    Calculate jaccard overlap ratio between two bounding box
    """
    # Disjoint boxes -> IoU is exactly zero; bail out early.
    if pred[0] >= gt[2] or pred[2] <= gt[0] or \
            pred[1] >= gt[3] or pred[3] <= gt[1]:
        return 0.
    inter_box = [
        max(pred[0], gt[0]), max(pred[1], gt[1]),
        min(pred[2], gt[2]), min(pred[3], gt[3])
    ]
    inter_size = bbox_area(inter_box, is_bbox_normalized)
    union_size = (bbox_area(pred, is_bbox_normalized) +
                  bbox_area(gt, is_bbox_normalized) - inter_size)
    return float(inter_size) / union_size
|
||||
|
||||
|
||||
def prune_zero_padding(gt_box, gt_label, difficult=None):
    """Strip trailing all-zero (padding) boxes from ground-truth arrays.

    Scans boxes in order and stops at the first row that is entirely
    zero; everything before it is kept.  ``difficult`` is sliced the
    same way when provided, otherwise None is returned in its slot.
    """
    valid_cnt = 0
    for box in gt_box:
        if box[0] == 0 and box[1] == 0 and box[2] == 0 and box[3] == 0:
            break
        valid_cnt += 1
    pruned_difficult = difficult[:valid_cnt] if difficult is not None else None
    return gt_box[:valid_cnt], gt_label[:valid_cnt], pruned_difficult
|
||||
|
||||
|
||||
class DetectionMAP(object):
    """
    Calculate detection mean average precision.
    Currently support two types: 11point and integral

    Args:
        class_num (int): The class number.
        overlap_thresh (float): The threshold of overlap
            ratio between prediction bounding box and
            ground truth bounding box for deciding
            true/false positive. Default 0.5.
        map_type (str): Calculation method of mean average
            precision, currently support '11point' and
            'integral'. Default '11point'.
        is_bbox_normalized (bool): Whether bounding boxes
            is normalized to range[0, 1]. Default False.
        evaluate_difficult (bool): Whether to evaluate
            difficult bounding boxes. Default False.
        catid2name (dict): Mapping between category id and category name.
        classwise (bool): Whether per-category AP and draw
            P-R Curve or not.
    """

    def __init__(self,
                 class_num,
                 overlap_thresh=0.5,
                 map_type='11point',
                 is_bbox_normalized=False,
                 evaluate_difficult=False,
                 catid2name=None,
                 classwise=False):
        self.class_num = class_num
        self.overlap_thresh = overlap_thresh
        assert map_type in ['11point', 'integral'], \
            "map_type currently only support '11point' "\
            "and 'integral'"
        self.map_type = map_type
        self.is_bbox_normalized = is_bbox_normalized
        self.evaluate_difficult = evaluate_difficult
        self.classwise = classwise
        # Class names in catid2name insertion order.
        self.classes = []
        for cname in catid2name.values():
            self.classes.append(cname)
        self.reset()

    def update(self, bbox, score, label, gt_box, gt_label, difficult=None):
        """
        Update metric statics from given prediction and ground
        truth infomations.
        """
        if difficult is None:
            difficult = np.zeros_like(gt_label)

        # record class gt count
        for gtl, diff in zip(gt_label, difficult):
            if self.evaluate_difficult or int(diff) == 0:
                self.class_gt_counts[int(np.array(gtl))] += 1

        # record class score positive
        visited = [False] * len(gt_label)
        for b, s, l in zip(bbox, score, label):
            xmin, ymin, xmax, ymax = b.tolist()
            pred = [xmin, ymin, xmax, ymax]
            max_idx = -1
            max_overlap = -1.0
            # Greedy match: highest-IoU same-class GT box for this prediction.
            for i, gl in enumerate(gt_label):
                if int(gl) == int(l):
                    overlap = jaccard_overlap(pred, gt_box[i],
                                              self.is_bbox_normalized)
                    if overlap > max_overlap:
                        max_overlap = overlap
                        max_idx = i

            if max_overlap > self.overlap_thresh:
                if self.evaluate_difficult or \
                        int(np.array(difficult[max_idx])) == 0:
                    if not visited[max_idx]:
                        # First match of this GT -> true positive.
                        self.class_score_poss[int(l)].append([s, 1.0])
                        visited[max_idx] = True
                    else:
                        # Duplicate match of an already-claimed GT -> FP.
                        self.class_score_poss[int(l)].append([s, 0.0])
            else:
                # No sufficient overlap -> false positive.
                self.class_score_poss[int(l)].append([s, 0.0])

    def reset(self):
        """
        Reset metric statics
        """
        # Per-class list of [score, is_true_positive] records.
        self.class_score_poss = [[] for _ in range(self.class_num)]
        # Per-class ground-truth box counts.
        self.class_gt_counts = [0] * self.class_num
        self.mAP = None

    def accumulate(self):
        """
        Accumulate metric results and calculate mAP
        """
        mAP = 0.
        valid_cnt = 0
        eval_results = []
        for score_pos, count in zip(self.class_score_poss,
                                    self.class_gt_counts):
            # Classes with no ground truth are excluded from the mean.
            if count == 0: continue
            if len(score_pos) == 0:
                # GT exists but nothing was predicted -> AP contributes 0.
                valid_cnt += 1
                continue

            accum_tp_list, accum_fp_list = \
                self._get_tp_fp_accum(score_pos)
            precision = []
            recall = []
            for ac_tp, ac_fp in zip(accum_tp_list, accum_fp_list):
                precision.append(float(ac_tp) / (ac_tp + ac_fp))
                recall.append(float(ac_tp) / count)

            one_class_ap = 0.0
            if self.map_type == '11point':
                # Interpolated max precision at recall 0.0, 0.1, ..., 1.0.
                max_precisions = [0.] * 11
                start_idx = len(precision) - 1
                for j in range(10, -1, -1):
                    for i in range(start_idx, -1, -1):
                        if recall[i] < float(j) / 10.:
                            start_idx = i
                            if j > 0:
                                max_precisions[j - 1] = max_precisions[j]
                            break
                        else:
                            if max_precisions[j] < precision[i]:
                                max_precisions[j] = precision[i]
                one_class_ap = sum(max_precisions) / 11.
                mAP += one_class_ap
                valid_cnt += 1
            elif self.map_type == 'integral':
                # Area under the raw P-R curve via recall increments.
                import math
                prev_recall = 0.
                for i in range(len(precision)):
                    recall_gap = math.fabs(recall[i] - prev_recall)
                    if recall_gap > 1e-6:
                        one_class_ap += precision[i] * recall_gap
                        prev_recall = recall[i]
                mAP += one_class_ap
                valid_cnt += 1
            else:
                logger.error("Unspported mAP type {}".format(self.map_type))
                sys.exit(1)
            # NOTE(review): indexing self.classes by valid_cnt - 1 looks
            # misaligned when earlier classes were skipped for count == 0
            # (their class-id positions are never consumed) — confirm
            # whether the class/AP pairing here is intended.
            eval_results.append({
                'class': self.classes[valid_cnt - 1],
                'ap': one_class_ap,
                'precision': precision,
                'recall': recall,
            })
        self.eval_results = eval_results
        self.mAP = mAP / float(valid_cnt) if valid_cnt > 0 else mAP

    def get_map(self):
        """
        Get mAP result
        """
        if self.mAP is None:
            logger.error("mAP is not calculated.")
        if self.classwise:
            # Compute per-category AP and PR curve
            try:
                from terminaltables import AsciiTable
            except Exception as e:
                logger.error(
                    'terminaltables not found, plaese install terminaltables. '
                    'for example: `pip install terminaltables`.')
                raise e
            results_per_category = []
            for eval_result in self.eval_results:
                results_per_category.append(
                    (str(eval_result['class']),
                     '{:0.3f}'.format(float(eval_result['ap']))))
                draw_pr_curve(
                    eval_result['precision'],
                    eval_result['recall'],
                    out_dir='voc_pr_curve',
                    file_name='{}_precision_recall_curve.jpg'.format(
                        eval_result['class']))

            # Render the per-category AP table (category/AP pairs per row).
            num_columns = min(6, len(results_per_category) * 2)
            results_flatten = list(itertools.chain(*results_per_category))
            headers = ['category', 'AP'] * (num_columns // 2)
            results_2d = itertools.zip_longest(
                *[results_flatten[i::num_columns] for i in range(num_columns)])
            table_data = [headers]
            table_data += [result for result in results_2d]
            table = AsciiTable(table_data)
            logger.info('Per-category of VOC AP: \n{}'.format(table.table))
            logger.info(
                "per-category PR curve has output to voc_pr_curve folder.")
        return self.mAP

    def _get_tp_fp_accum(self, score_pos_list):
        """
        Calculate accumulating true/false positive results from
        [score, pos] records
        """
        # Sort detections by confidence, descending, then running-sum
        # the TP/FP flags to build the cumulative curves.
        sorted_list = sorted(score_pos_list, key=lambda s: s[0], reverse=True)
        accum_tp = 0
        accum_fp = 0
        accum_tp_list = []
        accum_fp_list = []
        for (score, pos) in sorted_list:
            accum_tp += int(pos)
            accum_tp_list.append(accum_tp)
            accum_fp += 1 - int(pos)
            accum_fp_list.append(accum_fp)
        return accum_tp_list, accum_fp_list
|
||||
|
||||
|
||||
def ap_per_class(tp, conf, pred_cls, target_cls):
    """Compute per-class AP, recall and precision from detection records.

    Method originally from
    https://github.com/rafaelpadilla/Object-Detection-Metrics.

    Args:
        tp (list): True positives (1/0 flag per detection).
        conf (list): Objectness value from 0-1 per detection.
        pred_cls (list): Predicted object classes.
        target_cls (list): Target object classes.

    Returns:
        tuple: ``(ap, unique_classes, recall, precision)`` numpy arrays with
        one entry per class present in either predictions or targets.
    """
    tp = np.array(tp)
    conf = np.array(conf)
    pred_cls = np.array(pred_cls)
    target_cls = np.array(target_cls)

    # Rank detections by descending objectness.
    order = np.argsort(-conf)
    tp, conf, pred_cls = tp[order], conf[order], pred_cls[order]

    # Every class seen in either predictions or ground truth.
    unique_classes = np.unique(np.concatenate((pred_cls, target_cls), 0))

    ap, p, r = [], [], []
    for c in unique_classes:
        cls_mask = pred_cls == c
        n_gt = sum(target_cls == c)  # Number of ground truth objects
        n_p = sum(cls_mask)  # Number of predicted objects

        if n_p == 0 and n_gt == 0:
            continue
        if n_p == 0 or n_gt == 0:
            # No usable predictions or no ground truth for this class.
            ap.append(0)
            r.append(0)
            p.append(0)
            continue

        # Accumulate FPs and TPs in score order.
        tpc = np.cumsum(tp[cls_mask])
        fpc = np.cumsum(1 - tp[cls_mask])

        # Recall (1e-16 guards against division by zero).
        recall_curve = tpc / (n_gt + 1e-16)
        r.append(tpc[-1] / (n_gt + 1e-16))

        # Precision
        precision_curve = tpc / (tpc + fpc)
        p.append(tpc[-1] / (tpc[-1] + fpc[-1]))

        # AP from the recall-precision curve.
        ap.append(compute_ap(recall_curve, precision_curve))

    return np.array(ap), unique_classes.astype('int32'), np.array(r), np.array(
        p)
||||
|
||||
def compute_ap(recall, precision):
    """Compute the average precision from recall/precision curves.

    Code originally from https://github.com/rbgirshick/py-faster-rcnn.

    Args:
        recall (list): The recall curve.
        precision (list): The precision curve.

    Returns:
        The average precision as computed in py-faster-rcnn.
    """
    # Append sentinel values so the envelope is well defined at both ends.
    mrec = np.concatenate(([0.], recall, [1.]))
    mpre = np.concatenate(([0.], precision, [0.]))

    # Make precision monotonically non-increasing, sweeping right to left.
    for idx in range(mpre.size - 1, 0, -1):
        mpre[idx - 1] = np.maximum(mpre[idx - 1], mpre[idx])

    # Integrate (delta recall) * precision over the points where the
    # recall value actually changes.
    change = np.where(mrec[1:] != mrec[:-1])[0]
    return np.sum((mrec[change + 1] - mrec[change]) * mpre[change + 1])
|
@ -0,0 +1,301 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import paddle
|
||||
import numpy as np
|
||||
|
||||
from .map_utils import prune_zero_padding, DetectionMAP
|
||||
from .coco_utils import get_infer_results, cocoapi_eval
|
||||
from .widerface_utils import face_eval_run
|
||||
from ppdet.data.source.category import get_categories
|
||||
|
||||
from ppdet.utils.logger import setup_logger
|
||||
logger = setup_logger(__name__)
|
||||
|
||||
# Public API of this module.
__all__ = [
    'Metric', 'COCOMetric', 'VOCMetric', 'WiderFaceMetric', 'get_infer_results'
]

# Per-keypoint OKS sigmas (17 entries, divided by 10) used for COCO
# keypoint evaluation.
COCO_SIGMAS = np.array([
    .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87,
    .89, .89
]) / 10.0
# 14-entry sigma set selected when IouType is 'keypoints_crowd' —
# presumably the CrowdPose keypoint layout (TODO confirm).
CROWD_SIGMAS = np.array(
    [.79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89, .79,
     .79]) / 10.0
||||
|
||||
|
||||
class Metric(paddle.metric.Metric):
    """Base class for all ppdet evaluation metrics.

    ``paddle.metric.Metric`` already defines ``update``, ``accumulate``
    and ``reset``; ppdet additionally relies on the ``log`` and
    ``get_results`` hooks declared here.
    """

    def name(self):
        """Return the metric name (the concrete subclass name)."""
        return type(self).__name__

    def reset(self):
        """Clear accumulated state; subclasses override as needed."""
        pass

    def accumulate(self):
        """Finalize accumulated results; subclasses override as needed."""
        pass

    def log(self):
        """Log metric results; abstract hook overridden by subclasses."""
        pass

    def get_results(self):
        """Return metric results; abstract hook overridden by subclasses."""
        pass
|
||||
|
||||
class COCOMetric(Metric):
    """COCO evaluation metric.

    Collects per-batch inference outputs (bbox / mask / segm / keypoint),
    dumps each non-empty result list to a json file on :meth:`accumulate`,
    and (unless ``save_prediction_only``) runs the pycocotools evaluation
    against ``anno_file``.
    """

    def __init__(self, anno_file, **kwargs):
        # anno_file: COCO-format annotation json used for evaluation.
        assert os.path.isfile(anno_file), \
            "anno_file {} not a file".format(anno_file)
        self.anno_file = anno_file
        # Mapping from training class id to COCO category id; derived from
        # the annotation file when not supplied by the caller.
        self.clsid2catid = kwargs.get('clsid2catid', None)
        if self.clsid2catid is None:
            self.clsid2catid, _ = get_categories('COCO', anno_file)
        self.classwise = kwargs.get('classwise', False)
        # Directory the result json files are written into (cwd when None).
        self.output_eval = kwargs.get('output_eval', None)
        # TODO: bias should be unified
        self.bias = kwargs.get('bias', 0)
        # When True, only dump predictions and skip the mAP computation.
        self.save_prediction_only = kwargs.get('save_prediction_only', False)
        self.iou_type = kwargs.get('IouType', 'bbox')
        self.reset()

    def reset(self):
        # only bbox and mask evaluation support currently
        self.results = {'bbox': [], 'mask': [], 'segm': [], 'keypoint': []}
        self.eval_results = {}

    def update(self, inputs, outputs):
        """Convert one batch of model outputs and append to the result lists."""
        outs = {}
        # outputs Tensor -> numpy.ndarray
        for k, v in outputs.items():
            outs[k] = v.numpy() if isinstance(v, paddle.Tensor) else v

        im_id = inputs['im_id']
        outs['im_id'] = im_id.numpy() if isinstance(im_id,
                                                    paddle.Tensor) else im_id

        infer_results = get_infer_results(
            outs, self.clsid2catid, bias=self.bias)
        self.results['bbox'] += infer_results[
            'bbox'] if 'bbox' in infer_results else []
        self.results['mask'] += infer_results[
            'mask'] if 'mask' in infer_results else []
        self.results['segm'] += infer_results[
            'segm'] if 'segm' in infer_results else []
        self.results['keypoint'] += infer_results[
            'keypoint'] if 'keypoint' in infer_results else []

    def accumulate(self):
        """Dump collected results to json and run the COCO eval per type."""
        if len(self.results['bbox']) > 0:
            output = "bbox.json"
            if self.output_eval:
                output = os.path.join(self.output_eval, output)
            with open(output, 'w') as f:
                json.dump(self.results['bbox'], f)
                logger.info('The bbox result is saved to bbox.json.')

            if self.save_prediction_only:
                logger.info('The bbox result is saved to {} and do not '
                            'evaluate the mAP.'.format(output))
            else:
                bbox_stats = cocoapi_eval(
                    output,
                    'bbox',
                    anno_file=self.anno_file,
                    classwise=self.classwise)
                self.eval_results['bbox'] = bbox_stats
                sys.stdout.flush()

        if len(self.results['mask']) > 0:
            output = "mask.json"
            if self.output_eval:
                output = os.path.join(self.output_eval, output)
            with open(output, 'w') as f:
                json.dump(self.results['mask'], f)
                logger.info('The mask result is saved to mask.json.')

            if self.save_prediction_only:
                logger.info('The mask result is saved to {} and do not '
                            'evaluate the mAP.'.format(output))
            else:
                seg_stats = cocoapi_eval(
                    output,
                    'segm',
                    anno_file=self.anno_file,
                    classwise=self.classwise)
                self.eval_results['mask'] = seg_stats
                sys.stdout.flush()

        if len(self.results['segm']) > 0:
            output = "segm.json"
            if self.output_eval:
                output = os.path.join(self.output_eval, output)
            with open(output, 'w') as f:
                json.dump(self.results['segm'], f)
                logger.info('The segm result is saved to segm.json.')

            if self.save_prediction_only:
                logger.info('The segm result is saved to {} and do not '
                            'evaluate the mAP.'.format(output))
            else:
                seg_stats = cocoapi_eval(
                    output,
                    'segm',
                    anno_file=self.anno_file,
                    classwise=self.classwise)
                # NOTE(review): segm stats are stored under the 'mask' key,
                # clobbering the mask branch's results when both ran —
                # confirm whether 'segm' was intended here.
                self.eval_results['mask'] = seg_stats
                sys.stdout.flush()

        if len(self.results['keypoint']) > 0:
            output = "keypoint.json"
            if self.output_eval:
                output = os.path.join(self.output_eval, output)
            with open(output, 'w') as f:
                json.dump(self.results['keypoint'], f)
                logger.info('The keypoint result is saved to keypoint.json.')

            if self.save_prediction_only:
                logger.info('The keypoint result is saved to {} and do not '
                            'evaluate the mAP.'.format(output))
            else:
                # 'keypoints_crowd' switches style, sigma set, and area use
                # for the crowd-pose style of OKS evaluation.
                style = 'keypoints'
                use_area = True
                sigmas = COCO_SIGMAS
                if self.iou_type == 'keypoints_crowd':
                    style = 'keypoints_crowd'
                    use_area = False
                    sigmas = CROWD_SIGMAS
                keypoint_stats = cocoapi_eval(
                    output,
                    style,
                    anno_file=self.anno_file,
                    classwise=self.classwise,
                    sigmas=sigmas,
                    use_area=use_area)
                self.eval_results['keypoint'] = keypoint_stats
                sys.stdout.flush()

    def log(self):
        # cocoapi_eval already prints its summary; nothing extra to log.
        pass

    def get_results(self):
        return self.eval_results
||||
|
||||
|
||||
class VOCMetric(Metric):
    """PASCAL VOC mAP metric built on top of :class:`DetectionMAP`."""

    def __init__(self,
                 label_list,
                 class_num=20,
                 overlap_thresh=0.5,
                 map_type='11point',
                 is_bbox_normalized=False,
                 evaluate_difficult=False,
                 classwise=False):
        # label_list: text file listing the dataset's class names.
        assert os.path.isfile(label_list), \
            "label_list {} not a file".format(label_list)
        self.clsid2catid, self.catid2name = get_categories('VOC', label_list)

        self.overlap_thresh = overlap_thresh
        self.map_type = map_type
        self.evaluate_difficult = evaluate_difficult
        # DetectionMAP does the actual TP/FP bookkeeping and AP math.
        self.detection_map = DetectionMAP(
            class_num=class_num,
            overlap_thresh=overlap_thresh,
            map_type=map_type,
            is_bbox_normalized=is_bbox_normalized,
            evaluate_difficult=evaluate_difficult,
            catid2name=self.catid2name,
            classwise=classwise)

        self.reset()

    def reset(self):
        self.detection_map.reset()

    def update(self, inputs, outputs):
        """Feed one batch of predictions and ground truth to the mAP engine."""
        # outputs['bbox'] rows: [label, score, x1, y1, x2, y2].
        bbox_np = outputs['bbox'].numpy()
        bboxes = bbox_np[:, 2:]
        scores = bbox_np[:, 1]
        labels = bbox_np[:, 0]
        bbox_lengths = outputs['bbox_num'].numpy()

        # NOTE(review): `bboxes is None` can never fire here — bbox_np was
        # already sliced above. The (1, 1) shape check presumably detects
        # an empty-detections placeholder; confirm intent.
        if bboxes.shape == (1, 1) or bboxes is None:
            return
        gt_boxes = inputs['gt_bbox']
        gt_labels = inputs['gt_class']
        difficults = inputs['difficult'] if not self.evaluate_difficult \
            else None

        # Per-image (h, w) scale factors; defaults to ones when absent.
        scale_factor = inputs['scale_factor'].numpy(
        ) if 'scale_factor' in inputs else np.ones(
            (gt_boxes.shape[0], 2)).astype('float32')

        bbox_idx = 0
        for i in range(len(gt_boxes)):
            gt_box = gt_boxes[i].numpy()
            h, w = scale_factor[i]
            # Undo the preprocessing resize so gt boxes match predictions.
            gt_box = gt_box / np.array([w, h, w, h])
            gt_label = gt_labels[i].numpy()
            difficult = None if difficults is None \
                else difficults[i].numpy()
            # Slice this image's detections out of the flat batch arrays.
            bbox_num = bbox_lengths[i]
            bbox = bboxes[bbox_idx:bbox_idx + bbox_num]
            score = scores[bbox_idx:bbox_idx + bbox_num]
            label = labels[bbox_idx:bbox_idx + bbox_num]
            gt_box, gt_label, difficult = prune_zero_padding(gt_box, gt_label,
                                                             difficult)
            self.detection_map.update(bbox, score, label, gt_box, gt_label,
                                      difficult)
            bbox_idx += bbox_num

    def accumulate(self):
        logger.info("Accumulating evaluatation results...")
        self.detection_map.accumulate()

    def log(self):
        map_stat = 100. * self.detection_map.get_map()
        logger.info("mAP({:.2f}, {}) = {:.2f}%".format(self.overlap_thresh,
                                                       self.map_type, map_stat))

    def get_results(self):
        return {'bbox': [self.detection_map.get_map()]}
||||
|
||||
class WiderFaceMetric(Metric):
    """WIDER FACE evaluation: runs the face-eval pipeline over a model,
    writing widerface-format predictions to ``output/pred``.
    """

    def __init__(self, image_dir, anno_file, multi_scale=True):
        """Remember dataset locations and the multi-scale testing flag."""
        self.clsid2catid, self.catid2name = get_categories('widerface')
        self.image_dir = image_dir
        self.anno_file = anno_file
        self.multi_scale = multi_scale

    def update(self, model):
        """Evaluate ``model`` over every image listed in the annotation file."""
        face_eval_run(
            model,
            self.image_dir,
            self.anno_file,
            pred_dir='output/pred',
            eval_mode='widerface',
            multi_scale=self.multi_scale)
|
@ -0,0 +1,191 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
import copy
|
||||
import motmetrics as mm
|
||||
mm.lap.default_solver = 'lap'
|
||||
|
||||
__all__ = [
|
||||
'read_mot_results',
|
||||
'unzip_objs',
|
||||
'MOTEvaluator',
|
||||
]
|
||||
|
||||
|
||||
def read_mot_results(filename, is_gt=False, is_ignore=False):
    """Parse a MOT-challenge txt file into per-frame records.

    Each usable line has at least seven comma-separated fields:
    ``frame, id, x, y, w, h, score-or-mark[, label[, visibility]]``.

    Args:
        filename (str): path to the result/annotation file. A missing file
            yields an empty dict.
        is_gt (bool): treat the file as ground truth. For MOT15/16/17/20
            files only label-1 entries with a non-zero mark are kept;
            score is forced to 1.
        is_ignore (bool): treat the file as ignore regions. For
            MOT15/16/17/20 files, keeps entries whose label is in the
            ignore set or whose visibility ratio is negative; other files
            contribute no ignore entries. Score is forced to 1.

    Returns:
        dict: frame id -> list of ``((x, y, w, h), target_id, score)``.
        A frame key may map to an empty list when all of its entries
        were filtered out.
    """
    valid_labels = {1}
    ignore_labels = {2, 7, 8, 12}
    results_dict = dict()
    if os.path.isfile(filename):
        with open(filename, 'r') as f:
            for line in f.readlines():
                linelist = line.split(',')
                if len(linelist) < 7:
                    continue
                fid = int(linelist[0])
                if fid < 1:
                    continue
                # Register the frame before filtering so frames whose
                # entries are all filtered still appear (empty).
                results_dict.setdefault(fid, list())

                if is_gt:
                    if 'MOT16-' in filename or 'MOT17-' in filename or 'MOT15-' in filename or 'MOT20-' in filename:
                        label = int(float(linelist[7]))
                        mark = int(float(linelist[6]))
                        if mark == 0 or label not in valid_labels:
                            continue
                    score = 1
                elif is_ignore:
                    if 'MOT16-' in filename or 'MOT17-' in filename or 'MOT15-' in filename or 'MOT20-' in filename:
                        label = int(float(linelist[7]))
                        vis_ratio = float(linelist[8])
                        if label not in ignore_labels and vis_ratio >= 0:
                            continue
                    else:
                        continue
                    score = 1
                else:
                    score = float(linelist[6])

                tlwh = tuple(map(float, linelist[2:6]))
                target_id = int(linelist[1])

                results_dict[fid].append((tlwh, target_id, score))
    return results_dict
||||
|
||||
|
||||
"""
|
||||
labels={'ped', ... % 1
|
||||
'person_on_vhcl', ... % 2
|
||||
'car', ... % 3
|
||||
'bicycle', ... % 4
|
||||
'mbike', ... % 5
|
||||
'non_mot_vhcl', ... % 6
|
||||
'static_person', ... % 7
|
||||
'distractor', ... % 8
|
||||
'occluder', ... % 9
|
||||
'occluder_on_grnd', ... % 10
|
||||
'occluder_full', ... % 11
|
||||
'reflection', ... % 12
|
||||
'crowd' ... % 13
|
||||
};
|
||||
"""
|
||||
|
||||
|
||||
def unzip_objs(objs):
    """Split ``(tlwh, id, score)`` records into separate sequences.

    Returns:
        tuple: ``(tlwhs, ids, scores)`` where ``tlwhs`` is an (N, 4)
        float array; ``ids`` and ``scores`` are tuples for non-empty
        input and empty lists otherwise.
    """
    if objs:
        tlwhs, ids, scores = zip(*objs)
    else:
        tlwhs, ids, scores = [], [], []
    boxes = np.asarray(tlwhs, dtype=float).reshape(-1, 4)
    return boxes, ids, scores
||||
|
||||
|
||||
class MOTEvaluator(object):
    """Evaluate one MOT sequence against its ground truth using motmetrics.

    Loads ``gt.txt`` for the sequence on construction and accumulates
    per-frame matches into an ``mm.MOTAccumulator``.
    """

    def __init__(self, data_root, seq_name, data_type):
        # data_root/seq_name/gt/gt.txt is the expected annotation layout.
        self.data_root = data_root
        self.seq_name = seq_name
        self.data_type = data_type

        self.load_annotations()
        self.reset_accumulator()

    def load_annotations(self):
        """Read ground-truth and ignore-region boxes for the sequence."""
        assert self.data_type == 'mot'
        gt_filename = os.path.join(self.data_root, self.seq_name, 'gt',
                                   'gt.txt')
        self.gt_frame_dict = read_mot_results(gt_filename, is_gt=True)
        self.gt_ignore_frame_dict = read_mot_results(
            gt_filename, is_ignore=True)

    def reset_accumulator(self):
        # Fresh accumulator; auto_id assigns frame ids automatically.
        self.acc = mm.MOTAccumulator(auto_id=True)

    def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False):
        """Match one frame's tracker output against ground truth.

        Tracker boxes overlapping an ignore region (IoU-matched) are
        dropped before the gt/tracker distance matrix is accumulated.
        """
        # results
        trk_tlwhs = np.copy(trk_tlwhs)
        trk_ids = np.copy(trk_ids)

        # gts
        gt_objs = self.gt_frame_dict.get(frame_id, [])
        gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2]

        # ignore boxes
        ignore_objs = self.gt_ignore_frame_dict.get(frame_id, [])
        ignore_tlwhs = unzip_objs(ignore_objs)[0]

        # remove ignored results
        keep = np.ones(len(trk_tlwhs), dtype=bool)
        iou_distance = mm.distances.iou_matrix(
            ignore_tlwhs, trk_tlwhs, max_iou=0.5)
        if len(iou_distance) > 0:
            match_is, match_js = mm.lap.linear_sum_assignment(iou_distance)
            match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js])
            match_ious = iou_distance[match_is, match_js]

            # NOTE(review): match_js was already converted to int above;
            # this second conversion is redundant but harmless.
            match_js = np.asarray(match_js, dtype=int)
            # NaN IoU marks unmatched pairs; only real matches are dropped.
            match_js = match_js[np.logical_not(np.isnan(match_ious))]
            keep[match_js] = False
            trk_tlwhs = trk_tlwhs[keep]
            trk_ids = trk_ids[keep]

        # get distance matrix
        iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5)

        # acc
        self.acc.update(gt_ids, trk_ids, iou_distance)

        if rtn_events and iou_distance.size > 0 and hasattr(self.acc,
                                                            'last_mot_events'):
            events = self.acc.last_mot_events  # only supported by https://github.com/longcw/py-motmetrics
        else:
            events = None
        return events

    def eval_file(self, filename):
        """Evaluate a whole result file and return the filled accumulator."""
        self.reset_accumulator()

        result_frame_dict = read_mot_results(filename, is_gt=False)
        frames = sorted(list(set(result_frame_dict.keys())))
        for frame_id in frames:
            trk_objs = result_frame_dict.get(frame_id, [])
            trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2]
            self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False)

        return self.acc

    @staticmethod
    def get_summary(accs,
                    names,
                    metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1',
                             'precision', 'recall')):
        """Compute a motmetrics summary over several sequence accumulators."""
        names = copy.deepcopy(names)
        if metrics is None:
            metrics = mm.metrics.motchallenge_metrics
        metrics = copy.deepcopy(metrics)

        mh = mm.metrics.create()
        summary = mh.compute_many(
            accs, metrics=metrics, names=names, generate_overall=True)
        return summary

    @staticmethod
    def save_summary(summary, filename):
        """Write the summary DataFrame to an Excel file."""
        import pandas as pd
        writer = pd.ExcelWriter(filename)
        summary.to_excel(writer)
        # NOTE(review): ExcelWriter.save() is deprecated in newer pandas
        # (use close()) — confirm the pinned pandas version supports it.
        writer.save()
|
@ -0,0 +1,183 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import paddle
|
||||
import numpy as np
|
||||
from scipy import interpolate
|
||||
import paddle.nn.functional as F
|
||||
from .map_utils import ap_per_class
|
||||
from ppdet.modeling.bbox_utils import bbox_iou_np_expand
|
||||
from .mot_eval_utils import MOTEvaluator
|
||||
from .metrics import Metric
|
||||
|
||||
from ppdet.utils.logger import setup_logger
|
||||
logger = setup_logger(__name__)
|
||||
|
||||
__all__ = ['JDEDetMetric', 'JDEReIDMetric', 'MOTMetric']
|
||||
|
||||
|
||||
class JDEDetMetric(Metric):
    """Detection AP metric for JDE models.

    ``pred_cls`` is forced to zeros and predictions are compared with
    ``obj_pred = 0``, so this is effectively a single-class evaluation.
    """

    def __init__(self, overlap_thresh=0.5):
        # overlap_thresh: IoU above which a prediction may count as a TP.
        self.overlap_thresh = overlap_thresh
        self.reset()

    def reset(self):
        # Running sum of per-class AP values and how many went into it.
        self.AP_accum = np.zeros(1)
        self.AP_accum_count = np.zeros(1)

    def update(self, inputs, outputs):
        """Mark each prediction TP/FP against gt and accumulate per-class AP."""
        # outputs['bbox'] rows: [label, score, x1, y1, x2, y2].
        bboxes = outputs['bbox'][:, 2:].numpy()
        scores = outputs['bbox'][:, 1].numpy()
        labels = outputs['bbox'][:, 0].numpy()
        bbox_lengths = outputs['bbox_num'].numpy()
        # A single all-zero row is the "no detections" placeholder.
        if bboxes.shape[0] == 1 and bboxes.sum() == 0.0:
            return

        gt_boxes = inputs['gt_bbox'].numpy()[0]
        gt_labels = inputs['gt_class'].numpy()[0]
        if gt_labels.shape[0] == 0:
            return

        correct = []
        detected = []
        for i in range(bboxes.shape[0]):
            obj_pred = 0
            pred_bbox = bboxes[i].reshape(1, 4)
            # Compute iou with target boxes
            iou = bbox_iou_np_expand(pred_bbox, gt_boxes, x1y1x2y2=True)[0]
            # Extract index of largest overlap
            best_i = np.argmax(iou)
            # If overlap exceeds threshold and classification is correct mark as correct
            if iou[best_i] > self.overlap_thresh and obj_pred == gt_labels[
                    best_i] and best_i not in detected:
                correct.append(1)
                detected.append(best_i)
            else:
                correct.append(0)

        # Compute Average Precision (AP) per class
        target_cls = list(gt_labels.T[0])
        AP, AP_class, R, P = ap_per_class(
            tp=correct,
            conf=scores,
            pred_cls=np.zeros_like(scores),
            target_cls=target_cls)
        self.AP_accum_count += np.bincount(AP_class, minlength=1)
        self.AP_accum += np.bincount(AP_class, minlength=1, weights=AP)

    def accumulate(self):
        logger.info("Accumulating evaluatation results...")
        # Mean AP over all updates; 1E-16 guards against division by zero.
        self.map_stat = self.AP_accum[0] / (self.AP_accum_count[0] + 1E-16)

    def log(self):
        map_stat = 100. * self.map_stat
        logger.info("mAP({:.2f}) = {:.2f}%".format(self.overlap_thresh,
                                                   map_stat))

    def get_results(self):
        return self.map_stat
|
||||
|
||||
class JDEReIDMetric(Metric):
    """Re-identification metric: reports TPR at fixed false-accept rates.

    Collects (embedding, identity) pairs during evaluation, then builds an
    ROC curve over cosine similarities of all embedding pairs.
    """

    def __init__(self, far_levels=[1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]):
        # far_levels: false-accept rates at which TPR is interpolated.
        # (Default list is never mutated, so the shared default is safe.)
        self.far_levels = far_levels
        self.reset()

    def reset(self):
        self.embedding = []
        self.id_labels = []
        self.eval_results = {}

    def update(self, inputs, outputs):
        # Each output row is an embedding with its identity label appended
        # as the last element; label -1 marks an unlabeled sample.
        for out in outputs:
            feat, label = out[:-1].clone().detach(), int(out[-1])
            if label != -1:
                self.embedding.append(feat)
                self.id_labels.append(label)

    def accumulate(self):
        """Compute TPR@FAR over all collected embedding pairs."""
        logger.info("Computing pairwise similairity...")
        assert len(self.embedding) == len(self.id_labels)
        if len(self.embedding) < 1:
            return None
        embedding = paddle.stack(self.embedding, axis=0)
        emb = F.normalize(embedding, axis=1).numpy()
        # Cosine similarity of every embedding pair.
        pdist = np.matmul(emb, emb.T)

        # gt[i, j] is True when samples i and j share an identity.
        id_labels = np.array(self.id_labels, dtype='int32').reshape(-1, 1)
        n = len(id_labels)
        id_lbl = np.tile(id_labels, n).T
        gt = id_lbl == id_lbl.T

        # Keep only the strict upper triangle: each unordered pair once,
        # excluding self-pairs.
        up_triangle = np.where(np.triu(pdist) - np.eye(n) * pdist != 0)
        pdist = pdist[up_triangle]
        gt = gt[up_triangle]

        # lazy import metrics here
        from sklearn import metrics
        far, tar, threshold = metrics.roc_curve(gt, pdist)
        # Interpolate the ROC curve at the requested FAR operating points.
        interp = interpolate.interp1d(far, tar)
        tar_at_far = [interp(x) for x in self.far_levels]

        for f, fa in enumerate(self.far_levels):
            self.eval_results['TPR@FAR={:.7f}'.format(fa)] = ' {:.4f}'.format(
                tar_at_far[f])

    def log(self):
        for k, v in self.eval_results.items():
            logger.info('{}: {}'.format(k, v))

    def get_results(self):
        return self.eval_results
||||
|
||||
|
||||
class MOTMetric(Metric):
    """Multi-object tracking metric.

    Aggregates one :class:`MOTEvaluator` accumulator per sequence and
    renders a MOT-challenge summary table over all of them.
    """

    def __init__(self, save_summary=False):
        # save_summary: also write the summary table to summary.xlsx.
        self.save_summary = save_summary
        self.MOTEvaluator = MOTEvaluator
        self.result_root = None
        self.reset()

    def reset(self):
        """Drop every per-sequence accumulator collected so far."""
        self.accs = []
        self.seqs = []

    def update(self, data_root, seq, data_type, result_root, result_filename):
        """Evaluate one sequence's result file and keep its accumulator."""
        seq_evaluator = self.MOTEvaluator(data_root, seq, data_type)
        self.accs.append(seq_evaluator.eval_file(result_filename))
        self.seqs.append(seq)
        self.result_root = result_root

    def accumulate(self):
        """Render the MOT-challenge summary over all evaluated sequences."""
        import motmetrics as mm
        challenge_metrics = mm.metrics.motchallenge_metrics
        metrics_host = mm.metrics.create()
        summary = self.MOTEvaluator.get_summary(self.accs, self.seqs,
                                                challenge_metrics)
        self.strsummary = mm.io.render_summary(
            summary,
            formatters=metrics_host.formatters,
            namemap=mm.io.motchallenge_metric_names)
        if self.save_summary:
            self.MOTEvaluator.save_summary(
                summary, os.path.join(self.result_root, 'summary.xlsx'))

    def log(self):
        print(self.strsummary)

    def get_results(self):
        return self.strsummary
|
@ -0,0 +1,391 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import cv2
|
||||
import numpy as np
|
||||
from collections import OrderedDict
|
||||
|
||||
import paddle
|
||||
|
||||
from ppdet.utils.logger import setup_logger
|
||||
logger = setup_logger(__name__)
|
||||
|
||||
__all__ = ['face_eval_run', 'lmk2out']
|
||||
|
||||
|
||||
def face_eval_run(model,
                  image_dir,
                  gt_file,
                  pred_dir='output/pred',
                  eval_mode='widerface',
                  multi_scale=False):
    """Run face detection over every image listed in ``gt_file``.

    Args:
        model: detector invoked through :func:`detect_face`.
        image_dir (str): root directory of the evaluation images.
        gt_file (str): list file; each record is an image-name line
            followed by a box-count line and that many box lines.
        pred_dir (str): output directory for prediction files.
        eval_mode (str): 'widerface' saves one prediction file per image;
            'fddb' collects all detections and saves them at the end.
        multi_scale (bool): fuse detections from several scales plus a
            flip test via ``bbox_vote``.
    """
    # load ground truth files
    with open(gt_file, 'r') as f:
        gt_lines = f.readlines()
    imid2path = []
    pos_gt = 0
    # Walk the list: read the image name, then skip its n_gt box lines.
    while pos_gt < len(gt_lines):
        name_gt = gt_lines[pos_gt].strip('\n\t').split()[0]
        imid2path.append(name_gt)
        pos_gt += 1
        n_gt = int(gt_lines[pos_gt].strip('\n\t').split()[0])
        pos_gt += 1 + n_gt
    logger.info('The ground truth file load {} images'.format(len(imid2path)))

    dets_dist = OrderedDict()
    for iter_id, im_path in enumerate(imid2path):
        image_path = os.path.join(image_dir, im_path)
        if eval_mode == 'fddb':
            # FDDB lists image names without their extension.
            image_path += '.jpg'
        assert os.path.exists(image_path)
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if multi_scale:
            # Fuse base-scale, flipped, shrunk/enlarged and pyramid passes.
            shrink, max_shrink = get_shrink(image.shape[0], image.shape[1])
            det0 = detect_face(model, image, shrink)
            det1 = flip_test(model, image, shrink)
            [det2, det3] = multi_scale_test(model, image, max_shrink)
            det4 = multi_scale_test_pyramid(model, image, max_shrink)
            det = np.row_stack((det0, det1, det2, det3, det4))
            dets = bbox_vote(det)
        else:
            dets = detect_face(model, image, 1)
        if eval_mode == 'widerface':
            save_widerface_bboxes(image_path, dets, pred_dir)
        else:
            dets_dist[im_path] = dets
        if iter_id % 100 == 0:
            logger.info('Test iter {}'.format(iter_id))
    if eval_mode == 'fddb':
        save_fddb_bboxes(dets_dist, pred_dir)
    logger.info("Finish evaluation.")
||||
|
||||
|
||||
def detect_face(model, image, shrink):
    """Run the detector on ``image`` (optionally resized by ``shrink``).

    Returns:
        np.array: detections as [xmin, ymin, xmax, ymax, score] rows; a
        single all-zero row when nothing was detected.
    """
    image_shape = [image.shape[0], image.shape[1]]
    if shrink != 1:
        # Resize the input before preprocessing; boxes are mapped back via
        # the scale_factor fed to the model below.
        h, w = int(image_shape[0] * shrink), int(image_shape[1] * shrink)
        image = cv2.resize(image, (w, h))
        image_shape = [h, w]

    img = face_img_process(image)
    image_shape = np.asarray([image_shape])
    scale_factor = np.asarray([[shrink, shrink]])
    data = {
        "image": paddle.to_tensor(
            img, dtype='float32'),
        "im_shape": paddle.to_tensor(
            image_shape, dtype='float32'),
        "scale_factor": paddle.to_tensor(
            scale_factor, dtype='float32')
    }
    model.eval()
    detection = model(data)
    detection = detection['bbox'].numpy()
    # layout: xmin, ymin, xmax. ymax, score
    if np.prod(detection.shape) == 1:
        # Single-element output is the "no detections" placeholder.
        logger.info("No face detected")
        return np.array([[0, 0, 0, 0, 0]])
    det_conf = detection[:, 1]
    det_xmin = detection[:, 2]
    det_ymin = detection[:, 3]
    det_xmax = detection[:, 4]
    det_ymax = detection[:, 5]

    det = np.column_stack((det_xmin, det_ymin, det_xmax, det_ymax, det_conf))
    return det
||||
|
||||
|
||||
def flip_test(model, image, shrink):
    """Detect on the horizontally flipped image and mirror boxes back."""
    mirrored = cv2.flip(image, 1)
    det_f = detect_face(model, mirrored, shrink)
    img_width = image.shape[1]
    det_t = np.zeros(det_f.shape)
    # Mirror xmin/xmax back into the original coordinate frame; y and
    # score are unchanged by a horizontal flip.
    det_t[:, 0] = img_width - det_f[:, 2]
    det_t[:, 2] = img_width - det_f[:, 0]
    det_t[:, 1] = det_f[:, 1]
    det_t[:, 3] = det_f[:, 3]
    det_t[:, 4] = det_f[:, 4]
    return det_t
||||
|
||||
|
||||
def multi_scale_test(model, image, max_shrink):
    """Detect at a shrunk scale (big faces) and enlarged scales (small
    faces); returns the pair of detection arrays ``(det_s, det_b)``.
    """
    # Shrink detecting is only used to detect big faces
    st = 0.5 if max_shrink >= 0.75 else 0.5 * max_shrink
    det_s = detect_face(model, image, st)
    # Keep only boxes whose longer side exceeds 30 px.
    index = np.where(
        np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1)
        > 30)[0]
    det_s = det_s[index, :]
    # Enlarge one times
    bt = min(2, max_shrink) if max_shrink > 1 else (st + max_shrink) / 2
    det_b = detect_face(model, image, bt)

    # Enlarge small image x times for small faces
    if max_shrink > 2:
        bt *= 2
        while bt < max_shrink:
            det_b = np.row_stack((det_b, detect_face(model, image, bt)))
            bt *= 2
        det_b = np.row_stack((det_b, detect_face(model, image, max_shrink)))

    # Enlarged images are only used to detect small faces.
    if bt > 1:
        index = np.where(
            np.minimum(det_b[:, 2] - det_b[:, 0] + 1,
                       det_b[:, 3] - det_b[:, 1] + 1) < 100)[0]
        det_b = det_b[index, :]
    # Shrinked images are only used to detect big faces.
    else:
        index = np.where(
            np.maximum(det_b[:, 2] - det_b[:, 0] + 1,
                       det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]
        det_b = det_b[index, :]
    return det_s, det_b
||||
|
||||
|
||||
def multi_scale_test_pyramid(model, image, max_shrink):
    """Detect over a fixed image pyramid and pool the results.

    Args:
        model: detector forwarded to ``detect_face``.
        image (np.array): image in HWC layout.
        max_shrink (float): largest permissible resize factor; pyramid
            levels above it are skipped.

    Returns:
        np.array: pooled Nx5 detections from all evaluated scales.
    """
    # Use image pyramids to detect faces
    det_b = detect_face(model, image, 0.25)
    index = np.where(
        np.maximum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1)
        > 30)[0]
    det_b = det_b[index, :]

    # Iterate the scales directly instead of indexing via range(len(...)).
    for scale in [0.75, 1.25, 1.5, 1.75]:
        if scale <= max_shrink:
            det_temp = detect_face(model, image, scale)
            # Enlarged images are only used to detect small faces.
            if scale > 1:
                index = np.where(
                    np.minimum(det_temp[:, 2] - det_temp[:, 0] + 1,
                               det_temp[:, 3] - det_temp[:, 1] + 1) < 100)[0]
                det_temp = det_temp[index, :]
            # Shrinked images are only used to detect big faces.
            else:
                index = np.where(
                    np.maximum(det_temp[:, 2] - det_temp[:, 0] + 1,
                               det_temp[:, 3] - det_temp[:, 1] + 1) > 30)[0]
                det_temp = det_temp[index, :]
            det_b = np.row_stack((det_b, det_temp))
    return det_b
|
||||
|
||||
|
||||
def to_chw(image):
    """
    Transpose image from HWC to CHW.

    Args:
        image (np.array): an image with HWC layout; arrays that are not
            3-dimensional are returned unchanged.
    """
    # A single transpose is equivalent to the pair of swapaxes calls
    # (HWC -> HCW -> CHW) used previously.
    if len(image.shape) == 3:
        image = image.transpose((2, 0, 1))
    return image
|
||||
|
||||
|
||||
def face_img_process(image,
                     mean=(104., 117., 123.),
                     std=(127.502231, 127.502231, 127.502231)):
    """Normalize a face image and prepend a batch dimension.

    Args:
        image (np.array): image in HWC layout.
        mean (sequence of float): per-channel mean subtracted after the
            HWC->CHW transpose. Tuples replace the original mutable list
            defaults (shared-state pitfall).
        std (sequence of float): per-channel divisor.

    Returns:
        np.array: float32 tensor of shape (1, C, H, W).
    """
    img = np.array(image)
    img = to_chw(img)
    img = img.astype('float32')
    # Broadcast the per-channel statistics over H and W.
    img -= np.array(mean)[:, np.newaxis, np.newaxis].astype('float32')
    img /= np.array(std)[:, np.newaxis, np.newaxis].astype('float32')
    # Prepend the batch dimension (same result as wrapping in a list and
    # re-running np.array, without the extra copy through a Python list).
    img = img[np.newaxis, ...]
    return img
|
||||
|
||||
|
||||
def get_shrink(height, width):
    """Compute the first-pass and maximal resize factors for an image.

    The two bounds keep the resized image inside the detector's memory
    budget (the magic constants below are empirical memory limits).

    Args:
        height (int): image height.
        width (int): image width.

    Returns:
        tuple: ``(shrink, max_shrink)`` — the factor for the initial
        detection pass (capped at 1) and the upper bound for enlarging.
    """
    # avoid out of memory
    max_shrink_v1 = (0x7fffffff / 577.0 / (height * width))**0.5
    max_shrink_v2 = ((678 * 1024 * 2.0 * 2.0) / (height * width))**0.5

    def get_round(x, loc):
        """Truncate x to `loc` decimals, but only when it has >= 3 of them."""
        str_x = str(x)
        if '.' in str_x:
            str_before, str_after = str_x.split('.')
            if len(str_after) >= 3:
                return float(str_before + '.' + str_after[0:loc])
        # Fewer than three decimals, or no decimal point at all (e.g. a
        # value printed in scientific notation): return x unchanged.  The
        # original implicitly returned None in the no-dot case, which made
        # the subtraction below raise TypeError.
        return x

    max_shrink = get_round(min(max_shrink_v1, max_shrink_v2), 2) - 0.3
    # Step the bound down progressively to leave headroom at larger scales.
    if max_shrink >= 1.5 and max_shrink < 2:
        max_shrink = max_shrink - 0.1
    elif max_shrink >= 2 and max_shrink < 3:
        max_shrink = max_shrink - 0.2
    elif max_shrink >= 3 and max_shrink < 4:
        max_shrink = max_shrink - 0.3
    elif max_shrink >= 4 and max_shrink < 5:
        max_shrink = max_shrink - 0.4
    elif max_shrink >= 5:
        max_shrink = max_shrink - 0.5
    elif max_shrink <= 0.1:
        max_shrink = 0.1

    shrink = max_shrink if max_shrink < 1 else 1
    return shrink, max_shrink
|
||||
|
||||
|
||||
def bbox_vote(det):
    """Merge overlapping detections by score-weighted box voting.

    Args:
        det (np.array): Nx5 detections [xmin, ymin, xmax, ymax, score].

    Returns:
        np.array: merged detections, truncated to the top 750 and filtered
        to scores >= 0.01.
    """
    # Sort detections by descending score so det[0] is always the current
    # highest-scoring box.
    order = det[:, 4].ravel().argsort()[::-1]
    det = det[order, :]
    if det.shape[0] == 0:
        # No input boxes: seed a single dummy low-confidence detection so
        # downstream evaluation always receives a well-formed array.
        dets = np.array([[10, 10, 20, 20, 0.002]])
        det = np.empty(shape=[0, 5])
    while det.shape[0] > 0:
        # IOU
        area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1)
        xx1 = np.maximum(det[0, 0], det[:, 0])
        yy1 = np.maximum(det[0, 1], det[:, 1])
        xx2 = np.minimum(det[0, 2], det[:, 2])
        yy2 = np.minimum(det[0, 3], det[:, 3])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        o = inter / (area[0] + area[:] - inter)

        # nms
        merge_index = np.where(o >= 0.3)[0]
        det_accu = det[merge_index, :]
        det = np.delete(det, merge_index, 0)
        if merge_index.shape[0] <= 1:
            # A cluster of one box: nothing to vote over.
            # NOTE(review): the singleton is only kept when it is the very
            # last remaining box; otherwise it is silently dropped —
            # presumably intentional outlier suppression, but confirm.
            if det.shape[0] == 0:
                try:
                    # 'dets' is undefined until the first append; the
                    # except branch seeds it.
                    dets = np.row_stack((dets, det_accu))
                except:
                    dets = det_accu
            continue
        # Weight each box by its score, then average the cluster.
        det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4))
        max_score = np.max(det_accu[:, 4])
        det_accu_sum = np.zeros((1, 5))
        det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4],
                                      axis=0) / np.sum(det_accu[:, -1:])
        det_accu_sum[:, 4] = max_score
        try:
            dets = np.row_stack((dets, det_accu_sum))
        except:
            dets = det_accu_sum
    # Cap the number of detections and drop near-zero scores.
    dets = dets[0:750, :]
    keep_index = np.where(dets[:, 4] >= 0.01)[0]
    dets = dets[keep_index, :]
    return dets
|
||||
|
||||
|
||||
def save_widerface_bboxes(image_path, bboxes_scores, output_dir):
    """Save one image's predictions in the WIDER FACE evaluation format.

    Args:
        image_path (str): path of the form '.../<class>/<name>.jpg'; the
            last two components name the output subdirectory and file.
        bboxes_scores (np.array): Nx5 [xmin, ymin, xmax, ymax, score].
        output_dir (str): root directory for the result files.
    """
    image_name = image_path.split('/')[-1]
    image_class = image_path.split('/')[-2]
    odir = os.path.join(output_dir, image_class)
    if not os.path.exists(odir):
        os.makedirs(odir)

    ofname = os.path.join(odir, '%s.txt' % (image_name[:-4]))
    # 'with' guarantees the handle is closed even if a write raises;
    # the original left the file open on error.
    with open(ofname, 'w') as f:
        f.write('{:s}\n'.format(image_class + '/' + image_name))
        f.write('{:d}\n'.format(bboxes_scores.shape[0]))
        for box_score in bboxes_scores:
            xmin, ymin, xmax, ymax, score = box_score
            # WIDER FACE expects (x, y, width, height, score) with
            # inclusive pixel extents, hence the +1.
            f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'.format(xmin, ymin, (
                xmax - xmin + 1), (ymax - ymin + 1), score))
    logger.info("The predicted result is saved as {}".format(ofname))
|
||||
|
||||
|
||||
def save_fddb_bboxes(bboxes_scores,
                     output_dir,
                     output_fname='pred_fddb_res.txt'):
    """Save predictions for all images in the FDDB evaluation format.

    Args:
        bboxes_scores (dict): maps image path -> Nx5 np.array of
            [xmin, ymin, xmax, ymax, score].
        output_dir (str): directory for the result file (created if absent).
        output_fname (str): result file name.

    Returns:
        str: path of the written prediction file.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    predict_file = os.path.join(output_dir, output_fname)
    # 'with' replaces the original bare open() that was never closed.
    with open(predict_file, 'w') as f:
        # dict.iteritems() was Python 2 only and raised AttributeError on
        # Python 3; items() is the correct call.
        for image_path, dets in bboxes_scores.items():
            f.write('{:s}\n'.format(image_path))
            f.write('{:d}\n'.format(dets.shape[0]))
            for box_score in dets:
                xmin, ymin, xmax, ymax, score = box_score
                width, height = xmax - xmin, ymax - ymin
                f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'
                        .format(xmin, ymin, width, height, score))
    logger.info("The predicted result is saved as {}".format(predict_file))
    return predict_file
|
||||
|
||||
|
||||
def lmk2out(results, is_bbox_normalized=False):
    """
    Decode raw landmark network output into per-image landmark records.

    Args:
        results: request a dict, should include: `landmark`, `im_id`,
            if is_bbox_normalized=True, also need `im_shape`.
        is_bbox_normalized: whether or not landmark is normalized.

    Returns:
        list of dict: each entry carries 'image_id', 'landmark' (10 decoded
        coordinates, x/y interleaved) and 'score'.
    """
    xywh_res = []
    for t in results:
        bboxes = t['bbox'][0]
        lengths = t['bbox'][1][0]
        im_ids = np.array(t['im_id'][0]).flatten()
        # Check for None BEFORE touching .shape — the original order
        # dereferenced bboxes first and raised AttributeError on a None
        # batch instead of skipping it.
        if bboxes is None or bboxes.shape == (1, 1):
            continue
        face_index = t['face_index'][0]
        prior_box = t['prior_boxes'][0]
        predict_lmk = t['landmark'][0]
        prior = np.reshape(prior_box, (-1, 4))
        predictlmk = np.reshape(predict_lmk, (-1, 10))

        # k walks the flat bbox array across all images in the batch.
        k = 0
        for a in range(len(lengths)):
            num = lengths[a]
            im_id = int(im_ids[a])
            for i in range(num):
                score = bboxes[k][1]
                theindex = face_index[i][0]
                me_prior = prior[theindex, :]
                lmk_pred = predictlmk[theindex, :]
                prior_w = me_prior[2] - me_prior[0]
                prior_h = me_prior[3] - me_prior[1]
                prior_w_center = (me_prior[2] + me_prior[0]) / 2
                prior_h_center = (me_prior[3] + me_prior[1]) / 2
                lmk_decode = np.zeros((10))
                # Decode offsets: prediction * 0.1 (variance) * prior size
                # + prior center, x at even indices, y at odd indices.
                for j in [0, 2, 4, 6, 8]:
                    lmk_decode[j] = lmk_pred[j] * 0.1 * prior_w + prior_w_center
                for j in [1, 3, 5, 7, 9]:
                    lmk_decode[j] = lmk_pred[j] * 0.1 * prior_h + prior_h_center
                im_shape = t['im_shape'][0][a].tolist()
                image_h, image_w = int(im_shape[0]), int(im_shape[1])
                if is_bbox_normalized:
                    # Scale normalized coordinates to pixel units.
                    lmk_decode = lmk_decode * np.array([
                        image_w, image_h, image_w, image_h, image_w, image_h,
                        image_w, image_h, image_w, image_h
                    ])
                lmk_res = {
                    'image_id': im_id,
                    'landmark': lmk_decode,
                    'score': score,
                }
                xywh_res.append(lmk_res)
                k += 1
    return xywh_res
|
|
@ -0,0 +1,100 @@
|
|||
cascade_rcnn\cascade_mask_rcnn_r50_fpn_1x_coco
|
||||
cascade_rcnn\cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco
|
||||
cascade_rcnn\cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco
|
||||
cascade_rcnn\cascade_rcnn_r50_fpn_1x_coco
|
||||
cascade_rcnn\cascade_rcnn_r50_vd_fpn_ssld_1x_coco
|
||||
cascade_rcnn\cascade_rcnn_r50_vd_fpn_ssld_2x_coco
|
||||
dcn\cascade_rcnn_dcn_r50_fpn_1x_coco
|
||||
dcn\cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco
|
||||
dcn\faster_rcnn_dcn_r101_vd_fpn_1x_coco
|
||||
dcn\faster_rcnn_dcn_r50_fpn_1x_coco
|
||||
dcn\faster_rcnn_dcn_r50_vd_fpn_1x_coco
|
||||
dcn\faster_rcnn_dcn_r50_vd_fpn_2x_coco
|
||||
dcn\faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco
|
||||
dcn\mask_rcnn_dcn_r101_vd_fpn_1x_coco
|
||||
dcn\mask_rcnn_dcn_r50_fpn_1x_coco
|
||||
dcn\mask_rcnn_dcn_r50_vd_fpn_2x_coco
|
||||
dcn\mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco
|
||||
dota\s2anet_1x_dota
|
||||
dota\s2anet_conv_1x_dota
|
||||
face_detection\blazeface_1000e
|
||||
face_detection\blazeface_fpn_ssh_1000e
|
||||
faster_rcnn\faster_rcnn_r101_1x_coco
|
||||
faster_rcnn\faster_rcnn_r101_fpn_1x_coco
|
||||
faster_rcnn\faster_rcnn_r101_fpn_2x_coco
|
||||
faster_rcnn\faster_rcnn_r101_vd_fpn_1x_coco
|
||||
faster_rcnn\faster_rcnn_r101_vd_fpn_2x_coco
|
||||
faster_rcnn\faster_rcnn_r34_fpn_1x_coco
|
||||
faster_rcnn\faster_rcnn_r34_vd_fpn_1x_coco
|
||||
faster_rcnn\faster_rcnn_r50_1x_coco
|
||||
faster_rcnn\faster_rcnn_r50_fpn_1x_coco
|
||||
faster_rcnn\faster_rcnn_r50_fpn_2x_coco
|
||||
faster_rcnn\faster_rcnn_r50_vd_1x_coco
|
||||
faster_rcnn\faster_rcnn_r50_vd_fpn_1x_coco
|
||||
faster_rcnn\faster_rcnn_r50_vd_fpn_2x_coco
|
||||
faster_rcnn\faster_rcnn_r50_vd_fpn_ssld_1x_coco
|
||||
faster_rcnn\faster_rcnn_r50_vd_fpn_ssld_2x_coco
|
||||
faster_rcnn\faster_rcnn_x101_vd_64x4d_fpn_1x_coco
|
||||
faster_rcnn\faster_rcnn_x101_vd_64x4d_fpn_2x_coco
|
||||
fcos\fcos_dcn_r50_fpn_1x_coco
|
||||
fcos\fcos_r50_fpn_1x_coco
|
||||
fcos\fcos_r50_fpn_multiscale_2x_coco
|
||||
gn\cascade_mask_rcnn_r50_fpn_gn_2x_coco
|
||||
gn\cascade_rcnn_r50_fpn_gn_2x_coco
|
||||
gn\faster_rcnn_r50_fpn_gn_2x_coco
|
||||
gn\mask_rcnn_r50_fpn_gn_2x_coco
|
||||
hrnet\faster_rcnn_hrnetv2p_w18_1x_coco
|
||||
hrnet\faster_rcnn_hrnetv2p_w18_2x_coco
|
||||
mask_rcnn\mask_rcnn_r101_fpn_1x_coco
|
||||
mask_rcnn\mask_rcnn_r101_vd_fpn_1x_coco
|
||||
mask_rcnn\mask_rcnn_r50_1x_coco
|
||||
mask_rcnn\mask_rcnn_r50_2x_coco
|
||||
mask_rcnn\mask_rcnn_r50_fpn_1x_coco
|
||||
mask_rcnn\mask_rcnn_r50_fpn_2x_coco
|
||||
mask_rcnn\mask_rcnn_r50_vd_fpn_1x_coco
|
||||
mask_rcnn\mask_rcnn_r50_vd_fpn_2x_coco
|
||||
mask_rcnn\mask_rcnn_r50_vd_fpn_ssld_1x_coco
|
||||
mask_rcnn\mask_rcnn_r50_vd_fpn_ssld_2x_coco
|
||||
mask_rcnn\mask_rcnn_x101_vd_64x4d_fpn_1x_coco
|
||||
mask_rcnn\mask_rcnn_x101_vd_64x4d_fpn_2x_coco
|
||||
pedestrian\pedestrian_yolov3_darknet
|
||||
ppyolo\ppyolov2_r101vd_dcn_365e_coco
|
||||
ppyolo\ppyolov2_r50vd_dcn_365e_coco
|
||||
ppyolo\ppyolov2_r50vd_dcn_voc
|
||||
ppyolo\ppyolo_mbv3_large_coco
|
||||
ppyolo\ppyolo_mbv3_small_coco
|
||||
ppyolo\ppyolo_r18vd_coco
|
||||
ppyolo\ppyolo_r50vd_dcn_1x_coco
|
||||
ppyolo\ppyolo_r50vd_dcn_1x_minicoco
|
||||
ppyolo\ppyolo_r50vd_dcn_2x_coco
|
||||
ppyolo\ppyolo_r50vd_dcn_voc
|
||||
ppyolo\ppyolo_test
|
||||
ppyolo\ppyolo_tiny_650e_coco
|
||||
rcnn_enhance\faster_rcnn_enhance_3x_coco
|
||||
res2net\faster_rcnn_res2net50_vb_26w_4s_fpn_1x_coco
|
||||
res2net\mask_rcnn_res2net50_vb_26w_4s_fpn_2x_coco
|
||||
res2net\mask_rcnn_res2net50_vd_26w_4s_fpn_2x_coco
|
||||
solov2\solov2_r50_fpn_1x_coco
|
||||
solov2\solov2_r50_fpn_3x_coco
|
||||
ssd\ssdlite_ghostnet_320_coco
|
||||
ssd\ssdlite_mobilenet_v1_300_coco
|
||||
ssd\ssdlite_mobilenet_v3_large_320_coco
|
||||
ssd\ssdlite_mobilenet_v3_small_320_coco
|
||||
ssd\ssd_mobilenet_v1_300_120e_voc
|
||||
ssd\ssd_vgg16_300_240e_voc
|
||||
ttfnet\pafnet_10x_coco
|
||||
ttfnet\pafnet_lite_mobilenet_v3_20x_coco
|
||||
ttfnet\ttfnet_darknet53_1x_coco
|
||||
vehicle\vehicle_yolov3_darknet
|
||||
yolov3\yolov3_darknet53_270e_coco
|
||||
yolov3\yolov3_darknet53_270e_voc
|
||||
yolov3\yolov3_mobilenet_v1_270e_coco
|
||||
yolov3\yolov3_mobilenet_v1_270e_voc
|
||||
yolov3\yolov3_mobilenet_v1_roadsign
|
||||
yolov3\yolov3_mobilenet_v1_ssld_270e_coco
|
||||
yolov3\yolov3_mobilenet_v1_ssld_270e_voc
|
||||
yolov3\yolov3_mobilenet_v3_large_270e_coco
|
||||
yolov3\yolov3_mobilenet_v3_large_270e_voc
|
||||
yolov3\yolov3_mobilenet_v3_large_ssld_270e_voc
|
||||
yolov3\yolov3_r34_270e_coco
|
||||
yolov3\yolov3_r50vd_dcn_270e_coco
|
|
@ -0,0 +1,18 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from . import model_zoo
|
||||
from .model_zoo import *
|
||||
|
||||
__all__ = model_zoo.__all__
|
|
@ -0,0 +1,84 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os.path as osp
|
||||
import pkg_resources
|
||||
|
||||
try:
|
||||
from collections.abc import Sequence
|
||||
except:
|
||||
from collections import Sequence
|
||||
|
||||
from ppdet.core.workspace import load_config, create
|
||||
from ppdet.utils.checkpoint import load_weight
|
||||
from ppdet.utils.download import get_config_path
|
||||
|
||||
from ppdet.utils.logger import setup_logger
|
||||
logger = setup_logger(__name__)
|
||||
|
||||
__all__ = [
|
||||
'list_model', 'get_config_file', 'get_weights_url', 'get_model',
|
||||
'MODEL_ZOO_FILENAME'
|
||||
]
|
||||
|
||||
MODEL_ZOO_FILENAME = 'MODEL_ZOO'
|
||||
|
||||
|
||||
def list_model(filters=None):
    """Log the zoo models whose names contain every given substring.

    Args:
        filters (str | Sequence[str] | None): substrings a model name must
            all contain; None (or an empty sequence) lists every model.

    Raises:
        ValueError: when the filters match no model at all.
    """
    if filters is None:
        # Replaces the original mutable default argument (filters=[]),
        # which is shared between calls.
        filters = []
    model_zoo_file = pkg_resources.resource_filename('ppdet.model_zoo',
                                                     MODEL_ZOO_FILENAME)
    with open(model_zoo_file) as f:
        model_names = f.read().splitlines()

    # filter model_name
    def filt(name):
        # Closure over 'filters'; a name must contain every filter string.
        for f in filters:
            if name.find(f) < 0:
                return False
        return True

    # Normalize a bare string (or any non-sequence) into a one-item list.
    if isinstance(filters, str) or not isinstance(filters, Sequence):
        filters = [filters]
    model_names = [name for name in model_names if filt(name)]
    if len(model_names) == 0 and len(filters) > 0:
        # Typo fixed in the user-facing message ("seeting" -> "setting").
        raise ValueError("no model found, please check filters setting, "
                         "filters can be set as following kinds:\n"
                         "\tDataset: coco, voc ...\n"
                         "\tArchitecture: yolo, rcnn, ssd ...\n"
                         "\tBackbone: resnet, vgg, darknet ...\n")

    model_str = "Available Models:\n"
    for model_name in model_names:
        model_str += "\t{}\n".format(model_name)
    logger.info(model_str)
|
||||
|
||||
|
||||
# models and configs save on bcebos under dygraph directory
|
||||
def get_config_file(model_name):
    """Resolve the remote config location for *model_name* to a local path."""
    remote_cfg = "ppdet://configs/{}.yml".format(model_name)
    return get_config_path(remote_cfg)
|
||||
|
||||
|
||||
def get_weights_url(model_name):
    """Build the remote weights URL from the last path component of the name."""
    # osp.basename(p) is defined as osp.split(p)[1], so this matches the
    # original osp.split(model_name)[-1] exactly.
    base_name = osp.basename(model_name)
    return "ppdet://models/{}.pdparams".format(base_name)
|
||||
|
||||
|
||||
def get_model(model_name, pretrained=True):
    """Instantiate a zoo model, optionally loading its released weights.

    Args:
        model_name (str): zoo name, e.g. 'ppyolo/ppyolo_tiny_650e_coco'.
        pretrained (bool): download and load the released weights when True.

    Returns:
        the created architecture instance.
    """
    cfg = load_config(get_config_file(model_name))
    model = create(cfg.architecture)

    if pretrained:
        load_weight(model, get_weights_url(model_name))

    return model
|
|
@ -0,0 +1,13 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
|
@ -0,0 +1,48 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import paddle
|
||||
import ppdet
|
||||
import unittest
|
||||
|
||||
# NOTE: weights downloading costs time, we choose
|
||||
# a small model for unittesting
|
||||
MODEL_NAME = 'ppyolo/ppyolo_tiny_650e_coco'
|
||||
|
||||
|
||||
class TestGetConfigFile(unittest.TestCase):
    """get_config_file should resolve a zoo name to a real local file."""

    def test_main(self):
        # Let unexpected exceptions propagate: unittest then reports the
        # full traceback instead of the original try/except pattern's
        # uninformative "False is not true".
        cfg_file = ppdet.model_zoo.get_config_file(MODEL_NAME)
        self.assertTrue(os.path.isfile(cfg_file))
|
||||
|
||||
|
||||
class TestGetModel(unittest.TestCase):
    """get_model should build a paddle Layer for a valid zoo name."""

    def test_main(self):
        # No try/except: a failure here should surface its own traceback
        # rather than being collapsed to assertTrue(False).
        model = ppdet.model_zoo.get_model(MODEL_NAME)
        self.assertIsInstance(model, paddle.nn.Layer)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
|
@ -0,0 +1,68 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import unittest
|
||||
import ppdet
|
||||
|
||||
|
||||
class TestListModel(unittest.TestCase):
    """list_model with no filter must succeed; subclasses vary the filter."""

    def setUp(self):
        self._filter = []

    def test_main(self):
        # Simply call it: an unexpected exception is reported by unittest
        # with a full traceback, which is strictly more informative than
        # the original try/except -> assertTrue(False) pattern.
        ppdet.model_zoo.list_model(self._filter)
|
||||
|
||||
|
||||
class TestListModelYOLO(TestListModel):
    # Reuses TestListModel.test_main with a 'yolo' substring filter.
    def setUp(self):
        self._filter = ['yolo']
|
||||
|
||||
|
||||
class TestListModelRCNN(TestListModel):
    # Reuses TestListModel.test_main with an 'rcnn' substring filter.
    def setUp(self):
        self._filter = ['rcnn']
|
||||
|
||||
|
||||
class TestListModelSSD(TestListModel):
    # Reuses TestListModel.test_main with an 'ssd' substring filter.
    def setUp(self):
        self._filter = ['ssd']
|
||||
|
||||
|
||||
class TestListModelMultiFilter(TestListModel):
    # Reuses TestListModel.test_main; a name must match BOTH substrings.
    def setUp(self):
        self._filter = ['yolo', 'darknet']
|
||||
|
||||
|
||||
class TestListModelError(unittest.TestCase):
    """A filter matching no model must raise ValueError."""

    def setUp(self):
        self._filter = ['xxx']

    def test_main(self):
        # assertRaises states the intent directly and fails with a clear
        # message when no (or a different) exception is raised, unlike the
        # original try/except -> assertTrue(False/True) pattern.
        with self.assertRaises(ValueError):
            ppdet.model_zoo.list_model(self._filter)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
|
@ -0,0 +1,41 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import warnings
|
||||
warnings.filterwarnings(
|
||||
action='ignore', category=DeprecationWarning, module='ops')
|
||||
|
||||
from . import ops
|
||||
from . import backbones
|
||||
from . import necks
|
||||
from . import proposal_generator
|
||||
from . import heads
|
||||
from . import losses
|
||||
from . import architectures
|
||||
from . import post_process
|
||||
from . import layers
|
||||
from . import reid
|
||||
from . import mot
|
||||
|
||||
from .ops import *
|
||||
from .backbones import *
|
||||
from .necks import *
|
||||
from .proposal_generator import *
|
||||
from .heads import *
|
||||
from .losses import *
|
||||
from .architectures import *
|
||||
from .post_process import *
|
||||
from .layers import *
|
||||
from .reid import *
|
||||
from .mot import *
|
|
@ -0,0 +1,41 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
from . import meta_arch
|
||||
from . import faster_rcnn
|
||||
from . import mask_rcnn
|
||||
from . import yolo
|
||||
from . import cascade_rcnn
|
||||
from . import ssd
|
||||
from . import fcos
|
||||
from . import solov2
|
||||
from . import ttfnet
|
||||
from . import s2anet
|
||||
from . import keypoint_hrhrnet
|
||||
from . import keypoint_hrnet
|
||||
from . import jde
|
||||
from . import deepsort
|
||||
from . import fairmot
|
||||
from . import centernet
|
||||
|
||||
from .meta_arch import *
|
||||
from .faster_rcnn import *
|
||||
from .mask_rcnn import *
|
||||
from .yolo import *
|
||||
from .cascade_rcnn import *
|
||||
from .ssd import *
|
||||
from .fcos import *
|
||||
from .solov2 import *
|
||||
from .ttfnet import *
|
||||
from .s2anet import *
|
||||
from .keypoint_hrhrnet import *
|
||||
from .keypoint_hrnet import *
|
||||
from .jde import *
|
||||
from .deepsort import *
|
||||
from .fairmot import *
|
||||
from .centernet import *
|
||||
from .blazeface import *
|
|
@ -0,0 +1,91 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ['BlazeFace']
|
||||
|
||||
|
||||
@register
class BlazeFace(BaseArch):
    """
    BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs,
    see https://arxiv.org/abs/1907.05047

    Args:
        backbone (nn.Layer): backbone instance
        neck (nn.Layer): neck instance
        blaze_head (nn.Layer): `blazeHead` instance
        post_process (object): `BBoxPostProcess` instance
    """

    __category__ = 'architecture'
    # post_process is injected from the global config by the workspace,
    # which is why from_config() does not need to return it.
    __inject__ = ['post_process']

    def __init__(self, backbone, blaze_head, neck, post_process):
        super(BlazeFace, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.blaze_head = blaze_head
        self.post_process = post_process

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Build backbone -> neck -> head, threading output shapes along."""
        # backbone
        backbone = create(cfg['backbone'])
        # fpn
        kwargs = {'input_shape': backbone.out_shape}
        neck = create(cfg['neck'], **kwargs)
        # head
        kwargs = {'input_shape': neck.out_shape}
        blaze_head = create(cfg['blaze_head'], **kwargs)

        return {
            'backbone': backbone,
            'neck': neck,
            'blaze_head': blaze_head,
        }

    def _forward(self):
        """Run the network: head loss in training, (bbox, bbox_num) in eval."""
        # Backbone
        body_feats = self.backbone(self.inputs)
        # neck
        neck_feats = self.neck(body_feats)
        # blaze Head
        if self.training:
            # In training the head consumes ground truth and returns loss.
            return self.blaze_head(neck_feats, self.inputs['image'],
                                   self.inputs['gt_bbox'],
                                   self.inputs['gt_class'])
        else:
            preds, anchors = self.blaze_head(neck_feats, self.inputs['image'])
            # Decode predictions against anchors and rescale to image frame.
            bbox, bbox_num = self.post_process(preds, anchors,
                                               self.inputs['im_shape'],
                                               self.inputs['scale_factor'])
            return bbox, bbox_num

    def get_loss(self, ):
        # In training mode _forward() returns the head's loss value.
        return {"loss": self._forward()}

    def get_pred(self):
        # In eval mode _forward() returns decoded boxes and per-image counts.
        bbox_pred, bbox_num = self._forward()
        output = {
            "bbox": bbox_pred,
            "bbox_num": bbox_num,
        }
        return output
|
|
@ -0,0 +1,143 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ['CascadeRCNN']
|
||||
|
||||
|
||||
@register
class CascadeRCNN(BaseArch):
    """
    Cascade R-CNN network, see https://arxiv.org/abs/1712.00726

    Args:
        backbone (object): backbone instance
        rpn_head (object): `RPNHead` instance
        bbox_head (object): `BBoxHead` instance
        bbox_post_process (object): `BBoxPostProcess` instance
        neck (object): 'FPN' instance
        mask_head (object): `MaskHead` instance
        mask_post_process (object): `MaskPostProcess` instance
    """
    __category__ = 'architecture'
    # These two are injected from the global config by the workspace.
    __inject__ = [
        'bbox_post_process',
        'mask_post_process',
    ]

    def __init__(self,
                 backbone,
                 rpn_head,
                 bbox_head,
                 bbox_post_process,
                 neck=None,
                 mask_head=None,
                 mask_post_process=None):
        super(CascadeRCNN, self).__init__()
        self.backbone = backbone
        self.rpn_head = rpn_head
        self.bbox_head = bbox_head
        self.bbox_post_process = bbox_post_process
        self.neck = neck
        self.mask_head = mask_head
        self.mask_post_process = mask_post_process
        # Instance segmentation branch is optional (Cascade Mask R-CNN).
        self.with_mask = mask_head is not None

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Build sub-modules in dependency order, threading output shapes."""
        backbone = create(cfg['backbone'])
        kwargs = {'input_shape': backbone.out_shape}
        # Neck is optional; 'and' short-circuits when cfg['neck'] is falsy.
        neck = cfg['neck'] and create(cfg['neck'], **kwargs)

        # Heads consume the neck's shape when present, else the backbone's.
        out_shape = neck and neck.out_shape or backbone.out_shape
        kwargs = {'input_shape': out_shape}
        rpn_head = create(cfg['rpn_head'], **kwargs)
        bbox_head = create(cfg['bbox_head'], **kwargs)

        # Without a neck the mask head reads the bbox head's feature shape.
        out_shape = neck and out_shape or bbox_head.get_head().out_shape
        kwargs = {'input_shape': out_shape}
        mask_head = cfg['mask_head'] and create(cfg['mask_head'], **kwargs)
        return {
            'backbone': backbone,
            'neck': neck,
            "rpn_head": rpn_head,
            "bbox_head": bbox_head,
            "mask_head": mask_head,
        }

    def _forward(self):
        """Training: (rpn_loss, bbox_loss, mask_loss). Eval: (bbox, num, mask)."""
        body_feats = self.backbone(self.inputs)
        if self.neck is not None:
            body_feats = self.neck(body_feats)

        if self.training:
            rois, rois_num, rpn_loss = self.rpn_head(body_feats, self.inputs)
            bbox_loss, bbox_feat = self.bbox_head(body_feats, rois, rois_num,
                                                  self.inputs)
            # The mask head trains on the rois/targets assigned inside the
            # bbox head's last cascade stage.
            rois, rois_num = self.bbox_head.get_assigned_rois()
            bbox_targets = self.bbox_head.get_assigned_targets()
            if self.with_mask:
                mask_loss = self.mask_head(body_feats, rois, rois_num,
                                           self.inputs, bbox_targets, bbox_feat)
                return rpn_loss, bbox_loss, mask_loss
            else:
                return rpn_loss, bbox_loss, {}
        else:
            rois, rois_num, _ = self.rpn_head(body_feats, self.inputs)
            preds, _ = self.bbox_head(body_feats, rois, rois_num, self.inputs)
            # Cascade stages refine the proposals; decode against those.
            refined_rois = self.bbox_head.get_refined_rois()

            im_shape = self.inputs['im_shape']
            scale_factor = self.inputs['scale_factor']

            bbox, bbox_num = self.bbox_post_process(
                preds, (refined_rois, rois_num), im_shape, scale_factor)
            # rescale the prediction back to origin image
            bbox_pred = self.bbox_post_process.get_pred(bbox, bbox_num,
                                                        im_shape, scale_factor)
            if not self.with_mask:
                return bbox_pred, bbox_num, None
            mask_out = self.mask_head(body_feats, bbox, bbox_num, self.inputs)
            origin_shape = self.bbox_post_process.get_origin_shape()
            mask_pred = self.mask_post_process(mask_out[:, 0, :, :], bbox_pred,
                                               bbox_num, origin_shape)
            return bbox_pred, bbox_num, mask_pred

    def get_loss(self, ):
        # Sum every component loss into the single 'loss' entry trainers use.
        rpn_loss, bbox_loss, mask_loss = self._forward()
        loss = {}
        loss.update(rpn_loss)
        loss.update(bbox_loss)
        if self.with_mask:
            loss.update(mask_loss)
        total_loss = paddle.add_n(list(loss.values()))
        loss.update({'loss': total_loss})
        return loss

    def get_pred(self):
        bbox_pred, bbox_num, mask_pred = self._forward()
        output = {
            'bbox': bbox_pred,
            'bbox_num': bbox_num,
        }
        if self.with_mask:
            output.update({'mask': mask_pred})
        return output
|
|
@ -0,0 +1,102 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ['CenterNet']
|
||||
|
||||
|
||||
@register
class CenterNet(BaseArch):
    """
    CenterNet network, see http://arxiv.org/abs/1904.07850

    Args:
        backbone (object): backbone instance
        neck (object): 'CenterDLAFPN' instance
        head (object): 'CenterHead' instance
        post_process (object): 'CenterNetPostProcess' instance
        for_mot (bool): whether to also return features consumed by a
            tracking model (e.g. FairMOT)
    """
    __category__ = 'architecture'
    __inject__ = ['post_process']

    def __init__(self,
                 backbone='DLA',
                 neck='CenterDLAFPN',
                 head='CenterHead',
                 post_process='CenterNetPostProcess',
                 for_mot=False):
        super(CenterNet, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.head = head
        self.post_process = post_process
        self.for_mot = for_mot

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Build sub-modules from config, threading output shapes along."""
        backbone = create(cfg['backbone'])

        # neck consumes the backbone's output shape
        kwargs = {'input_shape': backbone.out_shape}
        neck = create(cfg['neck'], **kwargs)

        # head consumes the neck's output shape
        kwargs = {'input_shape': neck.out_shape}
        head = create(cfg['head'], **kwargs)

        return {'backbone': backbone, 'neck': neck, "head": head}

    def _forward(self):
        # backbone -> neck -> head; the head also reads self.inputs
        # (ground-truth targets in training mode)
        body_feats = self.backbone(self.inputs)
        neck_feat = self.neck(body_feats)
        head_out = self.head(neck_feat, self.inputs)
        if self.for_mot:
            # expose the neck feature map so a ReID head can reuse it
            head_out.update({'neck_feat': neck_feat})
        return head_out

    def get_pred(self):
        """Decode head outputs into boxes via the post-process module.

        Returns a dict with 'bbox' plus either 'bbox_inds' and 'neck_feat'
        (MOT mode) or 'bbox_num' (plain detection).
        """
        head_out = self._forward()
        if self.for_mot:
            bbox, bbox_inds = self.post_process(
                head_out['heatmap'],
                head_out['size'],
                head_out['offset'],
                im_shape=self.inputs['im_shape'],
                scale_factor=self.inputs['scale_factor'])
            output = {
                "bbox": bbox,
                "bbox_inds": bbox_inds,
                "neck_feat": head_out['neck_feat']
            }
        else:
            bbox, bbox_num = self.post_process(
                head_out['heatmap'],
                head_out['size'],
                head_out['offset'],
                im_shape=self.inputs['im_shape'],
                scale_factor=self.inputs['scale_factor'])
            output = {
                "bbox": bbox,
                "bbox_num": bbox_num,
            }
        return output

    def get_loss(self):
        # in training mode the head returns the loss dict directly
        return self._forward()
|
|
@ -0,0 +1,111 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
from ppdet.modeling.mot.utils import Detection, get_crops, scale_coords, clip_box
|
||||
|
||||
__all__ = ['DeepSORT']
|
||||
|
||||
|
||||
@register
class DeepSORT(BaseArch):
    """
    DeepSORT network, see https://arxiv.org/abs/1703.07402

    Args:
        detector (object): detector model instance (may be None; detections
            can instead be supplied via the inputs dict)
        reid (object): reid model instance
        tracker (object): tracker instance
    """
    __category__ = 'architecture'

    def __init__(self,
                 detector='YOLOv3',
                 reid='PCBPyramid',
                 tracker='DeepSORTTracker'):
        super(DeepSORT, self).__init__()
        self.detector = detector
        self.reid = reid
        self.tracker = tracker

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # the detector is optional: the string 'None' disables it, in which
        # case detections must be loaded through the inputs dict
        if cfg['detector'] != 'None':
            detector = create(cfg['detector'])
        else:
            detector = None
        reid = create(cfg['reid'])
        tracker = create(cfg['tracker'])

        return {
            "detector": detector,
            "reid": reid,
            "tracker": tracker,
        }

    def _forward(self):
        """Detect (or load) boxes, embed crops with ReID, update tracker."""
        assert 'ori_image' in self.inputs
        # pre-computed detections may be fed through the inputs dict
        load_dets = 'pred_bboxes' in self.inputs and 'pred_scores' in self.inputs

        ori_image = self.inputs['ori_image']
        input_shape = self.inputs['image'].shape[2:]
        im_shape = self.inputs['im_shape']
        scale_factor = self.inputs['scale_factor']

        if self.detector and not load_dets:
            outs = self.detector(self.inputs)
            if outs['bbox_num'] > 0:
                # map boxes from network-input space back to the original image;
                # detector bbox rows are laid out as [class, score, x1, y1, x2, y2]
                pred_bboxes = scale_coords(outs['bbox'][:, 2:], input_shape,
                                           im_shape, scale_factor)
                pred_scores = outs['bbox'][:, 1:2]
            else:
                pred_bboxes = []
                pred_scores = []
        else:
            pred_bboxes = self.inputs['pred_bboxes']
            pred_scores = self.inputs['pred_scores']

        if len(pred_bboxes) > 0:
            pred_bboxes = clip_box(pred_bboxes, input_shape, im_shape,
                                   scale_factor)
            # convert (x1, y1, x2, y2) corners to (x, y, w, h) for the tracker
            bbox_tlwh = paddle.concat(
                (pred_bboxes[:, 0:2],
                 pred_bboxes[:, 2:4] - pred_bboxes[:, 0:2] + 1),
                axis=1)

            # crop each detection out of the original image for embedding
            crops, pred_scores = get_crops(
                pred_bboxes, ori_image, pred_scores, w=64, h=192)

            if len(crops) > 0:
                features = self.reid(paddle.to_tensor(crops))
                detections = [Detection(bbox_tlwh[i], conf, features[i])\
                              for i, conf in enumerate(pred_scores)]
            else:
                detections = []
        else:
            detections = []

        self.tracker.predict()
        online_targets = self.tracker.update(detections)

        return online_targets

    def get_pred(self):
        return self._forward()
|
|
@ -0,0 +1,107 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ['FairMOT']
|
||||
|
||||
|
||||
@register
class FairMOT(BaseArch):
    """
    FairMOT network, see http://arxiv.org/abs/2004.01888

    Args:
        detector (object): 'CenterNet' instance
        reid (object): 'FairMOTEmbeddingHead' instance
        tracker (object): 'JDETracker' instance
        loss (object): 'FairMOTLoss' instance
    """

    __category__ = 'architecture'
    __inject__ = ['loss']

    def __init__(self,
                 detector='CenterNet',
                 reid='FairMOTEmbeddingHead',
                 tracker='JDETracker',
                 loss='FairMOTLoss'):
        super(FairMOT, self).__init__()
        self.detector = detector
        self.reid = reid
        self.tracker = tracker
        self.loss = loss

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        detector = create(cfg['detector'])

        # the reid head consumes the detector neck's feature map
        kwargs = {'input_shape': detector.neck.out_shape}
        reid = create(cfg['reid'], **kwargs)
        loss = create(cfg['loss'])
        tracker = create(cfg['tracker'])

        return {
            'detector': detector,
            'reid': reid,
            'loss': loss,
            'tracker': tracker
        }

    def _forward(self):
        """Training: return the combined detection + ReID loss dict.
        Eval/infer: return the tracker's online targets.
        """
        # det_outs keys:
        # train: det_loss, heatmap_loss, size_loss, offset_loss, neck_feat
        # eval/infer: bbox, bbox_inds, neck_feat
        det_outs = self.detector(self.inputs)
        neck_feat = det_outs['neck_feat']
        if self.training:
            reid_loss = self.reid(neck_feat, self.inputs)

            det_loss = det_outs['det_loss']
            loss = self.loss(det_loss, reid_loss)
            # also expose the individual loss components for logging
            loss.update({
                'heatmap_loss': det_outs['heatmap_loss'],
                'size_loss': det_outs['size_loss'],
                'offset_loss': det_outs['offset_loss'],
                'reid_loss': reid_loss
            })
            return loss
        else:
            embedding = self.reid(neck_feat, self.inputs)
            bbox_inds = det_outs['bbox_inds']
            # flatten the (N, C, H, W) embedding map to (N*H*W, C) and
            # gather the embedding at each detected box center
            embedding = paddle.transpose(embedding, [0, 2, 3, 1])
            embedding = paddle.reshape(embedding,
                                       [-1, paddle.shape(embedding)[-1]])
            id_feature = paddle.gather(embedding, bbox_inds)
            dets = det_outs['bbox']
            # Note: the tracker only considers batch_size=1 and num_classes=1
            online_targets = self.tracker.update(dets, id_feature)
            return online_targets

    def get_pred(self):
        return self._forward()

    def get_loss(self):
        return self._forward()
|
|
@ -0,0 +1,106 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ['FasterRCNN']
|
||||
|
||||
|
||||
@register
class FasterRCNN(BaseArch):
    """
    Faster R-CNN network, see https://arxiv.org/abs/1506.01497

    Args:
        backbone (object): backbone instance
        rpn_head (object): `RPNHead` instance
        bbox_head (object): `BBoxHead` instance
        bbox_post_process (object): `BBoxPostProcess` instance
        neck (object): 'FPN' instance, optional
    """
    __category__ = 'architecture'
    __inject__ = ['bbox_post_process']

    def __init__(self,
                 backbone,
                 rpn_head,
                 bbox_head,
                 bbox_post_process,
                 neck=None):
        super(FasterRCNN, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.rpn_head = rpn_head
        self.bbox_head = bbox_head
        self.bbox_post_process = bbox_post_process

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        backbone = create(cfg['backbone'])
        kwargs = {'input_shape': backbone.out_shape}
        # neck is optional: when cfg['neck'] is falsy, no neck is created
        neck = cfg['neck'] and create(cfg['neck'], **kwargs)

        # heads read FPN features when a neck exists, else raw backbone features
        out_shape = neck and neck.out_shape or backbone.out_shape
        kwargs = {'input_shape': out_shape}
        rpn_head = create(cfg['rpn_head'], **kwargs)
        bbox_head = create(cfg['bbox_head'], **kwargs)
        return {
            'backbone': backbone,
            'neck': neck,
            "rpn_head": rpn_head,
            "bbox_head": bbox_head,
        }

    def _forward(self):
        """Training: return (rpn_loss, bbox_loss).
        Eval/infer: return (bbox_pred, bbox_num) in original-image space.
        """
        body_feats = self.backbone(self.inputs)
        if self.neck is not None:
            body_feats = self.neck(body_feats)
        if self.training:
            rois, rois_num, rpn_loss = self.rpn_head(body_feats, self.inputs)
            bbox_loss, _ = self.bbox_head(body_feats, rois, rois_num,
                                          self.inputs)
            return rpn_loss, bbox_loss
        else:
            rois, rois_num, _ = self.rpn_head(body_feats, self.inputs)
            preds, _ = self.bbox_head(body_feats, rois, rois_num, None)

            im_shape = self.inputs['im_shape']
            scale_factor = self.inputs['scale_factor']
            bbox, bbox_num = self.bbox_post_process(preds, (rois, rois_num),
                                                    im_shape, scale_factor)

            # rescale the prediction back to origin image
            bbox_pred = self.bbox_post_process.get_pred(bbox, bbox_num,
                                                        im_shape, scale_factor)
            return bbox_pred, bbox_num

    def get_loss(self, ):
        """Merge RPN and bbox-head losses; total is under the 'loss' key."""
        rpn_loss, bbox_loss = self._forward()
        loss = {}
        loss.update(rpn_loss)
        loss.update(bbox_loss)
        total_loss = paddle.add_n(list(loss.values()))
        loss.update({'loss': total_loss})
        return loss

    def get_pred(self):
        bbox_pred, bbox_num = self._forward()
        output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
        return output
|
|
@ -0,0 +1,105 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ['FCOS']
|
||||
|
||||
|
||||
@register
class FCOS(BaseArch):
    """
    FCOS network, see https://arxiv.org/abs/1904.01355

    Args:
        backbone (object): backbone instance
        neck (object): 'FPN' instance
        fcos_head (object): 'FCOSHead' instance
        fcos_post_process (object): 'FCOSPostProcess' instance
    """

    __category__ = 'architecture'
    __inject__ = ['fcos_post_process']

    def __init__(self,
                 backbone,
                 neck,
                 fcos_head='FCOSHead',
                 fcos_post_process='FCOSPostProcess'):
        super(FCOS, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.fcos_head = fcos_head
        self.fcos_post_process = fcos_post_process

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        backbone = create(cfg['backbone'])

        # neck consumes the backbone's output shape
        kwargs = {'input_shape': backbone.out_shape}
        neck = create(cfg['neck'], **kwargs)

        # head consumes the neck's output shape
        kwargs = {'input_shape': neck.out_shape}
        fcos_head = create(cfg['fcos_head'], **kwargs)

        return {
            'backbone': backbone,
            'neck': neck,
            "fcos_head": fcos_head,
        }

    def _forward(self):
        """Training: return raw head outputs. Eval: return decoded bboxes."""
        body_feats = self.backbone(self.inputs)
        fpn_feats = self.neck(body_feats)
        fcos_head_outs = self.fcos_head(fpn_feats, self.training)
        if not self.training:
            scale_factor = self.inputs['scale_factor']
            bboxes = self.fcos_post_process(fcos_head_outs, scale_factor)
            return bboxes
        else:
            return fcos_head_outs

    def get_loss(self, ):
        loss = {}
        # gather per-FPN-level ground-truth targets from the inputs dict;
        # keys are suffixed with the level index (labels0, reg_target0, ...)
        tag_labels, tag_bboxes, tag_centerness = [], [], []
        for i in range(len(self.fcos_head.fpn_stride)):
            # labels, reg_target, centerness
            k_lbl = 'labels{}'.format(i)
            if k_lbl in self.inputs:
                tag_labels.append(self.inputs[k_lbl])
            k_box = 'reg_target{}'.format(i)
            if k_box in self.inputs:
                tag_bboxes.append(self.inputs[k_box])
            k_ctn = 'centerness{}'.format(i)
            if k_ctn in self.inputs:
                tag_centerness.append(self.inputs[k_ctn])

        fcos_head_outs = self._forward()
        loss_fcos = self.fcos_head.get_loss(fcos_head_outs, tag_labels,
                                            tag_bboxes, tag_centerness)
        loss.update(loss_fcos)
        # expose the summed scalar under the 'loss' key
        total_loss = paddle.add_n(list(loss.values()))
        loss.update({'loss': total_loss})
        return loss

    def get_pred(self):
        bbox_pred, bbox_num = self._forward()
        output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
        return output
|
|
@ -0,0 +1,124 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from ppdet.modeling.mot.utils import scale_coords
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ['JDE']
|
||||
|
||||
|
||||
@register
class JDE(BaseArch):
    """
    JDE network, see https://arxiv.org/abs/1909.12605v1

    Args:
        detector (object): detector model instance
        reid (object): reid model instance
        tracker (object): tracker instance
        metric (str): 'MOTDet' for training and detection evaluation, 'ReID'
            for ReID embedding evaluation, or 'MOT' for multi object tracking
            evaluation.
    """
    __category__ = 'architecture'
    __shared__ = ['metric']

    def __init__(self,
                 detector='YOLOv3',
                 reid='JDEEmbeddingHead',
                 tracker='JDETracker',
                 metric='MOT'):
        super(JDE, self).__init__()
        self.detector = detector
        self.reid = reid
        self.tracker = tracker
        self.metric = metric

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        detector = create(cfg['detector'])
        # the embedding head consumes the detector neck's feature maps
        kwargs = {'input_shape': detector.neck.out_shape}

        reid = create(cfg['reid'], **kwargs)

        tracker = create(cfg['tracker'])

        return {
            "detector": detector,
            "reid": reid,
            "tracker": tracker,
        }

    def _forward(self):
        """Dispatch on self.training and self.metric (see class docstring)."""
        det_outs = self.detector(self.inputs)

        if self.training:
            # combine the detector's conf/box losses with the embedding loss
            emb_feats = det_outs['emb_feats']
            loss_confs = det_outs['det_losses']['loss_confs']
            loss_boxes = det_outs['det_losses']['loss_boxes']
            jde_losses = self.reid(emb_feats, self.inputs, loss_confs,
                                   loss_boxes)
            return jde_losses
        else:
            if self.metric == 'MOTDet':
                # detection-only evaluation
                det_results = {
                    'bbox': det_outs['bbox'],
                    'bbox_num': det_outs['bbox_num'],
                }
                return det_results

            elif self.metric == 'ReID':
                # embedding-only evaluation: return embeddings with their gts
                emb_feats = det_outs['emb_feats']
                embs_and_gts = self.reid(emb_feats, self.inputs, test_emb=True)
                return embs_and_gts

            elif self.metric == 'MOT':
                emb_feats = det_outs['emb_feats']
                emb_outs = self.reid(emb_feats, self.inputs)

                boxes_idx = det_outs['boxes_idx']
                bbox = det_outs['bbox']

                input_shape = self.inputs['image'].shape[2:]
                im_shape = self.inputs['im_shape']
                scale_factor = self.inputs['scale_factor']

                # map box coords from network-input space to original image;
                # bbox rows are laid out as [class, score, x1, y1, x2, y2]
                bbox[:, 2:] = scale_coords(bbox[:, 2:], input_shape, im_shape,
                                           scale_factor)

                nms_keep_idx = det_outs['nms_keep_idx']

                # pred_dets columns: (x1, y1, x2, y2, score)
                pred_dets = paddle.concat((bbox[:, 2:], bbox[:, 1:2]), axis=1)

                # keep only embeddings of boxes that survived NMS
                emb_valid = paddle.gather_nd(emb_outs, boxes_idx)
                pred_embs = paddle.gather_nd(emb_valid, nms_keep_idx)

                online_targets = self.tracker.update(pred_dets, pred_embs)
                return online_targets

            else:
                raise ValueError("Unknown metric {} for multi object tracking.".
                                 format(self.metric))

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        return self._forward()
|
|
@ -0,0 +1,286 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from scipy.optimize import linear_sum_assignment
|
||||
from collections import abc, defaultdict
|
||||
import numpy as np
|
||||
import paddle
|
||||
|
||||
from ppdet.core.workspace import register, create, serializable
|
||||
from .meta_arch import BaseArch
|
||||
from .. import layers as L
|
||||
from ..keypoint_utils import transpred
|
||||
|
||||
__all__ = ['HigherHRNet']
|
||||
|
||||
|
||||
@register
class HigherHRNet(BaseArch):
    __category__ = 'architecture'

    def __init__(self,
                 backbone='HRNet',
                 hrhrnet_head='HigherHRNetHead',
                 post_process='HrHRNetPostProcess',
                 eval_flip=True,
                 flip_perm=None,
                 max_num_people=30):
        """
        HigherHRNet network, see https://arxiv.org/abs/1908.10357;
        HigherHRNet+swahr, see https://arxiv.org/abs/2012.15175

        Args:
            backbone (nn.Layer): backbone instance
            hrhrnet_head (nn.Layer): keypoint_head instance
            post_process (object): `HrHRNetPostProcess` instance
            eval_flip (bool): average predictions over a horizontal flip at eval
            flip_perm (list): left/right joint index permutation used to undo
                the flip. NOTE(review): the None default is passed straight to
                paddle.to_tensor — presumably config always supplies a list;
                confirm before relying on the default.
            max_num_people (int): top-k limit per joint in get_topk
        """
        super(HigherHRNet, self).__init__()
        self.backbone = backbone
        self.hrhrnet_head = hrhrnet_head
        self.post_process = post_process
        self.flip = eval_flip
        self.flip_perm = paddle.to_tensor(flip_perm)
        self.deploy = False
        self.interpolate = L.Upsample(2, mode='bilinear')
        self.pool = L.MaxPool(5, 1, 2)
        self.max_num_people = max_num_people

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # backbone
        backbone = create(cfg['backbone'])
        # head
        kwargs = {'input_shape': backbone.out_shape}
        hrhrnet_head = create(cfg['hrhrnet_head'], **kwargs)
        post_process = create(cfg['post_process'])

        return {
            'backbone': backbone,
            "hrhrnet_head": hrhrnet_head,
            "post_process": post_process,
        }

    def _forward(self):
        """Training: head loss. Deploy: raw top-k outputs. Eval: keypoints."""
        if self.flip and not self.training and not self.deploy:
            # batch the image together with its horizontal flip (axis 3 = width)
            self.inputs['image'] = paddle.concat(
                (self.inputs['image'], paddle.flip(self.inputs['image'], [3])))
        body_feats = self.backbone(self.inputs)

        if self.training:
            return self.hrhrnet_head(body_feats, self.inputs)
        else:
            outputs = self.hrhrnet_head(body_feats)

            if self.flip and not self.deploy:
                # split each output into its (original, flipped) halves and
                # undo the flip: permute left/right joints, mirror the width
                outputs = [paddle.split(o, 2) for o in outputs]
                output_rflip = [
                    paddle.flip(paddle.gather(o[1], self.flip_perm, 1), [3])
                    for o in outputs
                ]
                output1 = [o[0] for o in outputs]
                # average heatmaps; keep both tagmaps for grouping
                heatmap = (output1[0] + output_rflip[0]) / 2.
                tagmaps = [output1[1], output_rflip[1]]
                outputs = [heatmap] + tagmaps
            outputs = self.get_topk(outputs)

            if self.deploy:
                return outputs

            res_lst = []
            h = self.inputs['im_shape'][0, 0].numpy().item()
            w = self.inputs['im_shape'][0, 1].numpy().item()
            kpts, scores = self.post_process(*outputs, h, w)
            res_lst.append([kpts, scores])
            return res_lst

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        outputs = {}
        res_lst = self._forward()
        outputs['keypoint'] = res_lst
        return outputs

    def get_topk(self, outputs):
        """Upsample outputs and pick the top-k peaks per joint heatmap.

        Returns [heatmap, tagmap, heat_k, inds_k] for the post-process stage.
        """
        # resize to image size
        outputs = [self.interpolate(x) for x in outputs]
        if len(outputs) == 3:
            # two tagmaps (flip-augmented eval) stacked on a new last axis
            tagmap = paddle.concat(
                (outputs[1].unsqueeze(4), outputs[2].unsqueeze(4)), axis=4)
        else:
            tagmap = outputs[1].unsqueeze(4)

        heatmap = outputs[0]
        N, J = 1, self.hrhrnet_head.num_joints
        heatmap_maxpool = self.pool(heatmap)
        # topk: keep only local maxima (where pooling left the value unchanged)
        maxmap = heatmap * (heatmap == heatmap_maxpool)
        maxmap = maxmap.reshape([N, J, -1])
        heat_k, inds_k = maxmap.topk(self.max_num_people, axis=2)

        outputs = [heatmap, tagmap, heat_k, inds_k]
        return outputs
|
||||
|
||||
|
||||
@register
@serializable
class HrHRNetPostProcess(object):
    '''
    HrHRNet postprocess contains:
    1) get topk keypoints in the output heatmap
    2) sample the tagmap's value corresponding to each of the topk coordinates
    3) match different joints to combine into people with the Hungarian algorithm
    4) adjust the coordinate by +-0.25 to decrease error std
    5) salvage missing joints by checking positivity of heatmap - tagdiff_norm

    Args:
        max_num_people (int): max number of people supported in postprocess
        heat_thresh (float): topk values below this threshold are ignored
        tag_thresh (float): a candidate joins an existing cluster only when
            its tag distance to the cluster centroid is below this threshold

    __call__ inputs: heatmap, tagmap, heat_k, inds_k (the outputs of
        HigherHRNet.get_topk) plus original_height, original_width — the
        original image size used to map coordinates back.
    '''

    def __init__(self, max_num_people=30, heat_thresh=0.1, tag_thresh=1.):
        self.max_num_people = max_num_people
        self.heat_thresh = heat_thresh
        self.tag_thresh = tag_thresh

    def lerp(self, j, y, x, heatmap):
        """Quarter-pixel refinement: shift each (y, x) by +-0.25 toward the
        higher-valued neighbor of joint j's heatmap, plus a 0.5 pixel-center
        offset. Returns (offset_y, offset_x) arrays."""
        H, W = heatmap.shape[-2:]
        left = np.clip(x - 1, 0, W - 1)
        right = np.clip(x + 1, 0, W - 1)
        up = np.clip(y - 1, 0, H - 1)
        down = np.clip(y + 1, 0, H - 1)
        offset_y = np.where(heatmap[j, down, x] > heatmap[j, up, x], 0.25,
                            -0.25)
        offset_x = np.where(heatmap[j, y, right] > heatmap[j, y, left], 0.25,
                            -0.25)
        return offset_y + 0.5, offset_x + 0.5

    def __call__(self, heatmap, tagmap, heat_k, inds_k, original_height,
                 original_width):
        """Group top-k joint candidates into poses.

        Returns (pose_kpts, mean_score): keypoints as (num_people, J, 3)
        arrays of (x, y, score) in original-image coordinates, and the mean
        per-pose score (computed before joint salvage).
        """
        N, J, H, W = heatmap.shape
        assert N == 1, "only support batch size 1"
        # move framework tensors to numpy for the clustering below
        heatmap = heatmap[0].cpu().detach().numpy()
        tagmap = tagmap[0].cpu().detach().numpy()
        heats = heat_k[0].cpu().detach().numpy()
        inds_np = inds_k[0].cpu().detach().numpy()
        # decode flat heatmap indices into (row, col)
        y = inds_np // W
        x = inds_np % W
        # sample the tag vector at every top-k candidate: (J, k, tag_dim)
        tags = tagmap[np.arange(J)[None, :].repeat(self.max_num_people),
                      y.flatten(), x.flatten()].reshape(J, -1, tagmap.shape[-1])
        coords = np.stack((y, x), axis=2)
        # threshold
        mask = heats > self.heat_thresh
        # cluster: one entry per person, keyed by the first tag value seen
        cluster = defaultdict(lambda: {
            'coords': np.zeros((J, 2), dtype=np.float32),
            'scores': np.zeros(J, dtype=np.float32),
            'tags': []
        })
        for jid, m in enumerate(mask):
            num_valid = m.sum()
            if num_valid == 0:
                continue
            valid_inds = np.where(m)[0]
            valid_tags = tags[jid, m, :]
            if len(cluster) == 0:  # initialize
                for i in valid_inds:
                    tag = tags[jid, i]
                    key = tag[0]
                    cluster[key]['tags'].append(tag)
                    cluster[key]['scores'][jid] = heats[jid, i]
                    cluster[key]['coords'][jid] = coords[jid, i]
                continue
            candidates = list(cluster.keys())[:self.max_num_people]
            centroids = [
                np.mean(
                    cluster[k]['tags'], axis=0) for k in candidates
            ]
            num_clusters = len(centroids)
            # shape is (num_valid, num_clusters, tag_dim)
            dist = valid_tags[:, None, :] - np.array(centroids)[None, ...]
            l2_dist = np.linalg.norm(dist, ord=2, axis=2)
            # modulate dist with heat value, see `use_detection_val`
            cost = np.round(l2_dist) * 100 - heats[jid, m, None]
            # pad the cost matrix, otherwise new pose are ignored
            if num_valid > num_clusters:
                cost = np.pad(cost, ((0, 0), (0, num_valid - num_clusters)),
                              constant_values=((0, 0), (0, 1e-10)))
            # Hungarian assignment of candidates to existing clusters
            rows, cols = linear_sum_assignment(cost)
            for y, x in zip(rows, cols):
                tag = tags[jid, y]
                if y < num_valid and x < num_clusters and \
                        l2_dist[y, x] < self.tag_thresh:
                    key = candidates[x]  # merge to cluster
                else:
                    key = tag[0]  # initialize new cluster
                cluster[key]['tags'].append(tag)
                cluster[key]['scores'][jid] = heats[jid, y]
                cluster[key]['coords'][jid] = coords[jid, y]

        # shape is [k, J, 2] and [k, J]
        pose_tags = np.array([cluster[k]['tags'] for k in cluster])
        pose_coords = np.array([cluster[k]['coords'] for k in cluster])
        pose_scores = np.array([cluster[k]['scores'] for k in cluster])
        valid = pose_scores > 0

        pose_kpts = np.zeros((pose_scores.shape[0], J, 3), dtype=np.float32)
        if valid.sum() == 0:
            # no joint survived: return all-zero keypoints (twice, matching
            # the (kpts, scores) return arity)
            return pose_kpts, pose_kpts

        # refine coords
        valid_coords = pose_coords[valid].astype(np.int32)
        y = valid_coords[..., 0].flatten()
        x = valid_coords[..., 1].flatten()
        _, j = np.nonzero(valid)
        offsets = self.lerp(j, y, x, heatmap)
        pose_coords[valid, 0] += offsets[0]
        pose_coords[valid, 1] += offsets[1]

        # mean score before salvage
        mean_score = pose_scores.mean(axis=1)
        pose_kpts[valid, 2] = pose_scores[valid]

        # salvage missing joints
        if True:  # always on; kept as a switch for the salvage step
            for pid, coords in enumerate(pose_coords):
                tag_mean = np.array(pose_tags[pid]).mean(axis=0)
                # distance of every pixel's tag to this pose's mean tag
                norm = np.sum((tagmap - tag_mean)**2, axis=3)**0.5
                score = heatmap - np.round(norm)  # (J, H, W)
                flat_score = score.reshape(J, -1)
                max_inds = np.argmax(flat_score, axis=1)
                max_scores = np.max(flat_score, axis=1)
                # joints this pose is missing but with a positive salvage score
                salvage_joints = (pose_scores[pid] == 0) & (max_scores > 0)
                if salvage_joints.sum() == 0:
                    continue
                y = max_inds[salvage_joints] // W
                x = max_inds[salvage_joints] % W
                offsets = self.lerp(salvage_joints.nonzero()[0], y, x, heatmap)
                y = y.astype(np.float32) + offsets[0]
                x = x.astype(np.float32) + offsets[1]
                pose_coords[pid][salvage_joints, 0] = y
                pose_coords[pid][salvage_joints, 1] = x
                pose_kpts[pid][salvage_joints, 2] = max_scores[salvage_joints]
        # map (y, x) heatmap coords (flipped to (x, y)) back to image space
        pose_kpts[..., :2] = transpred(pose_coords[..., :2][..., ::-1],
                                       original_height, original_width,
                                       min(H, W))
        return pose_kpts, mean_score
|
|
@ -0,0 +1,203 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
import numpy as np
|
||||
import math
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
from ..keypoint_utils import transform_preds
|
||||
from .. import layers as L
|
||||
|
||||
__all__ = ['TopDownHRNet']
|
||||
|
||||
|
||||
@register
class TopDownHRNet(BaseArch):
    # Registered as an 'architecture' so it can be built from a YAML config;
    # 'loss' is injected by the config system as a constructed object.
    __category__ = 'architecture'
    __inject__ = ['loss']

    def __init__(self,
                 width,
                 num_joints,
                 backbone='HRNet',
                 loss='KeyPointMSELoss',
                 post_process='HRNetPostProcess',
                 flip_perm=None,
                 flip=True,
                 shift_heatmap=True):
        """
        HRNet top-down keypoint network, see https://arxiv.org/abs/1902.09212

        Args:
            width (int): number of channels of the backbone feature fed to
                the final 1x1 conv
            num_joints (int): number of keypoint heatmaps to predict
            backbone (nn.Layer): backbone instance
            loss (object): keypoint loss instance (injected)
            post_process (object): `HRNetPostProcess` instance
                NOTE(review): this argument is currently ignored — a fresh
                HRNetPostProcess() is always constructed below; confirm
                whether the injected value should be used instead.
            flip_perm (list): The left-right joints exchange order list
            flip (bool): average predictions with a horizontally flipped
                forward pass at eval time
            shift_heatmap (bool): shift the flipped heatmap one pixel right
                before averaging
        """
        super(TopDownHRNet, self).__init__()
        self.backbone = backbone
        self.post_process = HRNetPostProcess()
        self.loss = loss
        self.flip_perm = flip_perm
        self.flip = flip
        # 1x1 conv mapping backbone features to one heatmap per joint.
        self.final_conv = L.Conv2d(width, num_joints, 1, 1, 0, bias=True)
        self.shift_heatmap = shift_heatmap
        # When True, _forward returns raw heatmaps (export/inference mode).
        self.deploy = False

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Build the backbone from config; other components are injected."""
        # backbone
        backbone = create(cfg['backbone'])

        return {'backbone': backbone, }

    def _forward(self):
        """Run the network; behavior depends on training/deploy mode.

        Returns the loss dict in training, raw heatmaps in deploy mode,
        or post-processed keypoints otherwise.
        """
        feats = self.backbone(self.inputs)
        # Only the highest-resolution branch (feats[0]) is used.
        hrnet_outputs = self.final_conv(feats[0])

        if self.training:
            return self.loss(hrnet_outputs, self.inputs)
        elif self.deploy:
            return hrnet_outputs
        else:
            if self.flip:
                # Second pass on the horizontally flipped image; the two
                # heatmaps are averaged for a more stable prediction.
                self.inputs['image'] = self.inputs['image'].flip([3])
                feats = self.backbone(self.inputs)
                output_flipped = self.final_conv(feats[0])
                # Undo the horizontal flip and swap left/right joint channels.
                output_flipped = self.flip_back(output_flipped.numpy(),
                                                self.flip_perm)
                output_flipped = paddle.to_tensor(output_flipped.copy())
                if self.shift_heatmap:
                    # Shift one pixel right to compensate for the flip's
                    # half-pixel misalignment.
                    output_flipped[:, :, :, 1:] = output_flipped.clone(
                    )[:, :, :, 0:-1]
                hrnet_outputs = (hrnet_outputs + output_flipped) * 0.5
            # im_shape arrives as (h, w); [:, ::-1] converts it to (w, h).
            imshape = (self.inputs['im_shape'].numpy()
                       )[:, ::-1] if 'im_shape' in self.inputs else None
            # Fall back to image center / image-size-derived scale when the
            # dataset does not provide explicit center/scale.
            center = self.inputs['center'].numpy(
            ) if 'center' in self.inputs else np.round(imshape / 2.)
            scale = self.inputs['scale'].numpy(
            ) if 'scale' in self.inputs else imshape / 200.
            outputs = self.post_process(hrnet_outputs, center, scale)
            return outputs

    def get_loss(self):
        """Return the training loss (delegates to _forward in train mode)."""
        return self._forward()

    def get_pred(self):
        """Return post-processed keypoints wrapped in an output dict."""
        res_lst = self._forward()
        outputs = {'keypoint': res_lst}
        return outputs

    def flip_back(self, output_flipped, matched_parts):
        """Un-flip heatmaps produced from a horizontally flipped image.

        Reverses the width axis, then swaps each (left, right) joint
        channel pair listed in `matched_parts`.

        Args:
            output_flipped (np.ndarray): [batch, joints, height, width]
            matched_parts (list): pairs of joint indices to swap

        Returns:
            np.ndarray: heatmaps aligned with the un-flipped image.
        """
        assert output_flipped.ndim == 4,\
            'output_flipped should be [batch_size, num_joints, height, width]'

        output_flipped = output_flipped[:, :, :, ::-1]

        for pair in matched_parts:
            tmp = output_flipped[:, pair[0], :, :].copy()
            output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :]
            output_flipped[:, pair[1], :, :] = tmp

        return output_flipped
|
||||
|
||||
|
||||
class HRNetPostProcess(object):
    """Decode HRNet keypoint heatmaps into image-space coordinates."""

    def get_max_preds(self, heatmaps):
        '''get predictions from score maps

        Args:
            heatmaps: numpy.ndarray([batch_size, num_joints, height, width])

        Returns:
            preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
            maxvals: numpy.ndarray([batch_size, num_joints, 1]), the maximum confidence of the keypoints
        '''
        assert isinstance(heatmaps,
                          np.ndarray), 'heatmaps should be numpy.ndarray'
        assert heatmaps.ndim == 4, 'batch_images should be 4-ndim'

        batch_size = heatmaps.shape[0]
        num_joints = heatmaps.shape[1]
        width = heatmaps.shape[3]
        # Flatten each per-joint heatmap and take its peak index and value.
        heatmaps_reshaped = heatmaps.reshape((batch_size, num_joints, -1))
        idx = np.argmax(heatmaps_reshaped, 2)
        maxvals = np.amax(heatmaps_reshaped, 2)

        maxvals = maxvals.reshape((batch_size, num_joints, 1))
        idx = idx.reshape((batch_size, num_joints, 1))

        preds = np.tile(idx, (1, 1, 2)).astype(np.float32)

        # Convert flat indices into (x, y) grid coordinates.
        preds[:, :, 0] = (preds[:, :, 0]) % width
        preds[:, :, 1] = np.floor((preds[:, :, 1]) / width)

        # Zero out coordinates for joints whose confidence is not positive.
        pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2))
        pred_mask = pred_mask.astype(np.float32)

        preds *= pred_mask

        return preds, maxvals

    def get_final_preds(self, heatmaps, center, scale):
        """the highest heatvalue location with a quarter offset in the
        direction from the highest response to the second highest response.

        Args:
            heatmaps (numpy.ndarray): The predicted heatmaps
            center (numpy.ndarray): The boxes center
            scale (numpy.ndarray): The scale factor

        Returns:
            preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
            maxvals: numpy.ndarray([batch_size, num_joints, 1]), the maximum confidence of the keypoints
        """

        coords, maxvals = self.get_max_preds(heatmaps)

        heatmap_height = heatmaps.shape[2]
        heatmap_width = heatmaps.shape[3]

        for n in range(coords.shape[0]):
            for p in range(coords.shape[1]):
                hm = heatmaps[n][p]
                px = int(math.floor(coords[n][p][0] + 0.5))
                py = int(math.floor(coords[n][p][1] + 0.5))
                # Nudge the peak a quarter pixel towards the neighboring
                # cell with the higher response (sub-pixel refinement).
                if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1:
                    diff = np.array([
                        hm[py][px + 1] - hm[py][px - 1],
                        hm[py + 1][px] - hm[py - 1][px]
                    ])
                    coords[n][p] += np.sign(diff) * .25
        preds = coords.copy()

        # Transform back from heatmap space to original image coordinates.
        for i in range(coords.shape[0]):
            preds[i] = transform_preds(coords[i], center[i], scale[i],
                                       [heatmap_width, heatmap_height])

        return preds, maxvals

    def __call__(self, output, center, scale):
        """Decode a paddle tensor of heatmaps into [[keypoints, scores]].

        Returns a nested list: keypoints as [batch, joints, 3] (x, y, conf)
        concatenated, plus the per-sample mean confidence.
        """
        preds, maxvals = self.get_final_preds(output.numpy(), center, scale)
        outputs = [[
            np.concatenate(
                (preds, maxvals), axis=-1), np.mean(
                    maxvals, axis=1)
        ]]
        return outputs
|
|
@ -0,0 +1,135 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ['MaskRCNN']
|
||||
|
||||
|
||||
@register
class MaskRCNN(BaseArch):
    """
    Mask R-CNN network, see https://arxiv.org/abs/1703.06870

    Args:
        backbone (object): backbone instance
        rpn_head (object): `RPNHead` instance
        bbox_head (object): `BBoxHead` instance
        mask_head (object): `MaskHead` instance
        bbox_post_process (object): `BBoxPostProcess` instance
        mask_post_process (object): `MaskPostProcess` instance
        neck (object): 'FPN' instance
    """

    __category__ = 'architecture'
    __inject__ = [
        'bbox_post_process',
        'mask_post_process',
    ]

    def __init__(self,
                 backbone,
                 rpn_head,
                 bbox_head,
                 mask_head,
                 bbox_post_process,
                 mask_post_process,
                 neck=None):
        super(MaskRCNN, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.rpn_head = rpn_head
        self.bbox_head = bbox_head
        self.mask_head = mask_head

        self.bbox_post_process = bbox_post_process
        self.mask_post_process = mask_post_process

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Build sub-modules from config, wiring output shapes forward."""
        backbone = create(cfg['backbone'])
        kwargs = {'input_shape': backbone.out_shape}
        # `cfg['neck'] and ...` keeps neck falsy (e.g. None) when unset.
        neck = cfg['neck'] and create(cfg['neck'], **kwargs)

        out_shape = neck and neck.out_shape or backbone.out_shape
        kwargs = {'input_shape': out_shape}
        rpn_head = create(cfg['rpn_head'], **kwargs)
        bbox_head = create(cfg['bbox_head'], **kwargs)

        # Without a neck, the mask head consumes the bbox head's features.
        out_shape = neck and out_shape or bbox_head.get_head().out_shape
        kwargs = {'input_shape': out_shape}
        mask_head = create(cfg['mask_head'], **kwargs)
        return {
            'backbone': backbone,
            'neck': neck,
            "rpn_head": rpn_head,
            "bbox_head": bbox_head,
            "mask_head": mask_head,
        }

    def _forward(self):
        """Run the two-stage pipeline.

        Returns:
            training: (rpn_loss, bbox_loss, mask_loss) loss dicts.
            inference: (bbox_pred, bbox_num, mask_pred).
        """
        body_feats = self.backbone(self.inputs)
        if self.neck is not None:
            body_feats = self.neck(body_feats)

        if self.training:
            rois, rois_num, rpn_loss = self.rpn_head(body_feats, self.inputs)
            bbox_loss, bbox_feat = self.bbox_head(body_feats, rois, rois_num,
                                                  self.inputs)
            # Re-fetch the rois/targets the bbox head assigned during its
            # forward pass so the mask head trains on the same samples.
            rois, rois_num = self.bbox_head.get_assigned_rois()
            bbox_targets = self.bbox_head.get_assigned_targets()
            # Mask Head needs bbox_feat in Mask RCNN
            mask_loss = self.mask_head(body_feats, rois, rois_num, self.inputs,
                                       bbox_targets, bbox_feat)
            return rpn_loss, bbox_loss, mask_loss
        else:
            rois, rois_num, _ = self.rpn_head(body_feats, self.inputs)
            preds, feat_func = self.bbox_head(body_feats, rois, rois_num, None)

            im_shape = self.inputs['im_shape']
            scale_factor = self.inputs['scale_factor']

            bbox, bbox_num = self.bbox_post_process(preds, (rois, rois_num),
                                                    im_shape, scale_factor)
            mask_out = self.mask_head(
                body_feats, bbox, bbox_num, self.inputs, feat_func=feat_func)

            # rescale the prediction back to origin image
            bbox_pred = self.bbox_post_process.get_pred(bbox, bbox_num,
                                                        im_shape, scale_factor)
            origin_shape = self.bbox_post_process.get_origin_shape()
            mask_pred = self.mask_post_process(mask_out[:, 0, :, :], bbox_pred,
                                               bbox_num, origin_shape)
            return bbox_pred, bbox_num, mask_pred

    def get_loss(self, ):
        """Merge RPN/bbox/mask loss dicts and add their total as 'loss'."""
        # Unpack in the same order _forward returns them (was previously
        # mislabeled as bbox, mask, rpn — harmless only because all three
        # dicts are merged below, but misleading to readers).
        rpn_loss, bbox_loss, mask_loss = self._forward()
        loss = {}
        loss.update(rpn_loss)
        loss.update(bbox_loss)
        loss.update(mask_loss)
        total_loss = paddle.add_n(list(loss.values()))
        loss.update({'loss': total_loss})
        return loss

    def get_pred(self):
        """Return inference results as an output dict."""
        bbox_pred, bbox_num, mask_pred = self._forward()
        output = {'bbox': bbox_pred, 'bbox_num': bbox_num, 'mask': mask_pred}
        return output
|
|
@ -0,0 +1,44 @@
|
|||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
from ppdet.core.workspace import register
|
||||
|
||||
__all__ = ['BaseArch']
|
||||
|
||||
|
||||
@register
class BaseArch(nn.Layer):
    """Common template for detection architectures.

    Subclasses implement `get_loss` / `get_pred` (and optionally
    `model_arch`); `forward` dispatches between them based on
    `self.training`.
    """

    def __init__(self, data_format='NCHW'):
        super(BaseArch, self).__init__()
        self.data_format = data_format

    def forward(self, inputs):
        """Stash `inputs` on self, run the arch hook, then loss or pred."""
        if self.data_format == 'NHWC':
            # Convert the image from NCHW to channels-last layout.
            inputs['image'] = paddle.transpose(inputs['image'], [0, 2, 3, 1])
        self.inputs = inputs
        self.model_arch()

        return self.get_loss() if self.training else self.get_pred()

    def build_inputs(self, data, input_def):
        """Zip positional batch `data` with the field names in `input_def`."""
        return {name: data[idx] for idx, name in enumerate(input_def)}

    def model_arch(self, ):
        """Optional hook run before loss/pred; no-op by default."""
        pass

    def get_loss(self, ):
        raise NotImplementedError("Should implement get_loss method!")

    def get_pred(self, ):
        raise NotImplementedError("Should implement get_pred method!")
|
|
@ -0,0 +1,102 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ['S2ANet']
|
||||
|
||||
|
||||
@register
class S2ANet(BaseArch):
    """
    S2ANet, see https://arxiv.org/pdf/2008.09397.pdf

    Args:
        backbone (object): backbone instance
        neck (object): `FPN` instance
        s2anet_head (object): `S2ANetHead` instance
        s2anet_bbox_post_process (object): `S2ANetBBoxPostProcess` instance
    """

    __category__ = 'architecture'
    __inject__ = [
        's2anet_head',
        's2anet_bbox_post_process',
    ]

    def __init__(self, backbone, neck, s2anet_head, s2anet_bbox_post_process):
        super(S2ANet, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.s2anet_head = s2anet_head
        self.s2anet_bbox_post_process = s2anet_bbox_post_process

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Build sub-modules from config, chaining output shapes."""
        backbone = create(cfg['backbone'])
        # `and` keeps neck falsy when the config leaves it unset.
        neck = cfg['neck'] and create(
            cfg['neck'], input_shape=backbone.out_shape)

        # Head and post-process see the neck output, or raw backbone
        # features when no neck is configured.
        feat_shape = neck and neck.out_shape or backbone.out_shape
        head = create(cfg['s2anet_head'], input_shape=feat_shape)
        post_process = create(
            cfg['s2anet_bbox_post_process'], input_shape=feat_shape)

        return {
            'backbone': backbone,
            'neck': neck,
            "s2anet_head": head,
            "s2anet_bbox_post_process": post_process,
        }

    def _forward(self):
        """Run the network; returns a loss dict in training, else outputs."""
        feats = self.backbone(self.inputs)
        if self.neck is not None:
            feats = self.neck(feats)
        # The head caches its raw predictions internally; get_loss /
        # get_prediction below read that cached state.
        self.s2anet_head(feats)

        if self.training:
            loss = self.s2anet_head.get_loss(self.inputs)
            loss.update({'loss': paddle.add_n(list(loss.values()))})
            return loss

        im_shape = self.inputs['im_shape']
        scale_factor = self.inputs['scale_factor']
        pred_scores, pred_bboxes = self.s2anet_head.get_prediction(
            self.s2anet_bbox_post_process.nms_pre)

        # NMS / post_process
        pred_bboxes, bbox_num = self.s2anet_bbox_post_process(pred_scores,
                                                              pred_bboxes)
        # Map boxes from network-input coordinates back to the origin image.
        pred_bboxes = self.s2anet_bbox_post_process.get_pred(
            pred_bboxes, bbox_num, im_shape, scale_factor)

        return {'bbox': pred_bboxes, 'bbox_num': bbox_num}

    def get_loss(self, ):
        return self._forward()

    def get_pred(self):
        return self._forward()
|
|
@ -0,0 +1,110 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ['SOLOv2']
|
||||
|
||||
|
||||
@register
class SOLOv2(BaseArch):
    """
    SOLOv2 network, see https://arxiv.org/abs/2003.10152

    Args:
        backbone (object): an backbone instance
        solov2_head (object): an `SOLOv2Head` instance
        mask_head (object): an `SOLOv2MaskHead` instance
        neck (object): neck of network, such as feature pyramid network instance
    """

    __category__ = 'architecture'

    def __init__(self, backbone, solov2_head, mask_head, neck=None):
        super(SOLOv2, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.solov2_head = solov2_head
        self.mask_head = mask_head

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Build sub-modules from config, chaining output shapes."""
        backbone = create(cfg['backbone'])
        neck = create(cfg['neck'], input_shape=backbone.out_shape)

        head_kwargs = {'input_shape': neck.out_shape}
        return {
            'backbone': backbone,
            'neck': neck,
            'solov2_head': create(cfg['solov2_head'], **head_kwargs),
            'mask_head': create(cfg['mask_head'], **head_kwargs),
        }

    def model_arch(self):
        """Run the shared forward pass and cache predictions on self.

        `get_loss` / `get_pred` consume the cached seg/cate/kernel
        predictions after `forward` invokes this hook.
        """
        feats = self.backbone(self.inputs)
        feats = self.neck(feats)

        self.seg_pred = self.mask_head(feats)
        self.cate_pred_list, self.kernel_pred_list = self.solov2_head(feats)

    def get_loss(self, ):
        """Collect per-level ground truth, compute head losses, add total."""
        gt_ins_labels, gt_cate_labels, gt_grid_orders = [], [], []
        fg_num = self.inputs['fg_num']
        for level in range(len(self.solov2_head.seg_num_grids)):
            # Each FPN level contributes its own ins/cate/grid tensors;
            # only keys actually present in the batch are gathered.
            for prefix, bucket in (('ins_label', gt_ins_labels),
                                   ('cate_label', gt_cate_labels),
                                   ('grid_order', gt_grid_orders)):
                key = '{}{}'.format(prefix, level)
                if key in self.inputs:
                    bucket.append(self.inputs[key])

        loss = {}
        loss.update(
            self.solov2_head.get_loss(self.cate_pred_list,
                                      self.kernel_pred_list, self.seg_pred,
                                      gt_ins_labels, gt_cate_labels,
                                      gt_grid_orders, fg_num))
        loss.update({'loss': paddle.add_n(list(loss.values()))})
        return loss

    def get_pred(self):
        """Decode cached predictions into the standard output dict."""
        seg_masks, cate_labels, cate_scores, bbox_num = \
            self.solov2_head.get_prediction(
                self.cate_pred_list, self.kernel_pred_list, self.seg_pred,
                self.inputs['im_shape'], self.inputs['scale_factor'])
        return {
            "segm": seg_masks,
            "bbox_num": bbox_num,
            'cate_label': cate_labels,
            'cate_score': cate_scores,
        }
|
|
@ -0,0 +1,84 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ['SSD']
|
||||
|
||||
|
||||
@register
class SSD(BaseArch):
    """
    Single Shot MultiBox Detector, see https://arxiv.org/abs/1512.02325

    Args:
        backbone (nn.Layer): backbone instance
        ssd_head (nn.Layer): `SSDHead` instance
        post_process (object): `BBoxPostProcess` instance
    """

    __category__ = 'architecture'
    __inject__ = ['post_process']

    def __init__(self, backbone, ssd_head, post_process):
        super(SSD, self).__init__()
        self.backbone = backbone
        self.ssd_head = ssd_head
        self.post_process = post_process

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Build backbone and head; the head is sized from backbone shapes."""
        backbone = create(cfg['backbone'])
        ssd_head = create(cfg['ssd_head'], input_shape=backbone.out_shape)

        return {
            'backbone': backbone,
            "ssd_head": ssd_head,
        }

    def _forward(self):
        """Run backbone + head; returns loss in training, else (bbox, num)."""
        feats = self.backbone(self.inputs)
        image = self.inputs['image']

        if self.training:
            # The head computes the loss directly from ground truth.
            return self.ssd_head(feats, image, self.inputs['gt_bbox'],
                                 self.inputs['gt_class'])

        preds, anchors = self.ssd_head(feats, image)
        return self.post_process(preds, anchors, self.inputs['im_shape'],
                                 self.inputs['scale_factor'])

    def get_loss(self, ):
        return {"loss": self._forward()}

    def get_pred(self):
        bbox_pred, bbox_num = self._forward()
        return {
            "bbox": bbox_pred,
            "bbox_num": bbox_num,
        }
|
|
@ -0,0 +1,98 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ['TTFNet']
|
||||
|
||||
|
||||
@register
class TTFNet(BaseArch):
    """
    TTFNet network, see https://arxiv.org/abs/1909.00700

    Args:
        backbone (object): backbone instance
        neck (object): 'TTFFPN' instance
        ttf_head (object): 'TTFHead' instance
        post_process (object): 'BBoxPostProcess' instance
    """

    __category__ = 'architecture'
    __inject__ = ['post_process']

    def __init__(self,
                 backbone='DarkNet',
                 neck='TTFFPN',
                 ttf_head='TTFHead',
                 post_process='BBoxPostProcess'):
        super(TTFNet, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.ttf_head = ttf_head
        self.post_process = post_process

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Build sub-modules from config, chaining output shapes."""
        backbone = create(cfg['backbone'])
        neck = create(cfg['neck'], input_shape=backbone.out_shape)
        ttf_head = create(cfg['ttf_head'], input_shape=neck.out_shape)

        return {
            'backbone': backbone,
            'neck': neck,
            "ttf_head": ttf_head,
        }

    def _forward(self):
        """Run the network; returns (hm, wh) in training, else (bbox, num)."""
        feats = self.neck(self.backbone(self.inputs))
        hm, wh = self.ttf_head(feats)

        if self.training:
            # Raw heatmap / size predictions; get_loss turns them into losses.
            return hm, wh

        return self.post_process(hm, wh, self.inputs['im_shape'],
                                 self.inputs['scale_factor'])

    def get_loss(self, ):
        """Compute head losses against TTF targets and add their total."""
        hm, wh = self._forward()
        loss = {}
        loss.update(
            self.ttf_head.get_loss(hm, wh, self.inputs['ttf_heatmap'],
                                   self.inputs['ttf_box_target'],
                                   self.inputs['ttf_reg_weight']))
        loss.update({'loss': paddle.add_n(list(loss.values()))})
        return loss

    def get_pred(self):
        bbox_pred, bbox_num = self._forward()
        return {
            "bbox": bbox_pred,
            "bbox_num": bbox_num,
        }
|
|
@ -0,0 +1,104 @@
|
|||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ['YOLOv3']
|
||||
|
||||
|
||||
@register
class YOLOv3(BaseArch):
    # Registered architecture; data_format is shared with other components
    # via the config system, post_process is injected as an instance.
    __category__ = 'architecture'
    __shared__ = ['data_format']
    __inject__ = ['post_process']

    def __init__(self,
                 backbone='DarkNet',
                 neck='YOLOv3FPN',
                 yolo_head='YOLOv3Head',
                 post_process='BBoxPostProcess',
                 data_format='NCHW',
                 for_mot=False):
        """
        YOLOv3 network, see https://arxiv.org/abs/1804.02767

        Args:
            backbone (nn.Layer): backbone instance
            neck (nn.Layer): neck instance
            yolo_head (nn.Layer): anchor_head instance
            post_process (object): `BBoxPostProcess` instance
            data_format (str): data format, NCHW or NHWC
            for_mot (bool): whether return other features for multi-object tracking
                models, default False in pure object detection models.
        """
        super(YOLOv3, self).__init__(data_format=data_format)
        self.backbone = backbone
        self.neck = neck
        self.yolo_head = yolo_head
        self.post_process = post_process
        self.for_mot = for_mot

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Build backbone, neck and head from config, chaining shapes."""
        # backbone
        backbone = create(cfg['backbone'])

        # fpn
        kwargs = {'input_shape': backbone.out_shape}
        neck = create(cfg['neck'], **kwargs)

        # head
        kwargs = {'input_shape': neck.out_shape}
        yolo_head = create(cfg['yolo_head'], **kwargs)

        return {
            'backbone': backbone,
            'neck': neck,
            "yolo_head": yolo_head,
        }

    def _forward(self):
        """Run the network.

        Returns the head's loss dict in training (plus embedding features
        when `for_mot`), otherwise a dict with post-processed boxes.
        """
        body_feats = self.backbone(self.inputs)
        # The neck returns a dict {'emb_feats', 'yolo_feats'} in MOT mode.
        neck_feats = self.neck(body_feats, self.for_mot)

        if isinstance(neck_feats, dict):
            assert self.for_mot == True
            # NOTE(review): emb_feats is only bound inside this branch but
            # is referenced below whenever self.for_mot is True — confirm
            # the neck always returns a dict in MOT mode, otherwise the
            # for_mot paths below raise NameError.
            emb_feats = neck_feats['emb_feats']
            neck_feats = neck_feats['yolo_feats']

        if self.training:
            yolo_losses = self.yolo_head(neck_feats, self.inputs)

            if self.for_mot:
                return {'det_losses': yolo_losses, 'emb_feats': emb_feats}
            else:
                return yolo_losses

        else:
            yolo_head_outs = self.yolo_head(neck_feats)

            if self.for_mot:
                # MOT post-process also returns index bookkeeping used to
                # match boxes with embedding features.
                boxes_idx, bbox, bbox_num, nms_keep_idx = self.post_process(
                    yolo_head_outs, self.yolo_head.mask_anchors)
                output = {
                    'bbox': bbox,
                    'bbox_num': bbox_num,
                    'boxes_idx': boxes_idx,
                    'nms_keep_idx': nms_keep_idx,
                    'emb_feats': emb_feats,
                }
            else:
                bbox, bbox_num = self.post_process(
                    yolo_head_outs, self.yolo_head.mask_anchors,
                    self.inputs['im_shape'], self.inputs['scale_factor'])
                output = {'bbox': bbox, 'bbox_num': bbox_num}

            return output

    def get_loss(self):
        """Return the training losses (delegates to _forward)."""
        return self._forward()

    def get_pred(self):
        """Return inference outputs (delegates to _forward)."""
        return self._forward()
|
|
@ -0,0 +1,37 @@
|
|||
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from . import vgg
|
||||
from . import resnet
|
||||
from . import darknet
|
||||
from . import mobilenet_v1
|
||||
from . import mobilenet_v3
|
||||
from . import hrnet
|
||||
from . import blazenet
|
||||
from . import ghostnet
|
||||
from . import senet
|
||||
from . import res2net
|
||||
from . import dla
|
||||
|
||||
from .vgg import *
|
||||
from .resnet import *
|
||||
from .darknet import *
|
||||
from .mobilenet_v1 import *
|
||||
from .mobilenet_v3 import *
|
||||
from .hrnet import *
|
||||
from .blazenet import *
|
||||
from .ghostnet import *
|
||||
from .senet import *
|
||||
from .res2net import *
|
||||
from .dla import *
|
|
@ -0,0 +1,333 @@
|
|||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle import ParamAttr
|
||||
from paddle.nn.initializer import KaimingNormal
|
||||
from ppdet.core.workspace import register, serializable
|
||||
from ..shape_spec import ShapeSpec
|
||||
|
||||
__all__ = ['BlazeNet']
|
||||
|
||||
|
||||
def hard_swish(x):
    """Hard-swish activation: x * relu6(x + 3) / 6 (piecewise-linear swish)."""
    return F.relu6(x + 3) * x / 6.
|
||||
|
||||
class ConvBNLayer(nn.Layer):
    """Conv2D + batch norm + optional activation, used by BlazeNet.

    The activation is picked by name ('relu', 'relu6', 'leaky' or
    'hard_swish'); any other value leaves the output linear.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 padding,
                 num_groups=1,
                 act='relu',
                 conv_lr=0.1,
                 conv_decay=0.,
                 norm_decay=0.,
                 norm_type='bn',
                 name=None):
        super(ConvBNLayer, self).__init__()
        self.act = act
        # Bias-free convolution: the batch norm that follows supplies the offset.
        self._conv = nn.Conv2D(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            weight_attr=ParamAttr(
                learning_rate=conv_lr,
                initializer=KaimingNormal(),
                name=name + "_weights"),
            bias_attr=False)

        scale_attr = ParamAttr(name=name + "_bn_scale")
        offset_attr = ParamAttr(name=name + "_bn_offset")
        if norm_type == 'sync_bn':
            self._batch_norm = nn.SyncBatchNorm(
                out_channels, weight_attr=scale_attr, bias_attr=offset_attr)
        else:
            self._batch_norm = nn.BatchNorm(
                out_channels,
                act=None,
                param_attr=scale_attr,
                bias_attr=offset_attr,
                use_global_stats=False,
                moving_mean_name=name + '_bn_mean',
                moving_variance_name=name + '_bn_variance')

    def forward(self, x):
        x = self._batch_norm(self._conv(x))
        if self.act == "relu":
            return F.relu(x)
        if self.act == "relu6":
            return F.relu6(x)
        if self.act == 'leaky':
            return F.leaky_relu(x)
        if self.act == 'hard_swish':
            return hard_swish(x)
        return x
||||
|
||||
|
||||
class BlazeBlock(nn.Layer):
    """Building block of BlazeNet (single or double blaze block).

    Structure: a depthwise stage (one 5x5 depthwise conv, or two stacked
    3x3 depthwise convs emulating the same receptive field) followed by a
    1x1 pointwise conv.  When ``double_channels`` is given, a second
    depthwise + pointwise pair is appended.  When ``stride == 2`` the main
    path downsamples and the input is added back through a
    max-pool + 1x1-conv shortcut.  The block output always passes through
    a final ReLU.
    """

    def __init__(self,
                 in_channels,
                 out_channels1,
                 out_channels2,
                 double_channels=None,
                 stride=1,
                 use_5x5kernel=True,
                 act='relu',
                 name=None):
        super(BlazeBlock, self).__init__()
        assert stride in [1, 2]
        self.use_pool = not stride == 1
        self.use_double_block = double_channels is not None
        self.conv_dw = []
        if use_5x5kernel:
            self.conv_dw.append(
                self.add_sublayer(
                    name + "1_dw",
                    ConvBNLayer(
                        in_channels=in_channels,
                        out_channels=out_channels1,
                        kernel_size=5,
                        stride=stride,
                        padding=2,
                        num_groups=out_channels1,
                        name=name + "1_dw")))
        else:
            # Two stacked 3x3 depthwise convs replace the single 5x5 one.
            self.conv_dw.append(
                self.add_sublayer(
                    name + "1_dw_1",
                    ConvBNLayer(
                        in_channels=in_channels,
                        out_channels=out_channels1,
                        kernel_size=3,
                        stride=1,
                        padding=1,
                        num_groups=out_channels1,
                        name=name + "1_dw_1")))
            self.conv_dw.append(
                self.add_sublayer(
                    name + "1_dw_2",
                    ConvBNLayer(
                        in_channels=out_channels1,
                        out_channels=out_channels1,
                        kernel_size=3,
                        stride=stride,
                        padding=1,
                        num_groups=out_channels1,
                        name=name + "1_dw_2")))
        # The intermediate activation is only applied inside double blocks.
        self.act = act if self.use_double_block else None
        self.conv_pw = ConvBNLayer(
            in_channels=out_channels1,
            out_channels=out_channels2,
            kernel_size=1,
            stride=1,
            padding=0,
            act=self.act,
            name=name + "1_sep")
        if self.use_double_block:
            self.conv_dw2 = []
            if use_5x5kernel:
                self.conv_dw2.append(
                    self.add_sublayer(
                        name + "2_dw",
                        ConvBNLayer(
                            in_channels=out_channels2,
                            out_channels=out_channels2,
                            kernel_size=5,
                            stride=1,
                            padding=2,
                            num_groups=out_channels2,
                            name=name + "2_dw")))
            else:
                self.conv_dw2.append(
                    self.add_sublayer(
                        name + "2_dw_1",
                        ConvBNLayer(
                            in_channels=out_channels2,
                            out_channels=out_channels2,
                            kernel_size=3,
                            stride=1,
                            padding=1,
                            num_groups=out_channels2,
                            # BUGFIX: was name + "1_dw_1", which collided
                            # with the first depthwise stage's parameter
                            # names (duplicate *_weights / *_bn_* params).
                            name=name + "2_dw_1")))
                self.conv_dw2.append(
                    self.add_sublayer(
                        name + "2_dw_2",
                        ConvBNLayer(
                            in_channels=out_channels2,
                            out_channels=out_channels2,
                            kernel_size=3,
                            stride=1,
                            padding=1,
                            num_groups=out_channels2,
                            name=name + "2_dw_2")))
            self.conv_pw2 = ConvBNLayer(
                in_channels=out_channels2,
                out_channels=double_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                name=name + "2_sep")
        # Shortcut branch, only needed when the main path downsamples.
        if self.use_pool:
            shortcut_channel = double_channels or out_channels2
            self._shortcut = []
            self._shortcut.append(
                self.add_sublayer(
                    name + '_shortcut_pool',
                    nn.MaxPool2D(
                        kernel_size=stride, stride=stride, ceil_mode=True)))
            self._shortcut.append(
                self.add_sublayer(
                    name + '_shortcut_conv',
                    ConvBNLayer(
                        in_channels=in_channels,
                        out_channels=shortcut_channel,
                        kernel_size=1,
                        stride=1,
                        padding=0,
                        name="shortcut" + name)))

    def forward(self, x):
        y = x
        for conv_dw_block in self.conv_dw:
            y = conv_dw_block(y)
        y = self.conv_pw(y)
        if self.use_double_block:
            for conv_dw2_block in self.conv_dw2:
                y = conv_dw2_block(y)
            y = self.conv_pw2(y)
        if self.use_pool:
            for shortcut in self._shortcut:
                x = shortcut(x)
        return F.relu(paddle.add(x, y))
||||
|
||||
|
||||
@register
@serializable
class BlazeNet(nn.Layer):
    """
    BlazeFace, see https://arxiv.org/abs/1907.05047

    Args:
        blaze_filters (list): filters for each blaze block; each entry is
            [in, out] or [in, out, stride].
        double_blaze_filters (list): filters for each double_blaze block;
            each entry is [in, mid, out] or [in, mid, out, stride].
        use_5x5kernel (bool): whether the depthwise convs use a 5x5 kernel
            (otherwise two stacked 3x3 kernels).
        act (str): intermediate activation name forwarded to the blocks.
    """

    def __init__(
            self,
            blaze_filters=[[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]],
            double_blaze_filters=[[48, 24, 96, 2], [96, 24, 96], [96, 24, 96],
                                  [96, 24, 96, 2], [96, 24, 96], [96, 24, 96]],
            use_5x5kernel=True,
            act=None):
        super(BlazeNet, self).__init__()
        conv1_num_filters = blaze_filters[0][0]
        # Stem: 3x3 stride-2 conv.
        self.conv1 = ConvBNLayer(
            in_channels=3,
            out_channels=conv1_num_filters,
            kernel_size=3,
            stride=2,
            padding=1,
            name="conv1")
        in_channels = conv1_num_filters
        self.blaze_block = []
        self._out_channels = []
        for k, v in enumerate(blaze_filters):
            # BUGFIX: the assertion message previously contained an
            # unformatted '{}' placeholder.
            assert len(v) in [2, 3], \
                "blaze_filters {} not in [2, 3]".format(v)
            if len(v) == 2:
                self.blaze_block.append(
                    self.add_sublayer(
                        'blaze_{}'.format(k),
                        BlazeBlock(
                            in_channels,
                            v[0],
                            v[1],
                            use_5x5kernel=use_5x5kernel,
                            act=act,
                            name='blaze_{}'.format(k))))
            elif len(v) == 3:
                self.blaze_block.append(
                    self.add_sublayer(
                        'blaze_{}'.format(k),
                        BlazeBlock(
                            in_channels,
                            v[0],
                            v[1],
                            stride=v[2],
                            use_5x5kernel=use_5x5kernel,
                            act=act,
                            name='blaze_{}'.format(k))))
            in_channels = v[1]

        for k, v in enumerate(double_blaze_filters):
            # BUGFIX: same unformatted-placeholder problem as above.
            assert len(v) in [3, 4], \
                "double_blaze_filters {} not in [3, 4]".format(v)
            if len(v) == 3:
                self.blaze_block.append(
                    self.add_sublayer(
                        'double_blaze_{}'.format(k),
                        BlazeBlock(
                            in_channels,
                            v[0],
                            v[1],
                            double_channels=v[2],
                            use_5x5kernel=use_5x5kernel,
                            act=act,
                            name='double_blaze_{}'.format(k))))
            elif len(v) == 4:
                self.blaze_block.append(
                    self.add_sublayer(
                        'double_blaze_{}'.format(k),
                        BlazeBlock(
                            in_channels,
                            v[0],
                            v[1],
                            double_channels=v[2],
                            stride=v[3],
                            use_5x5kernel=use_5x5kernel,
                            act=act,
                            name='double_blaze_{}'.format(k))))
            in_channels = v[2]
            self._out_channels.append(in_channels)

    def forward(self, inputs):
        outs = []
        y = self.conv1(inputs['image'])
        for block in self.blaze_block:
            y = block(y)
            outs.append(y)
        # Two feature levels are exported, matching out_shape below.
        return [outs[-4], outs[-1]]

    @property
    def out_shape(self):
        return [
            ShapeSpec(channels=c)
            for c in [self._out_channels[-4], self._out_channels[-1]]
        ]
|
@ -0,0 +1,340 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
|
||||
from ppdet.core.workspace import register, serializable
|
||||
from ppdet.modeling.ops import batch_norm, mish
|
||||
from ..shape_spec import ShapeSpec
|
||||
|
||||
__all__ = ['DarkNet', 'ConvBNLayer']
|
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer):
    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size=3,
                 stride=1,
                 groups=1,
                 padding=0,
                 norm_type='bn',
                 norm_decay=0.,
                 act="leaky",
                 freeze_norm=False,
                 data_format='NCHW',
                 name=''):
        """Conv2D followed by batch norm and an optional activation.

        Args:
            ch_in (int): number of input channels.
            ch_out (int): number of output channels.
            filter_size (int): convolution kernel size, default 3.
            stride (int): convolution stride, default 1.
            groups (int): number of convolution groups, default 1.
            padding (int): padding size, default 0.
            norm_type (str): batch norm type, default 'bn'.
            norm_decay (float): decay for batch-norm scale/offset, default 0.
            act (str): 'leaky' (leaky_relu), 'mish', or anything else
                for no activation.
            freeze_norm (bool): whether to freeze the norm layer, default False.
            data_format (str): 'NCHW' or 'NHWC'.
            name (str): accepted for config compatibility; not used here.
        """
        super(ConvBNLayer, self).__init__()

        self.conv = nn.Conv2D(
            in_channels=ch_in,
            out_channels=ch_out,
            kernel_size=filter_size,
            stride=stride,
            padding=padding,
            groups=groups,
            data_format=data_format,
            bias_attr=False)
        self.batch_norm = batch_norm(
            ch_out,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            data_format=data_format)
        self.act = act

    def forward(self, inputs):
        y = self.batch_norm(self.conv(inputs))
        if self.act == 'leaky':
            return F.leaky_relu(y, 0.1)
        if self.act == 'mish':
            return mish(y)
        return y
||||
|
||||
|
||||
class DownSample(nn.Layer):
    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size=3,
                 stride=2,
                 padding=1,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 data_format='NCHW'):
        """Strided conv-bn block that halves the spatial resolution.

        Args:
            ch_in (int): number of input channels.
            ch_out (int): number of output channels.
            filter_size (int): convolution kernel size, default 3.
            stride (int): convolution stride, default 2.
            padding (int): padding size, default 1.
            norm_type (str): batch norm type, default 'bn'.
            norm_decay (float): decay for batch-norm scale/offset, default 0.
            freeze_norm (bool): whether to freeze the norm layer, default False.
            data_format (str): 'NCHW' or 'NHWC'.
        """
        super(DownSample, self).__init__()

        self.conv_bn_layer = ConvBNLayer(
            ch_in=ch_in,
            ch_out=ch_out,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            data_format=data_format)
        self.ch_out = ch_out

    def forward(self, inputs):
        return self.conv_bn_layer(inputs)
||||
|
||||
|
||||
class BasicBlock(nn.Layer):
    def __init__(self,
                 ch_in,
                 ch_out,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 data_format='NCHW'):
        """Residual BasicBlock of DarkNet: 1x1 conv then 3x3 conv plus skip.

        The 3x3 conv expands to ``ch_out * 2`` channels, so the residual
        add requires ``ch_in == ch_out * 2``.

        Args:
            ch_in (int): number of input channels.
            ch_out (int): bottleneck channel count (output is ch_out * 2).
            norm_type (str): batch norm type, default 'bn'.
            norm_decay (float): decay for batch-norm scale/offset, default 0.
            freeze_norm (bool): whether to freeze the norm layer, default False.
            data_format (str): 'NCHW' or 'NHWC'.
        """
        super(BasicBlock, self).__init__()

        self.conv1 = ConvBNLayer(
            ch_in=ch_in,
            ch_out=ch_out,
            filter_size=1,
            stride=1,
            padding=0,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            data_format=data_format)
        self.conv2 = ConvBNLayer(
            ch_in=ch_out,
            ch_out=ch_out * 2,
            filter_size=3,
            stride=1,
            padding=1,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            data_format=data_format)

    def forward(self, inputs):
        y = self.conv2(self.conv1(inputs))
        return paddle.add(x=inputs, y=y)
||||
|
||||
|
||||
class Blocks(nn.Layer):
    def __init__(self,
                 ch_in,
                 ch_out,
                 count,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 name=None,
                 data_format='NCHW'):
        """A DarkNet stage consisting of ``count`` chained BasicBlock layers.

        Args:
            ch_in (int): number of input channels.
            ch_out (int): bottleneck channel count of each BasicBlock.
            count (int): number of BasicBlock layers in the stage.
            norm_type (str): batch norm type, default 'bn'.
            norm_decay (float): decay for batch-norm scale/offset, default 0.
            freeze_norm (bool): whether to freeze the norm layer, default False.
            name (str): prefix used to name the registered sub-blocks.
            data_format (str): 'NCHW' or 'NHWC'.
        """
        super(Blocks, self).__init__()

        self.basicblock0 = BasicBlock(
            ch_in,
            ch_out,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            data_format=data_format)
        self.res_out_list = []
        # Blocks after the first take ch_out * 2 input channels, since every
        # BasicBlock doubles its bottleneck width on output.
        for idx in range(1, count):
            residual_block = self.add_sublayer(
                '{}.{}'.format(name, idx),
                BasicBlock(
                    ch_out * 2,
                    ch_out,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    data_format=data_format))
            self.res_out_list.append(residual_block)
        self.ch_out = ch_out

    def forward(self, inputs):
        y = self.basicblock0(inputs)
        for residual_block in self.res_out_list:
            y = residual_block(y)
        return y
||||
|
||||
|
||||
# Number of BasicBlocks per stage for each supported DarkNet depth.
DarkNet_cfg = {53: [1, 2, 8, 8, 4]}
||||
|
||||
|
||||
@register
@serializable
class DarkNet(nn.Layer):
    __shared__ = ['norm_type', 'data_format']

    def __init__(self,
                 depth=53,
                 freeze_at=-1,
                 return_idx=[2, 3, 4],
                 num_stages=5,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 data_format='NCHW'):
        """
        Darknet, see https://pjreddie.com/darknet/yolo/

        Args:
            depth (int): network depth; only 53 is configured in DarkNet_cfg.
            freeze_at (int): stage index whose output gradient is stopped.
            return_idx (list): indices of stages whose feature maps are returned.
            num_stages (int): number of residual stages to build.
            norm_type (str): batch norm type, default 'bn'.
            norm_decay (float): decay for batch-norm scale/offset, default 0.
            freeze_norm (bool): whether to freeze the norm layers.
            data_format (str): 'NCHW' or 'NHWC'.
        """
        super(DarkNet, self).__init__()
        self.depth = depth
        self.freeze_at = freeze_at
        self.return_idx = return_idx
        self.num_stages = num_stages
        self.stages = DarkNet_cfg[self.depth][0:num_stages]

        # Stem: a 3x3 conv followed by the first 2x downsample.
        self.conv0 = ConvBNLayer(
            ch_in=3,
            ch_out=32,
            filter_size=3,
            stride=1,
            padding=1,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            data_format=data_format)

        self.downsample0 = DownSample(
            ch_in=32,
            ch_out=32 * 2,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            data_format=data_format)

        self._out_channels = []
        self.darknet_conv_block_list = []
        self.downsample_list = []
        stage_in_channels = [64, 128, 256, 512, 1024]
        for idx, block_count in enumerate(self.stages):
            stage_name = 'stage.{}'.format(idx)
            stage = self.add_sublayer(
                stage_name,
                Blocks(
                    int(stage_in_channels[idx]),
                    32 * (2**idx),
                    block_count,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    data_format=data_format,
                    name=stage_name))
            self.darknet_conv_block_list.append(stage)
            if idx in return_idx:
                # Each stage's BasicBlocks double the bottleneck width.
                self._out_channels.append(64 * (2**idx))
        for idx in range(num_stages - 1):
            down = self.add_sublayer(
                'stage.{}.downsample'.format(idx),
                DownSample(
                    ch_in=32 * (2**(idx + 1)),
                    ch_out=32 * (2**(idx + 2)),
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    data_format=data_format))
            self.downsample_list.append(down)

    def forward(self, inputs):
        out = self.downsample0(self.conv0(inputs['image']))
        blocks = []
        for idx, stage in enumerate(self.darknet_conv_block_list):
            out = stage(out)
            if idx == self.freeze_at:
                # Detach here so earlier layers receive no gradient.
                out.stop_gradient = True
            if idx in self.return_idx:
                blocks.append(out)
            if idx < self.num_stages - 1:
                out = self.downsample_list[idx](out)
        return blocks

    @property
    def out_shape(self):
        return [ShapeSpec(channels=c) for c in self._out_channels]
|
@ -0,0 +1,243 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from ppdet.core.workspace import register, serializable
|
||||
from ppdet.modeling.layers import ConvNormLayer
|
||||
from ..shape_spec import ShapeSpec
|
||||
|
||||
# depth -> (number of blocks per level, channels per level)
DLA_cfg = {34: ([1, 1, 1, 2, 2, 1], [16, 32, 64, 128, 256, 512])}
||||
|
||||
|
||||
class BasicBlock(nn.Layer):
    """DLA basic block: two 3x3 conv-norm layers with a ReLU residual add."""

    def __init__(self, ch_in, ch_out, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = ConvNormLayer(
            ch_in,
            ch_out,
            filter_size=3,
            stride=stride,
            bias_on=False,
            norm_decay=None)
        self.conv2 = ConvNormLayer(
            ch_out,
            ch_out,
            filter_size=3,
            stride=1,
            bias_on=False,
            norm_decay=None)

    def forward(self, inputs, residual=None):
        # When no explicit residual is supplied, skip-connect the input itself.
        if residual is None:
            residual = inputs

        y = F.relu(self.conv1(inputs))
        y = self.conv2(y)
        return F.relu(paddle.add(x=y, y=residual))
||||
|
||||
|
||||
class Root(nn.Layer):
    """Aggregation node of a DLA tree: concat children, 1x1 conv, ReLU.

    NOTE(review): ``kernel_size`` is accepted but unused — the conv is
    hard-wired to 1x1; confirm against the reference DLA implementation.
    """

    def __init__(self, ch_in, ch_out, kernel_size, residual):
        super(Root, self).__init__()
        self.conv = ConvNormLayer(
            ch_in,
            ch_out,
            filter_size=1,
            stride=1,
            bias_on=False,
            norm_decay=None)
        self.residual = residual

    def forward(self, inputs):
        children = inputs
        out = self.conv(paddle.concat(inputs, axis=1))
        if self.residual:
            # Add the first child back in as a residual path.
            out = paddle.add(x=out, y=children[0])
        return F.relu(out)
||||
|
||||
|
||||
class Tree(nn.Layer):
    """Recursive aggregation tree of DLA.

    At ``level == 1`` the tree is two plain blocks whose outputs (plus any
    accumulated children) are merged by a Root node; deeper trees recurse
    into two sub-trees and pass the accumulated children down.
    """

    def __init__(self,
                 level,
                 block,
                 ch_in,
                 ch_out,
                 stride=1,
                 level_root=False,
                 root_dim=0,
                 root_kernel_size=1,
                 root_residual=False):
        super(Tree, self).__init__()
        if root_dim == 0:
            root_dim = 2 * ch_out
        if level_root:
            root_dim += ch_in
        if level == 1:
            # Leaf level: two plain blocks feeding the Root.
            self.tree1 = block(ch_in, ch_out, stride)
            self.tree2 = block(ch_out, ch_out, 1)
        else:
            self.tree1 = Tree(
                level - 1,
                block,
                ch_in,
                ch_out,
                stride,
                root_dim=0,
                root_kernel_size=root_kernel_size,
                root_residual=root_residual)
            self.tree2 = Tree(
                level - 1,
                block,
                ch_out,
                ch_out,
                1,
                root_dim=root_dim + ch_out,
                root_kernel_size=root_kernel_size,
                root_residual=root_residual)

        if level == 1:
            self.root = Root(root_dim, ch_out, root_kernel_size, root_residual)
        self.level_root = level_root
        self.root_dim = root_dim
        self.downsample = None
        self.project = None
        self.level = level
        if stride > 1:
            self.downsample = nn.MaxPool2D(stride, stride=stride)
        if ch_in != ch_out:
            # 1x1 projection to make the shortcut channel counts match.
            self.project = ConvNormLayer(
                ch_in,
                ch_out,
                filter_size=1,
                stride=1,
                bias_on=False,
                norm_decay=None)

    def forward(self, x, residual=None, children=None):
        children = [] if children is None else children
        bottom = self.downsample(x) if self.downsample else x
        # NOTE: the incoming `residual` argument is always recomputed here,
        # matching the reference implementation.
        residual = self.project(bottom) if self.project else bottom
        if self.level_root:
            children.append(bottom)
        left = self.tree1(x, residual)
        if self.level == 1:
            right = self.tree2(left)
            out = self.root([right, left] + children)
        else:
            children.append(left)
            out = self.tree2(left, children=children)
        return out
||||
|
||||
|
||||
@register
@serializable
class DLA(nn.Layer):
    """
    DLA, see https://arxiv.org/pdf/1707.06484.pdf

    Args:
        depth (int): DLA depth; only 34 is configured in DLA_cfg.
        residual_root (bool): whether the root blocks use a residual add.
    """

    def __init__(self, depth=34, residual_root=False):
        super(DLA, self).__init__()
        levels, channels = DLA_cfg[depth]
        if depth == 34:
            block = BasicBlock
        self.channels = channels
        # Stem: one 7x7 conv-norm followed by ReLU.
        self.base_layer = nn.Sequential(
            ConvNormLayer(
                3,
                channels[0],
                filter_size=7,
                stride=1,
                bias_on=False,
                norm_decay=None),
            nn.ReLU())
        self.level0 = self._make_conv_level(channels[0], channels[0], levels[0])
        self.level1 = self._make_conv_level(
            channels[0], channels[1], levels[1], stride=2)
        # Levels 2..5 are aggregation trees; only level2 has no level root.
        for idx, has_level_root in ((2, False), (3, True), (4, True), (5, True)):
            setattr(
                self,
                'level{}'.format(idx),
                Tree(
                    levels[idx],
                    block,
                    channels[idx - 1],
                    channels[idx],
                    2,
                    level_root=has_level_root,
                    root_residual=residual_root))

    def _make_conv_level(self, ch_in, ch_out, conv_num, stride=1):
        # A stack of `conv_num` conv-norm-ReLU layers; only the first conv
        # applies the stride.
        modules = []
        for idx in range(conv_num):
            modules.extend([
                ConvNormLayer(
                    ch_in,
                    ch_out,
                    filter_size=3,
                    stride=stride if idx == 0 else 1,
                    bias_on=False,
                    norm_decay=None), nn.ReLU()
            ])
            ch_in = ch_out
        return nn.Sequential(*modules)

    @property
    def out_shape(self):
        return [ShapeSpec(channels=self.channels[i]) for i in range(6)]

    def forward(self, inputs):
        outs = []
        feats = self.base_layer(inputs['image'])
        for i in range(6):
            feats = getattr(self, 'level{}'.format(i))(feats)
            outs.append(feats)
        return outs
|
@ -0,0 +1,476 @@
|
|||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import math
|
||||
import paddle
|
||||
from paddle import ParamAttr
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle.nn import AdaptiveAvgPool2D, Linear
|
||||
from paddle.nn.initializer import Uniform
|
||||
|
||||
from ppdet.core.workspace import register, serializable
|
||||
from numbers import Integral
|
||||
from ..shape_spec import ShapeSpec
|
||||
from .mobilenet_v3 import make_divisible, ConvBNLayer
|
||||
|
||||
__all__ = ['GhostNet']
|
||||
|
||||
|
||||
class ExtraBlockDW(nn.Layer):
    """Extra feature block: 1x1 pointwise -> 3x3 depthwise -> 1x1 conv,
    each followed by batch norm and relu6."""

    def __init__(self,
                 in_c,
                 ch_1,
                 ch_2,
                 stride,
                 lr_mult,
                 conv_decay=0.,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 name=None):
        super(ExtraBlockDW, self).__init__()
        # Settings shared by all three conv-bn layers of this block.
        shared = dict(
            act='relu6',
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm)
        self.pointwise_conv = ConvBNLayer(
            in_c=in_c,
            out_c=ch_1,
            filter_size=1,
            stride=1,
            padding=0,
            name=name + "_extra1",
            **shared)
        self.depthwise_conv = ConvBNLayer(
            in_c=ch_1,
            out_c=ch_2,
            filter_size=3,
            stride=stride,
            padding=1,
            num_groups=int(ch_1),
            name=name + "_extra2_dw",
            **shared)
        self.normal_conv = ConvBNLayer(
            in_c=ch_2,
            out_c=ch_2,
            filter_size=1,
            stride=1,
            padding=0,
            name=name + "_extra2_sep",
            **shared)

    def forward(self, inputs):
        x = self.pointwise_conv(inputs)
        x = self.depthwise_conv(x)
        return self.normal_conv(x)
||||
|
||||
|
||||
class SEBlock(nn.Layer):
    """Squeeze-and-excitation: global pool, two FC layers, then a hard
    [0, 1]-clipped channel gate multiplied back onto the input."""

    def __init__(self, num_channels, lr_mult, reduction_ratio=4, name=None):
        super(SEBlock, self).__init__()
        self.pool2d_gap = AdaptiveAvgPool2D(1)
        self._num_channels = num_channels
        med_ch = num_channels // reduction_ratio

        def _fc(in_dim, out_dim, suffix):
            # Uniform init bounded by 1/sqrt(fan_in), as in the reference net.
            bound = 1.0 / math.sqrt(in_dim * 1.0)
            return Linear(
                in_dim,
                out_dim,
                weight_attr=ParamAttr(
                    learning_rate=lr_mult,
                    initializer=Uniform(-bound, bound),
                    name=name + "_" + suffix + "_weights"),
                bias_attr=ParamAttr(
                    learning_rate=lr_mult,
                    name=name + "_" + suffix + "_offset"))

        self.squeeze = _fc(num_channels, med_ch, "1")
        self.excitation = _fc(med_ch, num_channels, "2")

    def forward(self, inputs):
        gate = self.pool2d_gap(inputs)
        gate = paddle.squeeze(gate, axis=[2, 3])
        gate = F.relu(self.squeeze(gate))
        gate = self.excitation(gate)
        # Hard gate in [0, 1] instead of a sigmoid.
        gate = paddle.clip(x=gate, min=0, max=1)
        gate = paddle.unsqueeze(gate, axis=[2, 3])
        return paddle.multiply(inputs, gate)
||||
|
||||
|
||||
class GhostModule(nn.Layer):
    """Ghost convolution: a slim primary conv plus a cheap depthwise conv,
    with both outputs concatenated along the channel axis."""

    def __init__(self,
                 in_channels,
                 output_channels,
                 kernel_size=1,
                 ratio=2,
                 dw_size=3,
                 stride=1,
                 relu=True,
                 lr_mult=1.,
                 conv_decay=0.,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 name=None):
        super(GhostModule, self).__init__()
        # The primary conv makes ceil(out/ratio) channels; the cheap
        # depthwise conv generates the remaining "ghost" channels.
        init_channels = int(math.ceil(output_channels / ratio))
        new_channels = int(init_channels * (ratio - 1))
        act_name = "relu" if relu else None
        self.primary_conv = ConvBNLayer(
            in_c=in_channels,
            out_c=init_channels,
            filter_size=kernel_size,
            stride=stride,
            padding=int((kernel_size - 1) // 2),
            num_groups=1,
            act=act_name,
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name + "_primary_conv")
        self.cheap_operation = ConvBNLayer(
            in_c=init_channels,
            out_c=new_channels,
            filter_size=dw_size,
            stride=1,
            padding=int((dw_size - 1) // 2),
            num_groups=init_channels,
            act=act_name,
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name + "_cheap_operation")

    def forward(self, inputs):
        primary = self.primary_conv(inputs)
        ghost = self.cheap_operation(primary)
        return paddle.concat([primary, ghost], axis=1)
||||
|
||||
|
||||
class GhostBottleneck(nn.Layer):
    """GhostNet bottleneck block.

    Structure: GhostModule (expand) -> [stride-2 depthwise conv] ->
    [SE block] -> GhostModule (project), plus a residual shortcut
    (identity, or depthwise + pointwise conv when the shape changes).

    When ``return_list`` is True, ``forward`` returns
    ``[expanded_features, block_output]`` so SSD-style heads can also
    consume the expanded features.
    """

    def __init__(self,
                 in_channels,
                 hidden_dim,
                 output_channels,
                 kernel_size,
                 stride,
                 use_se,
                 lr_mult,
                 conv_decay=0.,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 return_list=False,
                 name=None):
        super(GhostBottleneck, self).__init__()
        self._stride = stride
        self._use_se = use_se
        self._num_channels = in_channels
        self._output_channels = output_channels
        self.return_list = return_list

        # Expansion: in_channels -> hidden_dim, with ReLU.
        self.ghost_module_1 = GhostModule(
            in_channels=in_channels,
            output_channels=hidden_dim,
            kernel_size=1,
            stride=1,
            relu=True,
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name + "_ghost_module_1")
        # Spatial downsampling between the two ghost modules.
        if stride == 2:
            self.depthwise_conv = ConvBNLayer(
                in_c=hidden_dim,
                out_c=hidden_dim,
                filter_size=kernel_size,
                stride=stride,
                padding=int((kernel_size - 1) // 2),
                num_groups=hidden_dim,
                act=None,
                lr_mult=lr_mult,
                conv_decay=conv_decay,
                norm_type=norm_type,
                norm_decay=norm_decay,
                freeze_norm=freeze_norm,
                name=name +
                "_depthwise_depthwise"  # looks strange due to an old typo, will be fixed later.
            )
        if use_se:
            self.se_block = SEBlock(hidden_dim, lr_mult, name=name + "_se")
        # Projection: hidden_dim -> output_channels, no activation.
        self.ghost_module_2 = GhostModule(
            in_channels=hidden_dim,
            output_channels=output_channels,
            kernel_size=1,
            relu=False,
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name + "_ghost_module_2")
        # Conv shortcut whenever identity does not match the output shape.
        if stride != 1 or in_channels != output_channels:
            self.shortcut_depthwise = ConvBNLayer(
                in_c=in_channels,
                out_c=in_channels,
                filter_size=kernel_size,
                stride=stride,
                padding=int((kernel_size - 1) // 2),
                num_groups=in_channels,
                act=None,
                lr_mult=lr_mult,
                conv_decay=conv_decay,
                norm_type=norm_type,
                norm_decay=norm_decay,
                freeze_norm=freeze_norm,
                name=name +
                "_shortcut_depthwise_depthwise"  # looks strange due to an old typo, will be fixed later.
            )
            self.shortcut_conv = ConvBNLayer(
                in_c=in_channels,
                out_c=output_channels,
                filter_size=1,
                stride=1,
                padding=0,
                num_groups=1,
                act=None,
                lr_mult=lr_mult,
                conv_decay=conv_decay,
                norm_type=norm_type,
                norm_decay=norm_decay,
                freeze_norm=freeze_norm,
                name=name + "_shortcut_conv")

    def forward(self, inputs):
        expanded = self.ghost_module_1(inputs)
        x = expanded
        if self._stride == 2:
            x = self.depthwise_conv(x)
        if self._use_se:
            x = self.se_block(x)
        x = self.ghost_module_2(x)

        # Residual connection: identity when the shape is unchanged,
        # otherwise the depthwise + pointwise shortcut branch.
        if self._stride == 1 and self._num_channels == self._output_channels:
            shortcut = inputs
        else:
            shortcut = self.shortcut_conv(self.shortcut_depthwise(inputs))
        x = paddle.add(x=x, y=shortcut)

        # Optionally expose the expanded features for SSD-style heads.
        return [expanded, x] if self.return_list else x
@register
@serializable
class GhostNet(nn.Layer):
    """GhostNet backbone.

    Args:
        scale (float): width multiplier applied to every stage.
        feature_maps (int|list): indices of the stages whose outputs
            are returned by ``forward``.
        with_extra_blocks (bool): append SSDLite-style extra blocks.
        extra_block_filters (list): [mid, out] channel pairs for the
            extra blocks.
        lr_mult_list (list): per-depth learning-rate multipliers
            (one entry per group of three bottlenecks).
        conv_decay (float): L2 decay for conv weights.
        norm_type (str): 'bn' or 'sync_bn'.
        norm_decay (float): L2 decay for norm parameters.
        freeze_norm (bool): freeze normalization statistics/params.
    """

    __shared__ = ['norm_type']

    def __init__(
            self,
            scale=1.3,
            feature_maps=[6, 12, 15],
            with_extra_blocks=False,
            extra_block_filters=[[256, 512], [128, 256], [128, 256], [64, 128]],
            lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
            conv_decay=0.,
            norm_type='bn',
            norm_decay=0.0,
            freeze_norm=False):
        super(GhostNet, self).__init__()
        if isinstance(feature_maps, Integral):
            feature_maps = [feature_maps]
        if norm_type == 'sync_bn' and freeze_norm:
            raise ValueError(
                "The norm_type should not be sync_bn when freeze_norm is True")
        self.feature_maps = feature_maps
        self.with_extra_blocks = with_extra_blocks
        self.extra_block_filters = extra_block_filters

        inplanes = 16
        # Per-bottleneck config: kernel, expansion, out channels, SE, stride.
        self.cfgs = [
            # k, t, c, SE, s
            [3, 16, 16, 0, 1],
            [3, 48, 24, 0, 2],
            [3, 72, 24, 0, 1],
            [5, 72, 40, 1, 2],
            [5, 120, 40, 1, 1],
            [3, 240, 80, 0, 2],
            [3, 200, 80, 0, 1],
            [3, 184, 80, 0, 1],
            [3, 184, 80, 0, 1],
            [3, 480, 112, 1, 1],
            [3, 672, 112, 1, 1],
            [5, 672, 160, 1, 2],  # SSDLite output
            [5, 960, 160, 0, 1],
            [5, 960, 160, 1, 1],
            [5, 960, 160, 0, 1],
            [5, 960, 160, 1, 1]
        ]
        self.scale = scale
        conv1_out_ch = int(make_divisible(inplanes * self.scale, 4))
        # Stem: stride-2 3x3 conv.
        self.conv1 = ConvBNLayer(
            in_c=3,
            out_c=conv1_out_ch,
            filter_size=3,
            stride=2,
            padding=1,
            num_groups=1,
            act="relu",
            lr_mult=1.,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name="conv1")

        # Build the ghost bottleneck stack.
        self._out_channels = []
        self.ghost_bottleneck_list = []
        idx = 0
        inplanes = conv1_out_ch
        for k, exp_size, c, use_se, s in self.cfgs:
            # One lr multiplier per group of three bottlenecks, clamped.
            lr_idx = min(idx // 3, len(lr_mult_list) - 1)
            lr_mult = lr_mult_list[lr_idx]

            # for SSD/SSDLite, first head input is after ResidualUnit expand_conv
            return_list = self.with_extra_blocks and idx + 2 in self.feature_maps

            ghost_bottleneck = self.add_sublayer(
                "_ghostbottleneck_" + str(idx),
                sublayer=GhostBottleneck(
                    in_channels=inplanes,
                    hidden_dim=int(make_divisible(exp_size * self.scale, 4)),
                    output_channels=int(make_divisible(c * self.scale, 4)),
                    kernel_size=k,
                    stride=s,
                    use_se=use_se,
                    lr_mult=lr_mult,
                    conv_decay=conv_decay,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    return_list=return_list,
                    name="_ghostbottleneck_" + str(idx)))
            self.ghost_bottleneck_list.append(ghost_bottleneck)
            inplanes = int(make_divisible(c * self.scale, 4))
            idx += 1
            # When the expanded features are tapped, the recorded width
            # is the expansion width, not the block output width.
            self._update_out_channels(
                int(make_divisible(exp_size * self.scale, 4))
                if return_list else inplanes, idx + 1, feature_maps)

        if self.with_extra_blocks:
            self.extra_block_list = []
            extra_out_c = int(make_divisible(self.scale * self.cfgs[-1][1], 4))
            lr_idx = min(idx // 3, len(lr_mult_list) - 1)
            lr_mult = lr_mult_list[lr_idx]

            # 1x1 transition conv before the SSDLite extra blocks.
            conv_extra = self.add_sublayer(
                "conv" + str(idx + 2),
                sublayer=ConvBNLayer(
                    in_c=inplanes,
                    out_c=extra_out_c,
                    filter_size=1,
                    stride=1,
                    padding=0,
                    num_groups=1,
                    act="relu6",
                    lr_mult=lr_mult,
                    conv_decay=conv_decay,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    name="conv" + str(idx + 2)))
            self.extra_block_list.append(conv_extra)
            idx += 1
            self._update_out_channels(extra_out_c, idx + 1, feature_maps)

            # Stride-2 depthwise-separable extra blocks.
            for j, block_filter in enumerate(self.extra_block_filters):
                in_c = extra_out_c if j == 0 else self.extra_block_filters[j -
                                                                           1][1]
                conv_extra = self.add_sublayer(
                    "conv" + str(idx + 2),
                    sublayer=ExtraBlockDW(
                        in_c,
                        block_filter[0],
                        block_filter[1],
                        stride=2,
                        lr_mult=lr_mult,
                        conv_decay=conv_decay,
                        norm_type=norm_type,
                        norm_decay=norm_decay,
                        freeze_norm=freeze_norm,
                        name='conv' + str(idx + 2)))
                self.extra_block_list.append(conv_extra)
                idx += 1
                self._update_out_channels(block_filter[1], idx + 1,
                                          feature_maps)

    def _update_out_channels(self, channel, feature_idx, feature_maps):
        """Record `channel` iff this stage index is a requested output."""
        if feature_idx in feature_maps:
            self._out_channels.append(channel)

    def forward(self, inputs):
        """Run the backbone; returns the feature maps listed in
        ``self.feature_maps`` (expanded features where tapped)."""
        x = self.conv1(inputs['image'])
        outs = []
        for idx, ghost_bottleneck in enumerate(self.ghost_bottleneck_list):
            x = ghost_bottleneck(x)
            if idx + 2 in self.feature_maps:
                # A list means [expanded_features, block_output].
                if isinstance(x, list):
                    outs.append(x[0])
                    x = x[1]
                else:
                    outs.append(x)

        if not self.with_extra_blocks:
            return outs

        for i, block in enumerate(self.extra_block_list):
            idx = i + len(self.ghost_bottleneck_list)
            x = block(x)
            if idx + 2 in self.feature_maps:
                outs.append(x)
        return outs

    @property
    def out_shape(self):
        return [ShapeSpec(channels=c) for c in self._out_channels]
@ -0,0 +1,724 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle.regularizer import L2Decay
|
||||
from paddle import ParamAttr
|
||||
from paddle.nn.initializer import Normal
|
||||
from numbers import Integral
|
||||
import math
|
||||
|
||||
from ppdet.core.workspace import register
|
||||
from ..shape_spec import ShapeSpec
|
||||
|
||||
__all__ = ['HRNet']
|
||||
|
||||
|
||||
class ConvNormLayer(nn.Layer):
    """Conv2D followed by BatchNorm / SyncBN / GroupNorm and an
    optional ReLU, with named, optionally frozen norm parameters.

    NOTE(review): `use_dcn` is accepted but not referenced in this
    implementation — kept for interface compatibility.
    """

    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size,
                 stride=1,
                 norm_type='bn',
                 norm_groups=32,
                 use_dcn=False,
                 norm_decay=0.,
                 freeze_norm=False,
                 act=None,
                 name=None):
        super(ConvNormLayer, self).__init__()
        assert norm_type in ['bn', 'sync_bn', 'gn']

        self.act = act
        self.conv = nn.Conv2D(
            in_channels=ch_in,
            out_channels=ch_out,
            kernel_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=1,
            weight_attr=ParamAttr(
                name=name + "_weights", initializer=Normal(
                    mean=0., std=0.01)),
            bias_attr=False)

        # Freezing the norm zeroes its learning rate as well.
        norm_lr = 0. if freeze_norm else 1.

        norm_name = name + '_bn'
        param_attr = ParamAttr(
            name=norm_name + "_scale",
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay))
        bias_attr = ParamAttr(
            name=norm_name + "_offset",
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay))
        # Frozen norm also uses the global (running) statistics.
        global_stats = True if freeze_norm else False
        if norm_type in ['bn', 'sync_bn']:
            self.norm = nn.BatchNorm(
                ch_out,
                param_attr=param_attr,
                bias_attr=bias_attr,
                use_global_stats=global_stats,
                moving_mean_name=norm_name + '_mean',
                moving_variance_name=norm_name + '_variance')
        elif norm_type == 'gn':
            self.norm = nn.GroupNorm(
                num_groups=norm_groups,
                num_channels=ch_out,
                weight_attr=param_attr,
                bias_attr=bias_attr)
        if freeze_norm:
            # Stop gradients through all norm parameters.
            for param in self.norm.parameters():
                param.stop_gradient = True

    def forward(self, inputs):
        out = self.norm(self.conv(inputs))
        if self.act == 'relu':
            out = F.relu(out)
        return out
class Layer1(nn.Layer):
    """HRNet stage 1: four stacked BottleneckBlocks.

    The first block projects `num_channels` -> 256 (downsample branch);
    the remaining three operate at 256 channels.
    """

    def __init__(self,
                 num_channels,
                 has_se=False,
                 norm_decay=0.,
                 freeze_norm=True,
                 name=None):
        super(Layer1, self).__init__()

        self.bottleneck_block_list = []
        for i in range(4):
            first = (i == 0)
            block = self.add_sublayer(
                "block_{}_{}".format(name, i + 1),
                BottleneckBlock(
                    num_channels=num_channels if first else 256,
                    num_filters=64,
                    has_se=has_se,
                    stride=1,
                    # Only the first block needs the channel projection.
                    downsample=first,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    name=name + '_' + str(i + 1)))
            self.bottleneck_block_list.append(block)

    def forward(self, input):
        out = input
        for block in self.bottleneck_block_list:
            out = block(out)
        return out
class TransitionLayer(nn.Layer):
    """HRNet transition between stages with different branch counts.

    For each output branch: identity when the channel count already
    matches, a 3x3 conv when only channels change, or a stride-2 3x3
    conv on the last input branch to create a new lower-resolution
    branch. (Fix: removed the unused local `out = []` from the
    original implementation.)
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 norm_decay=0.,
                 freeze_norm=True,
                 name=None):
        super(TransitionLayer, self).__init__()

        num_in = len(in_channels)
        num_out = len(out_channels)
        self.conv_bn_func_list = []
        for i in range(num_out):
            residual = None
            if i < num_in:
                # Existing branch: convert only if channels differ.
                if in_channels[i] != out_channels[i]:
                    residual = self.add_sublayer(
                        "transition_{}_layer_{}".format(name, i + 1),
                        ConvNormLayer(
                            ch_in=in_channels[i],
                            ch_out=out_channels[i],
                            filter_size=3,
                            norm_decay=norm_decay,
                            freeze_norm=freeze_norm,
                            act='relu',
                            name=name + '_layer_' + str(i + 1)))
            else:
                # New branch: downsample from the last input branch.
                residual = self.add_sublayer(
                    "transition_{}_layer_{}".format(name, i + 1),
                    ConvNormLayer(
                        ch_in=in_channels[-1],
                        ch_out=out_channels[i],
                        filter_size=3,
                        stride=2,
                        norm_decay=norm_decay,
                        freeze_norm=freeze_norm,
                        act='relu',
                        name=name + '_layer_' + str(i + 1)))
            self.conv_bn_func_list.append(residual)

    def forward(self, input):
        """Map a list of branch tensors to the new branch layout.

        `None` entries pass the corresponding branch through unchanged;
        indices beyond `len(input)` are fed from the last input branch.
        """
        outs = []
        for idx, conv_bn_func in enumerate(self.conv_bn_func_list):
            if conv_bn_func is None:
                outs.append(input[idx])
            else:
                if idx < len(input):
                    outs.append(conv_bn_func(input[idx]))
                else:
                    outs.append(conv_bn_func(input[-1]))
        return outs
class Branches(nn.Layer):
    """Parallel HRNet branches: `block_num` BasicBlocks per branch,
    one branch per entry of `out_channels`."""

    def __init__(self,
                 block_num,
                 in_channels,
                 out_channels,
                 has_se=False,
                 norm_decay=0.,
                 freeze_norm=True,
                 name=None):
        super(Branches, self).__init__()

        self.basic_block_list = []
        for i in range(len(out_channels)):
            branch = []
            for j in range(block_num):
                # First block of a branch converts the channel count.
                in_ch = in_channels[i] if j == 0 else out_channels[i]
                block = self.add_sublayer(
                    "bb_{}_branch_layer_{}_{}".format(name, i + 1, j + 1),
                    BasicBlock(
                        num_channels=in_ch,
                        num_filters=out_channels[i],
                        has_se=has_se,
                        norm_decay=norm_decay,
                        freeze_norm=freeze_norm,
                        name=name + '_branch_layer_' + str(i + 1) + '_' +
                        str(j + 1)))
                branch.append(block)
            self.basic_block_list.append(branch)

    def forward(self, inputs):
        """Apply each branch's block stack to its input tensor."""
        outs = []
        for idx, branch_input in enumerate(inputs):
            feat = branch_input
            for block in self.basic_block_list[idx]:
                feat = block(feat)
            outs.append(feat)
        return outs
class BottleneckBlock(nn.Layer):
    """Residual bottleneck (1x1 -> 3x3 -> 1x1, expansion 4) with
    optional SE block and optional 1x1 projection shortcut."""

    def __init__(self,
                 num_channels,
                 num_filters,
                 has_se,
                 stride=1,
                 downsample=False,
                 norm_decay=0.,
                 freeze_norm=True,
                 name=None):
        super(BottleneckBlock, self).__init__()

        self.has_se = has_se
        self.downsample = downsample

        # 1x1 reduce.
        self.conv1 = ConvNormLayer(
            ch_in=num_channels,
            ch_out=num_filters,
            filter_size=1,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            act="relu",
            name=name + "_conv1")
        # 3x3 spatial conv (carries the stride).
        self.conv2 = ConvNormLayer(
            ch_in=num_filters,
            ch_out=num_filters,
            filter_size=3,
            stride=stride,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            act="relu",
            name=name + "_conv2")
        # 1x1 expand (x4), no activation before the residual add.
        self.conv3 = ConvNormLayer(
            ch_in=num_filters,
            ch_out=num_filters * 4,
            filter_size=1,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            act=None,
            name=name + "_conv3")

        if self.downsample:
            # Projection shortcut to match the expanded channel count.
            self.conv_down = ConvNormLayer(
                ch_in=num_channels,
                ch_out=num_filters * 4,
                filter_size=1,
                norm_decay=norm_decay,
                freeze_norm=freeze_norm,
                act=None,
                name=name + "_downsample")

        if self.has_se:
            self.se = SELayer(
                num_channels=num_filters * 4,
                num_filters=num_filters * 4,
                reduction_ratio=16,
                name='fc' + name)

    def forward(self, input):
        out = self.conv1(input)
        out = self.conv2(out)
        out = self.conv3(out)

        residual = self.conv_down(input) if self.downsample else input
        if self.has_se:
            out = self.se(out)

        return F.relu(paddle.add(x=residual, y=out))
class BasicBlock(nn.Layer):
    """Residual basic block (3x3 -> 3x3) with optional SE block.

    NOTE(review): the `downsample` branch projects to
    `num_filters * 4` channels while the main path produces
    `num_filters` — the add would only be valid when this branch is
    unused; kept as-is to preserve behavior (verify against callers).
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 stride=1,
                 has_se=False,
                 downsample=False,
                 norm_decay=0.,
                 freeze_norm=True,
                 name=None):
        super(BasicBlock, self).__init__()

        self.has_se = has_se
        self.downsample = downsample
        # First 3x3 conv carries the stride and ReLU.
        self.conv1 = ConvNormLayer(
            ch_in=num_channels,
            ch_out=num_filters,
            filter_size=3,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            stride=stride,
            act="relu",
            name=name + "_conv1")
        # Second 3x3 conv, no activation before the residual add.
        self.conv2 = ConvNormLayer(
            ch_in=num_filters,
            ch_out=num_filters,
            filter_size=3,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            stride=1,
            act=None,
            name=name + "_conv2")

        if self.downsample:
            self.conv_down = ConvNormLayer(
                ch_in=num_channels,
                ch_out=num_filters * 4,
                filter_size=1,
                norm_decay=norm_decay,
                freeze_norm=freeze_norm,
                act=None,
                name=name + "_downsample")

        if self.has_se:
            self.se = SELayer(
                num_channels=num_filters,
                num_filters=num_filters,
                reduction_ratio=16,
                name='fc' + name)

    def forward(self, input):
        out = self.conv1(input)
        out = self.conv2(out)

        residual = self.conv_down(input) if self.downsample else input
        if self.has_se:
            out = self.se(out)

        return F.relu(paddle.add(x=residual, y=out))
class SELayer(nn.Layer):
    """Squeeze-and-Excitation layer: pool -> FC(reduce) -> ReLU ->
    FC(expand) -> sigmoid -> channel-wise rescale.

    Fix: the original referenced `AdaptiveAvgPool2D`, `Linear` and
    `Uniform` directly, but this file's imports do not bring those
    names into scope (NameError at construction time). They are now
    qualified through the imported `nn` / `paddle` modules — behavior
    is otherwise unchanged.
    """

    def __init__(self, num_channels, num_filters, reduction_ratio, name=None):
        super(SELayer, self).__init__()

        self.pool2d_gap = nn.AdaptiveAvgPool2D(1)

        self._num_channels = num_channels

        # Bottleneck width of the squeeze FC.
        med_ch = int(num_channels / reduction_ratio)
        # Uniform init bound follows 1/sqrt(fan_in).
        stdv = 1.0 / math.sqrt(num_channels * 1.0)
        self.squeeze = nn.Linear(
            num_channels,
            med_ch,
            weight_attr=ParamAttr(
                initializer=paddle.nn.initializer.Uniform(-stdv, stdv),
                name=name + "_sqz_weights"),
            bias_attr=ParamAttr(name=name + '_sqz_offset'))

        stdv = 1.0 / math.sqrt(med_ch * 1.0)
        self.excitation = nn.Linear(
            med_ch,
            num_filters,
            weight_attr=ParamAttr(
                initializer=paddle.nn.initializer.Uniform(-stdv, stdv),
                name=name + "_exc_weights"),
            bias_attr=ParamAttr(name=name + '_exc_offset'))

    def forward(self, input):
        # Global average pool, drop spatial dims for the FC layers.
        pool = self.pool2d_gap(input)
        pool = paddle.squeeze(pool, axis=[2, 3])
        squeeze = F.relu(self.squeeze(pool))
        # Sigmoid gate in [0, 1], broadcast back over H and W.
        excitation = F.sigmoid(self.excitation(squeeze))
        excitation = paddle.unsqueeze(excitation, axis=[2, 3])
        return input * excitation
class Stage(nn.Layer):
    """A sequence of `num_modules` HighResolutionModules.

    Only the last module may disable multi-scale output (when
    `multi_scale_output` is False); all earlier modules keep it on.
    """

    def __init__(self,
                 num_channels,
                 num_modules,
                 num_filters,
                 has_se=False,
                 norm_decay=0.,
                 freeze_norm=True,
                 multi_scale_output=True,
                 name=None):
        super(Stage, self).__init__()

        self._num_modules = num_modules
        self.stage_func_list = []
        for i in range(num_modules):
            # Multi-scale output is suppressed only on the final module
            # and only when the stage itself disables it.
            module_msout = multi_scale_output or i != num_modules - 1
            stage_func = self.add_sublayer(
                "stage_{}_{}".format(name, i + 1),
                HighResolutionModule(
                    num_channels=num_channels,
                    num_filters=num_filters,
                    has_se=has_se,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    multi_scale_output=module_msout,
                    name=name + '_' + str(i + 1)))
            self.stage_func_list.append(stage_func)

    def forward(self, input):
        out = input
        for module in self.stage_func_list:
            out = module(out)
        return out
class HighResolutionModule(nn.Layer):
    """One HRNet exchange unit: parallel branches (4 BasicBlocks each)
    followed by cross-resolution fusion."""

    def __init__(self,
                 num_channels,
                 num_filters,
                 has_se=False,
                 multi_scale_output=True,
                 norm_decay=0.,
                 freeze_norm=True,
                 name=None):
        super(HighResolutionModule, self).__init__()
        # Per-branch residual stacks.
        self.branches_func = Branches(
            block_num=4,
            in_channels=num_channels,
            out_channels=num_filters,
            has_se=has_se,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name)
        # Cross-resolution feature fusion.
        self.fuse_func = FuseLayers(
            in_channels=num_filters,
            out_channels=num_filters,
            multi_scale_output=multi_scale_output,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name)

    def forward(self, input):
        return self.fuse_func(self.branches_func(input))
class FuseLayers(nn.Layer):
    """HRNet cross-resolution fusion.

    For each output branch i, every other branch j is converted to
    branch i's resolution/width: higher-resolution branches (j > i)
    get a 1x1 conv plus nearest upsampling; lower-resolution branches
    (j < i) get a chain of stride-2 3x3 convs. The converted features
    are summed and passed through ReLU.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 multi_scale_output=True,
                 norm_decay=0.,
                 freeze_norm=True,
                 name=None):
        super(FuseLayers, self).__init__()

        # With multi-scale output off, only the highest-resolution
        # branch is produced.
        self._actual_ch = len(in_channels) if multi_scale_output else 1
        self._in_channels = in_channels

        self.residual_func_list = []
        for i in range(self._actual_ch):
            for j in range(len(in_channels)):
                if j > i:
                    # Higher-res source: 1x1 channel projection
                    # (upsampling happens in forward()).
                    residual_func = self.add_sublayer(
                        "residual_{}_layer_{}_{}".format(name, i + 1, j + 1),
                        ConvNormLayer(
                            ch_in=in_channels[j],
                            ch_out=out_channels[i],
                            filter_size=1,
                            stride=1,
                            act=None,
                            norm_decay=norm_decay,
                            freeze_norm=freeze_norm,
                            name=name + '_layer_' + str(i + 1) + '_' +
                            str(j + 1)))
                    self.residual_func_list.append(residual_func)
                elif j < i:
                    # Lower-res source: chain of (i - j) stride-2 convs;
                    # only the last one targets out_channels[i] and has
                    # no activation before the fusion add.
                    pre_num_filters = in_channels[j]
                    for k in range(i - j):
                        if k == i - j - 1:
                            residual_func = self.add_sublayer(
                                "residual_{}_layer_{}_{}_{}".format(
                                    name, i + 1, j + 1, k + 1),
                                ConvNormLayer(
                                    ch_in=pre_num_filters,
                                    ch_out=out_channels[i],
                                    filter_size=3,
                                    stride=2,
                                    norm_decay=norm_decay,
                                    freeze_norm=freeze_norm,
                                    act=None,
                                    name=name + '_layer_' + str(i + 1) + '_' +
                                    str(j + 1) + '_' + str(k + 1)))
                            pre_num_filters = out_channels[i]
                        else:
                            residual_func = self.add_sublayer(
                                "residual_{}_layer_{}_{}_{}".format(
                                    name, i + 1, j + 1, k + 1),
                                ConvNormLayer(
                                    ch_in=pre_num_filters,
                                    ch_out=out_channels[j],
                                    filter_size=3,
                                    stride=2,
                                    norm_decay=norm_decay,
                                    freeze_norm=freeze_norm,
                                    act="relu",
                                    name=name + '_layer_' + str(i + 1) + '_' +
                                    str(j + 1) + '_' + str(k + 1)))
                            pre_num_filters = out_channels[j]
                        self.residual_func_list.append(residual_func)

    def forward(self, input):
        outs = []
        residual_func_idx = 0
        for i in range(self._actual_ch):
            residual = input[i]
            for j in range(len(self._in_channels)):
                if j > i:
                    # Project then upsample by 2**(j - i) before adding.
                    y = self.residual_func_list[residual_func_idx](input[j])
                    residual_func_idx += 1
                    y = F.interpolate(y, scale_factor=2**(j - i))
                    residual = paddle.add(x=residual, y=y)
                elif j < i:
                    # Downsample through the stored conv chain.
                    y = input[j]
                    for k in range(i - j):
                        y = self.residual_func_list[residual_func_idx](y)
                        residual_func_idx += 1
                    residual = paddle.add(x=residual, y=y)
            outs.append(F.relu(residual))

        return outs
@register
class HRNet(nn.Layer):
    """
    HRNet, see https://arxiv.org/abs/1908.07919

    Args:
        width (int): the width of HRNet
        has_se (bool): whether to add SE block for each stage
        freeze_at (int): the stage to freeze
        freeze_norm (bool): whether to freeze norm in HRNet
        norm_decay (float): weight decay for normalization layer weights
        return_idx (List): the stage to return
    """

    def __init__(self,
                 width=18,
                 has_se=False,
                 freeze_at=0,
                 freeze_norm=True,
                 norm_decay=0.,
                 return_idx=[0, 1, 2, 3]):
        super(HRNet, self).__init__()

        self.width = width
        self.has_se = has_se
        if isinstance(return_idx, Integral):
            return_idx = [return_idx]

        assert len(return_idx) > 0, "need one or more return index"
        self.freeze_at = freeze_at
        self.return_idx = return_idx

        # Branch widths per stage, keyed by the base width.
        self.channels = {
            18: [[18, 36], [18, 36, 72], [18, 36, 72, 144]],
            30: [[30, 60], [30, 60, 120], [30, 60, 120, 240]],
            32: [[32, 64], [32, 64, 128], [32, 64, 128, 256]],
            40: [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
            44: [[44, 88], [44, 88, 176], [44, 88, 176, 352]],
            48: [[48, 96], [48, 96, 192], [48, 96, 192, 384]],
            60: [[60, 120], [60, 120, 240], [60, 120, 240, 480]],
            64: [[64, 128], [64, 128, 256], [64, 128, 256, 512]]
        }

        channels_2, channels_3, channels_4 = self.channels[width]
        num_modules_2, num_modules_3, num_modules_4 = 1, 4, 3
        self._out_channels = channels_4
        self._out_strides = [4, 8, 16, 32]

        # Stem: two stride-2 3x3 convs (overall stride 4).
        self.conv_layer1_1 = ConvNormLayer(
            ch_in=3,
            ch_out=64,
            filter_size=3,
            stride=2,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            act='relu',
            name="layer1_1")
        self.conv_layer1_2 = ConvNormLayer(
            ch_in=64,
            ch_out=64,
            filter_size=3,
            stride=2,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            act='relu',
            name="layer1_2")

        # Stage 1: bottleneck stack producing 256 channels.
        self.la1 = Layer1(
            num_channels=64,
            has_se=has_se,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name="layer2")

        # Transition + stage 2 (two branches).
        self.tr1 = TransitionLayer(
            in_channels=[256],
            out_channels=channels_2,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name="tr1")
        self.st2 = Stage(
            num_channels=channels_2,
            num_modules=num_modules_2,
            num_filters=channels_2,
            has_se=self.has_se,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name="st2")

        # Transition + stage 3 (three branches).
        self.tr2 = TransitionLayer(
            in_channels=channels_2,
            out_channels=channels_3,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name="tr2")
        self.st3 = Stage(
            num_channels=channels_3,
            num_modules=num_modules_3,
            num_filters=channels_3,
            has_se=self.has_se,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name="st3")

        # Transition + stage 4 (four branches); single-scale output
        # when only one return index is requested.
        self.tr3 = TransitionLayer(
            in_channels=channels_3,
            out_channels=channels_4,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name="tr3")
        self.st4 = Stage(
            num_channels=channels_4,
            num_modules=num_modules_4,
            num_filters=channels_4,
            has_se=self.has_se,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            multi_scale_output=len(return_idx) > 1,
            name="st4")

    def forward(self, inputs):
        x = inputs['image']
        feat = self.conv_layer1_2(self.conv_layer1_1(x))

        feat = self.la1(feat)
        feat = self.st2(self.tr1([feat]))
        feat = self.st3(self.tr2(feat))
        st4 = self.st4(self.tr3(feat))

        res = []
        for i, layer in enumerate(st4):
            # Freeze the configured stage output.
            if i == self.freeze_at:
                layer.stop_gradient = True
            if i in self.return_idx:
                res.append(layer)

        return res

    @property
    def out_shape(self):
        return [
            ShapeSpec(
                channels=self._out_channels[i], stride=self._out_strides[i])
            for i in self.return_idx
        ]
@ -0,0 +1,409 @@
|
|||
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle import ParamAttr
|
||||
from paddle.regularizer import L2Decay
|
||||
from paddle.nn.initializer import KaimingNormal
|
||||
from ppdet.core.workspace import register, serializable
|
||||
from numbers import Integral
|
||||
from ..shape_spec import ShapeSpec
|
||||
|
||||
__all__ = ['MobileNet']
|
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer):
    """Conv2D + BatchNorm (or SyncBatchNorm) + optional ReLU/ReLU6.

    NOTE(review): `name` is accepted for interface compatibility but
    is not used for parameter naming in this implementation.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 padding,
                 num_groups=1,
                 act='relu',
                 conv_lr=1.,
                 conv_decay=0.,
                 norm_decay=0.,
                 norm_type='bn',
                 name=None):
        super(ConvBNLayer, self).__init__()
        self.act = act
        self._conv = nn.Conv2D(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            weight_attr=ParamAttr(
                learning_rate=conv_lr,
                initializer=KaimingNormal(),
                regularizer=L2Decay(conv_decay)),
            bias_attr=False)

        param_attr = ParamAttr(regularizer=L2Decay(norm_decay))
        bias_attr = ParamAttr(regularizer=L2Decay(norm_decay))
        if norm_type == 'sync_bn':
            self._batch_norm = nn.SyncBatchNorm(
                out_channels, weight_attr=param_attr, bias_attr=bias_attr)
        else:
            self._batch_norm = nn.BatchNorm(
                out_channels,
                act=None,
                param_attr=param_attr,
                bias_attr=bias_attr,
                use_global_stats=False)

    def forward(self, x):
        out = self._batch_norm(self._conv(x))
        # Activation applied outside the norm layer.
        if self.act == "relu":
            out = F.relu(out)
        elif self.act == "relu6":
            out = F.relu6(out)
        return out
class DepthwiseSeparable(nn.Layer):
    """Depthwise-separable convolution unit (MobileNet v1).

    A 3x3 depthwise ConvBNLayer (grouped by channel) followed by a 1x1
    pointwise ConvBNLayer. `scale` multiplies all channel counts.
    """

    def __init__(self,
                 in_channels,
                 out_channels1,
                 out_channels2,
                 num_groups,
                 stride,
                 scale,
                 conv_lr=1.,
                 conv_decay=0.,
                 norm_decay=0.,
                 norm_type='bn',
                 name=None):
        super(DepthwiseSeparable, self).__init__()
        dw_channels = int(out_channels1 * scale)
        pw_channels = int(out_channels2 * scale)

        # 3x3 depthwise conv: carries the stride of the unit.
        self._depthwise_conv = ConvBNLayer(
            in_channels,
            dw_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            num_groups=int(num_groups * scale),
            conv_lr=conv_lr,
            conv_decay=conv_decay,
            norm_decay=norm_decay,
            norm_type=norm_type,
            name=name + "_dw")

        # 1x1 pointwise conv: mixes channels, always stride 1.
        self._pointwise_conv = ConvBNLayer(
            dw_channels,
            pw_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            conv_lr=conv_lr,
            conv_decay=conv_decay,
            norm_decay=norm_decay,
            norm_type=norm_type,
            name=name + "_sep")

    def forward(self, x):
        """Depthwise conv then pointwise conv."""
        return self._pointwise_conv(self._depthwise_conv(x))
|
||||
|
||||
|
||||
class ExtraBlock(nn.Layer):
    """Extra feature block appended after the MobileNet v1 trunk.

    A 1x1 channel-reduction conv followed by a 3x3 (typically strided)
    conv; both use relu6. Used to produce additional SSD feature maps.
    """

    def __init__(self,
                 in_channels,
                 out_channels1,
                 out_channels2,
                 num_groups=1,
                 stride=2,
                 conv_lr=1.,
                 conv_decay=0.,
                 norm_decay=0.,
                 norm_type='bn',
                 name=None):
        super(ExtraBlock, self).__init__()
        # Settings shared by both convs in this block.
        shared_kwargs = dict(
            num_groups=int(num_groups),
            act='relu6',
            conv_lr=conv_lr,
            conv_decay=conv_decay,
            norm_decay=norm_decay,
            norm_type=norm_type)

        # 1x1 conv reducing channels before the spatial conv.
        self.pointwise_conv = ConvBNLayer(
            in_channels,
            int(out_channels1),
            kernel_size=1,
            stride=1,
            padding=0,
            name=name + "_extra1",
            **shared_kwargs)

        # 3x3 conv producing the block output (stride usually 2).
        self.normal_conv = ConvBNLayer(
            int(out_channels1),
            int(out_channels2),
            kernel_size=3,
            stride=stride,
            padding=1,
            name=name + "_extra2",
            **shared_kwargs)

    def forward(self, x):
        """1x1 reduction then 3x3 conv."""
        return self.normal_conv(self.pointwise_conv(x))
|
||||
|
||||
|
||||
@register
@serializable
class MobileNet(nn.Layer):
    """MobileNet v1 backbone.

    Builds conv1 plus 13 depthwise-separable blocks (conv2_1 .. conv6) and,
    optionally, a chain of ExtraBlocks for SSD-style detection heads.
    Feature maps whose 1-based block index appears in `feature_maps` are
    collected and returned by `forward`.

    Args:
        norm_type (str): 'bn' or 'sync_bn' for all normalization layers.
        norm_decay (float): L2 decay applied to norm scale/offset.
        conv_decay (float): L2 decay applied to conv weights.
        scale (float): width multiplier applied to channel counts.
        conv_learning_rate (float): learning-rate multiplier for conv weights.
        feature_maps (list[int]|int): 1-based indices of blocks whose
            outputs are returned.
        with_extra_blocks (bool): append ExtraBlocks after the trunk.
        extra_block_filters (list[list[int]]): [reduce, out] channel pairs
            for each ExtraBlock.
    """
    __shared__ = ['norm_type']

    def __init__(self,
                 norm_type='bn',
                 norm_decay=0.,
                 conv_decay=0.,
                 scale=1,
                 conv_learning_rate=1.0,
                 feature_maps=[4, 6, 13],
                 with_extra_blocks=False,
                 extra_block_filters=[[256, 512], [128, 256], [128, 256],
                                      [64, 128]]):
        super(MobileNet, self).__init__()
        # Accept a bare int for feature_maps as a convenience.
        if isinstance(feature_maps, Integral):
            feature_maps = [feature_maps]
        self.feature_maps = feature_maps
        self.with_extra_blocks = with_extra_blocks
        self.extra_block_filters = extra_block_filters

        # Channel counts of the returned feature maps (filled as we build).
        self._out_channels = []

        # Stem: 3x3 stride-2 conv, 1/2 resolution.
        self.conv1 = ConvBNLayer(
            in_channels=3,
            out_channels=int(32 * scale),
            kernel_size=3,
            stride=2,
            padding=1,
            conv_lr=conv_learning_rate,
            conv_decay=conv_decay,
            norm_decay=norm_decay,
            norm_type=norm_type,
            name="conv1")

        # Ordered list of depthwise-separable blocks; indices into this
        # list (1-based) are what `feature_maps` refers to.
        self.dwsl = []
        dws21 = self.add_sublayer(
            "conv2_1",
            sublayer=DepthwiseSeparable(
                in_channels=int(32 * scale),
                out_channels1=32,
                out_channels2=64,
                num_groups=32,
                stride=1,
                scale=scale,
                conv_lr=conv_learning_rate,
                conv_decay=conv_decay,
                norm_decay=norm_decay,
                norm_type=norm_type,
                name="conv2_1"))
        self.dwsl.append(dws21)
        self._update_out_channels(64, len(self.dwsl), feature_maps)
        dws22 = self.add_sublayer(
            "conv2_2",
            sublayer=DepthwiseSeparable(
                in_channels=int(64 * scale),
                out_channels1=64,
                out_channels2=128,
                num_groups=64,
                stride=2,
                scale=scale,
                conv_lr=conv_learning_rate,
                conv_decay=conv_decay,
                norm_decay=norm_decay,
                norm_type=norm_type,
                name="conv2_2"))
        self.dwsl.append(dws22)
        self._update_out_channels(128, len(self.dwsl), feature_maps)
        # 1/4
        dws31 = self.add_sublayer(
            "conv3_1",
            sublayer=DepthwiseSeparable(
                in_channels=int(128 * scale),
                out_channels1=128,
                out_channels2=128,
                num_groups=128,
                stride=1,
                scale=scale,
                conv_lr=conv_learning_rate,
                conv_decay=conv_decay,
                norm_decay=norm_decay,
                norm_type=norm_type,
                name="conv3_1"))
        self.dwsl.append(dws31)
        self._update_out_channels(128, len(self.dwsl), feature_maps)
        dws32 = self.add_sublayer(
            "conv3_2",
            sublayer=DepthwiseSeparable(
                in_channels=int(128 * scale),
                out_channels1=128,
                out_channels2=256,
                num_groups=128,
                stride=2,
                scale=scale,
                conv_lr=conv_learning_rate,
                conv_decay=conv_decay,
                norm_decay=norm_decay,
                norm_type=norm_type,
                name="conv3_2"))
        self.dwsl.append(dws32)
        self._update_out_channels(256, len(self.dwsl), feature_maps)
        # 1/8
        dws41 = self.add_sublayer(
            "conv4_1",
            sublayer=DepthwiseSeparable(
                in_channels=int(256 * scale),
                out_channels1=256,
                out_channels2=256,
                num_groups=256,
                stride=1,
                scale=scale,
                conv_lr=conv_learning_rate,
                conv_decay=conv_decay,
                norm_decay=norm_decay,
                norm_type=norm_type,
                name="conv4_1"))
        self.dwsl.append(dws41)
        self._update_out_channels(256, len(self.dwsl), feature_maps)
        dws42 = self.add_sublayer(
            "conv4_2",
            sublayer=DepthwiseSeparable(
                in_channels=int(256 * scale),
                out_channels1=256,
                out_channels2=512,
                num_groups=256,
                stride=2,
                scale=scale,
                conv_lr=conv_learning_rate,
                conv_decay=conv_decay,
                norm_decay=norm_decay,
                norm_type=norm_type,
                name="conv4_2"))
        self.dwsl.append(dws42)
        self._update_out_channels(512, len(self.dwsl), feature_maps)
        # 1/16
        # NOTE(review): in_channels here is the raw 512, not int(512*scale)
        # as in the other blocks — looks inconsistent for scale != 1;
        # confirm against upstream/pretrained weights before changing.
        for i in range(5):
            tmp = self.add_sublayer(
                "conv5_" + str(i + 1),
                sublayer=DepthwiseSeparable(
                    in_channels=512,
                    out_channels1=512,
                    out_channels2=512,
                    num_groups=512,
                    stride=1,
                    scale=scale,
                    conv_lr=conv_learning_rate,
                    conv_decay=conv_decay,
                    norm_decay=norm_decay,
                    norm_type=norm_type,
                    name="conv5_" + str(i + 1)))
            self.dwsl.append(tmp)
            self._update_out_channels(512, len(self.dwsl), feature_maps)
        dws56 = self.add_sublayer(
            "conv5_6",
            sublayer=DepthwiseSeparable(
                in_channels=int(512 * scale),
                out_channels1=512,
                out_channels2=1024,
                num_groups=512,
                stride=2,
                scale=scale,
                conv_lr=conv_learning_rate,
                conv_decay=conv_decay,
                norm_decay=norm_decay,
                norm_type=norm_type,
                name="conv5_6"))
        self.dwsl.append(dws56)
        self._update_out_channels(1024, len(self.dwsl), feature_maps)
        # 1/32
        dws6 = self.add_sublayer(
            "conv6",
            sublayer=DepthwiseSeparable(
                in_channels=int(1024 * scale),
                out_channels1=1024,
                out_channels2=1024,
                num_groups=1024,
                stride=1,
                scale=scale,
                conv_lr=conv_learning_rate,
                conv_decay=conv_decay,
                norm_decay=norm_decay,
                norm_type=norm_type,
                name="conv6"))
        self.dwsl.append(dws6)
        self._update_out_channels(1024, len(self.dwsl), feature_maps)

        # Optional SSD extra blocks, chained after the trunk output.
        if self.with_extra_blocks:
            self.extra_blocks = []
            for i, block_filter in enumerate(self.extra_block_filters):
                in_c = 1024 if i == 0 else self.extra_block_filters[i - 1][1]
                conv_extra = self.add_sublayer(
                    "conv7_" + str(i + 1),
                    sublayer=ExtraBlock(
                        in_c,
                        block_filter[0],
                        block_filter[1],
                        conv_lr=conv_learning_rate,
                        conv_decay=conv_decay,
                        norm_decay=norm_decay,
                        norm_type=norm_type,
                        name="conv7_" + str(i + 1)))
                self.extra_blocks.append(conv_extra)
                self._update_out_channels(
                    block_filter[1],
                    len(self.dwsl) + len(self.extra_blocks), feature_maps)

    def _update_out_channels(self, channel, feature_idx, feature_maps):
        # Record the channel count if this block index is a requested output.
        if feature_idx in feature_maps:
            self._out_channels.append(channel)

    def forward(self, inputs):
        """Run the backbone on inputs['image'] and return selected features."""
        outs = []
        y = self.conv1(inputs['image'])
        for i, block in enumerate(self.dwsl):
            y = block(y)
            if i + 1 in self.feature_maps:
                outs.append(y)

        if not self.with_extra_blocks:
            return outs

        # Extra blocks continue from the last collected trunk feature.
        y = outs[-1]
        for i, block in enumerate(self.extra_blocks):
            idx = i + len(self.dwsl)
            y = block(y)
            if idx + 1 in self.feature_maps:
                outs.append(y)
        return outs

    @property
    def out_shape(self):
        # Shape specs (channels only) of the returned feature maps.
        return [ShapeSpec(channels=c) for c in self._out_channels]
|
|
@ -0,0 +1,496 @@
|
|||
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle import ParamAttr
|
||||
from paddle.regularizer import L2Decay
|
||||
from ppdet.core.workspace import register, serializable
|
||||
from numbers import Integral
|
||||
from ..shape_spec import ShapeSpec
|
||||
|
||||
__all__ = ['MobileNetV3']
|
||||
|
||||
|
||||
def make_divisible(v, divisor=8, min_value=None):
|
||||
if min_value is None:
|
||||
min_value = divisor
|
||||
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
|
||||
if new_v < 0.9 * v:
|
||||
new_v += divisor
|
||||
return new_v
|
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer):
    """Conv2D + batch norm + optional activation (MobileNetV3 variant).

    Unlike the v1 ConvBNLayer, this one supports freezing the norm layer
    (zero LR, non-trainable params, global stats) and names every
    parameter so pretrained weights can be matched by name.
    """

    def __init__(self,
                 in_c,
                 out_c,
                 filter_size,
                 stride,
                 padding,
                 num_groups=1,
                 act=None,
                 lr_mult=1.,
                 conv_decay=0.,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 name=""):
        super(ConvBNLayer, self).__init__()
        self.act = act
        # Bias-free conv; the norm offset plays the bias role.
        self.conv = nn.Conv2D(
            in_channels=in_c,
            out_channels=out_c,
            kernel_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            weight_attr=ParamAttr(
                learning_rate=lr_mult,
                regularizer=L2Decay(conv_decay),
                name=name + "_weights"),
            bias_attr=False)

        # Frozen norm layers get zero learning rate and fixed params.
        norm_lr = 0. if freeze_norm else lr_mult
        param_attr = ParamAttr(
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay),
            name=name + "_bn_scale",
            trainable=False if freeze_norm else True)
        bias_attr = ParamAttr(
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay),
            name=name + "_bn_offset",
            trainable=False if freeze_norm else True)
        # Frozen norm also uses pre-computed (global) mean/variance.
        global_stats = True if freeze_norm else False
        if norm_type == 'sync_bn':
            self.bn = nn.SyncBatchNorm(
                out_c, weight_attr=param_attr, bias_attr=bias_attr)
        else:
            self.bn = nn.BatchNorm(
                out_c,
                act=None,
                param_attr=param_attr,
                bias_attr=bias_attr,
                use_global_stats=global_stats,
                moving_mean_name=name + '_bn_mean',
                moving_variance_name=name + '_bn_variance')
        norm_params = self.bn.parameters()
        # Belt-and-braces: also stop gradients through frozen norm params.
        if freeze_norm:
            for param in norm_params:
                param.stop_gradient = True

    def forward(self, x):
        """conv -> bn -> activation; raises for unknown `act` values."""
        x = self.conv(x)
        x = self.bn(x)
        if self.act is not None:
            if self.act == "relu":
                x = F.relu(x)
            elif self.act == "relu6":
                x = F.relu6(x)
            elif self.act == "hard_swish":
                x = F.hardswish(x)
            else:
                raise NotImplementedError(
                    "The activation function is selected incorrectly.")
        return x
|
||||
|
||||
|
||||
class ResidualUnit(nn.Layer):
    """MobileNetV3 inverted-residual block.

    1x1 expand conv -> depthwise conv (carries the stride) -> optional SE
    attention -> 1x1 linear projection, with an identity shortcut when
    stride is 1 and input/output channels match. When `return_list` is
    True, `forward` returns [expand_output, block_output] so SSD-style
    heads can tap the expanded features.
    """

    def __init__(self,
                 in_c,
                 mid_c,
                 out_c,
                 filter_size,
                 stride,
                 use_se,
                 lr_mult,
                 conv_decay=0.,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 act=None,
                 return_list=False,
                 name=''):
        super(ResidualUnit, self).__init__()
        # Residual add only when the block preserves shape.
        self.if_shortcut = stride == 1 and in_c == out_c
        self.use_se = use_se
        self.return_list = return_list

        # 1x1 expansion to mid_c channels.
        self.expand_conv = ConvBNLayer(
            in_c=in_c,
            out_c=mid_c,
            filter_size=1,
            stride=1,
            padding=0,
            act=act,
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name + "_expand")
        # Depthwise conv (groups == channels) carrying the unit stride.
        self.bottleneck_conv = ConvBNLayer(
            in_c=mid_c,
            out_c=mid_c,
            filter_size=filter_size,
            stride=stride,
            padding=int((filter_size - 1) // 2),
            num_groups=mid_c,
            act=act,
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name + "_depthwise")
        if self.use_se:
            self.mid_se = SEModule(
                mid_c, lr_mult, conv_decay, name=name + "_se")
        # 1x1 linear projection (no activation) back to out_c channels.
        self.linear_conv = ConvBNLayer(
            in_c=mid_c,
            out_c=out_c,
            filter_size=1,
            stride=1,
            padding=0,
            act=None,
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name + "_linear")

    def forward(self, inputs):
        """Run the block; see class docstring for the return_list case."""
        y = self.expand_conv(inputs)
        x = self.bottleneck_conv(y)
        if self.use_se:
            x = self.mid_se(x)
        x = self.linear_conv(x)
        if self.if_shortcut:
            x = paddle.add(inputs, x)
        if self.return_list:
            # [expanded features, block output] for detection heads.
            return [y, x]
        else:
            return x
|
||||
|
||||
|
||||
class SEModule(nn.Layer):
    """Squeeze-and-Excitation channel attention.

    Global average pool -> 1x1 squeeze conv + relu -> 1x1 excite conv +
    hard-sigmoid; the resulting per-channel gate rescales the input.
    """

    def __init__(self, channel, lr_mult, conv_decay, reduction=4, name=""):
        super(SEModule, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2D(1)
        squeezed = int(channel // reduction)

        def _param(suffix):
            # Fresh ParamAttr per parameter; shared LR and decay settings.
            return ParamAttr(
                learning_rate=lr_mult,
                regularizer=L2Decay(conv_decay),
                name=name + suffix)

        # Squeeze: channel -> channel/reduction.
        self.conv1 = nn.Conv2D(
            in_channels=channel,
            out_channels=squeezed,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=_param("_1_weights"),
            bias_attr=_param("_1_offset"))
        # Excite: channel/reduction -> channel.
        self.conv2 = nn.Conv2D(
            in_channels=squeezed,
            out_channels=channel,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=_param("_2_weights"),
            bias_attr=_param("_2_offset"))

    def forward(self, inputs):
        """Compute the channel gate and rescale `inputs` with it."""
        gate = self.avg_pool(inputs)
        gate = F.relu(self.conv1(gate))
        gate = F.hardsigmoid(self.conv2(gate), slope=0.2, offset=0.5)
        return paddle.multiply(x=inputs, y=gate)
|
||||
|
||||
|
||||
class ExtraBlockDW(nn.Layer):
    """Depthwise-separable extra block for the MobileNetV3 SSD head.

    1x1 pointwise conv -> 3x3 depthwise conv (carries the stride) ->
    1x1 projection conv, all with relu6 and 'SAME' padding.
    """

    def __init__(self,
                 in_c,
                 ch_1,
                 ch_2,
                 stride,
                 lr_mult,
                 conv_decay=0.,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 name=None):
        super(ExtraBlockDW, self).__init__()
        # Settings common to all three convs in this block.
        shared = dict(
            act='relu6',
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm)

        # 1x1 expansion to ch_1 channels.
        self.pointwise_conv = ConvBNLayer(
            in_c=in_c,
            out_c=ch_1,
            filter_size=1,
            stride=1,
            padding='SAME',
            name=name + "_extra1",
            **shared)
        # 3x3 depthwise conv with the block stride.
        self.depthwise_conv = ConvBNLayer(
            in_c=ch_1,
            out_c=ch_2,
            filter_size=3,
            stride=stride,
            padding='SAME',
            num_groups=int(ch_1),
            name=name + "_extra2_dw",
            **shared)
        # 1x1 projection keeping ch_2 channels.
        self.normal_conv = ConvBNLayer(
            in_c=ch_2,
            out_c=ch_2,
            filter_size=1,
            stride=1,
            padding='SAME',
            name=name + "_extra2_sep",
            **shared)

    def forward(self, inputs):
        """pointwise -> depthwise -> projection."""
        out = self.pointwise_conv(inputs)
        out = self.depthwise_conv(out)
        return self.normal_conv(out)
|
||||
|
||||
|
||||
@register
@serializable
class MobileNetV3(nn.Layer):
    """MobileNetV3 backbone ("large" or "small" variant).

    The trunk is described by `self.cfg`, one row per ResidualUnit:
    [kernel, expand_channels, out_channels, use_se, activation, stride].
    Blocks whose 1-based layer index (offset by the stem, i.e. i + 2)
    appears in `feature_maps` are returned by `forward`. With
    `with_extra_blocks`, SSDLite extra layers are appended.

    Args:
        scale (float): width multiplier for all channel counts.
        model_name (str): "large" or "small".
        feature_maps (list[int]|int): indices of returned feature maps.
        with_extra_blocks (bool): append SSDLite extra blocks.
        extra_block_filters (list[list[int]]): [ch_1, ch_2] per extra block.
        lr_mult_list (list[float]): LR multipliers, one per group of 3 blocks.
        conv_decay (float): L2 decay for conv weights.
        multiplier (float): extra channel multiplier for the last 3 cfg rows.
        norm_type (str): 'bn' or 'sync_bn'.
        norm_decay (float): L2 decay for norm scale/offset.
        freeze_norm (bool): freeze all norm layers (incompatible with sync_bn).
    """
    __shared__ = ['norm_type']

    def __init__(
            self,
            scale=1.0,
            model_name="large",
            feature_maps=[6, 12, 15],
            with_extra_blocks=False,
            extra_block_filters=[[256, 512], [128, 256], [128, 256], [64, 128]],
            lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
            conv_decay=0.0,
            multiplier=1.0,
            norm_type='bn',
            norm_decay=0.0,
            freeze_norm=False):
        super(MobileNetV3, self).__init__()
        if isinstance(feature_maps, Integral):
            feature_maps = [feature_maps]
        if norm_type == 'sync_bn' and freeze_norm:
            raise ValueError(
                "The norm_type should not be sync_bn when freeze_norm is True")
        self.feature_maps = feature_maps
        self.with_extra_blocks = with_extra_blocks
        self.extra_block_filters = extra_block_filters

        inplanes = 16
        if model_name == "large":
            self.cfg = [
                # k, exp, c, se, nl, s,
                [3, 16, 16, False, "relu", 1],
                [3, 64, 24, False, "relu", 2],
                [3, 72, 24, False, "relu", 1],
                [5, 72, 40, True, "relu", 2],  # RCNN output
                [5, 120, 40, True, "relu", 1],
                [5, 120, 40, True, "relu", 1],  # YOLOv3 output
                [3, 240, 80, False, "hard_swish", 2],  # RCNN output
                [3, 200, 80, False, "hard_swish", 1],
                [3, 184, 80, False, "hard_swish", 1],
                [3, 184, 80, False, "hard_swish", 1],
                [3, 480, 112, True, "hard_swish", 1],
                [3, 672, 112, True, "hard_swish", 1],  # YOLOv3 output
                [5, 672, 160, True, "hard_swish", 2],  # SSD/SSDLite/RCNN output
                [5, 960, 160, True, "hard_swish", 1],
                [5, 960, 160, True, "hard_swish", 1],  # YOLOv3 output
            ]
        elif model_name == "small":
            self.cfg = [
                # k, exp, c, se, nl, s,
                [3, 16, 16, True, "relu", 2],
                [3, 72, 24, False, "relu", 2],  # RCNN output
                [3, 88, 24, False, "relu", 1],  # YOLOv3 output
                [5, 96, 40, True, "hard_swish", 2],  # RCNN output
                [5, 240, 40, True, "hard_swish", 1],
                [5, 240, 40, True, "hard_swish", 1],
                [5, 120, 48, True, "hard_swish", 1],
                [5, 144, 48, True, "hard_swish", 1],  # YOLOv3 output
                [5, 288, 96, True, "hard_swish", 2],  # SSD/SSDLite/RCNN output
                [5, 576, 96, True, "hard_swish", 1],
                [5, 576, 96, True, "hard_swish", 1],  # YOLOv3 output
            ]
        else:
            raise NotImplementedError(
                "mode[{}_model] is not implemented!".format(model_name))

        # `multiplier` thins only the last three stages.
        if multiplier != 1.0:
            self.cfg[-3][2] = int(self.cfg[-3][2] * multiplier)
            self.cfg[-2][1] = int(self.cfg[-2][1] * multiplier)
            self.cfg[-2][2] = int(self.cfg[-2][2] * multiplier)
            self.cfg[-1][1] = int(self.cfg[-1][1] * multiplier)
            self.cfg[-1][2] = int(self.cfg[-1][2] * multiplier)

        # Stem: 3x3 stride-2 conv with hard_swish.
        self.conv1 = ConvBNLayer(
            in_c=3,
            out_c=make_divisible(inplanes * scale),
            filter_size=3,
            stride=2,
            padding=1,
            num_groups=1,
            act="hard_swish",
            lr_mult=lr_mult_list[0],
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name="conv1")

        self._out_channels = []
        self.block_list = []
        i = 0
        inplanes = make_divisible(inplanes * scale)
        for (k, exp, c, se, nl, s) in self.cfg:
            # One lr_mult entry covers three consecutive blocks.
            lr_idx = min(i // 3, len(lr_mult_list) - 1)
            lr_mult = lr_mult_list[lr_idx]

            # for SSD/SSDLite, first head input is after ResidualUnit expand_conv
            return_list = self.with_extra_blocks and i + 2 in self.feature_maps

            block = self.add_sublayer(
                "conv" + str(i + 2),
                sublayer=ResidualUnit(
                    in_c=inplanes,
                    mid_c=make_divisible(scale * exp),
                    out_c=make_divisible(scale * c),
                    filter_size=k,
                    stride=s,
                    use_se=se,
                    act=nl,
                    lr_mult=lr_mult,
                    conv_decay=conv_decay,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    return_list=return_list,
                    name="conv" + str(i + 2)))
            self.block_list.append(block)
            inplanes = make_divisible(scale * c)
            i += 1
            # When the expand features are tapped, their channel count is
            # what the consumer sees, not the block output.
            self._update_out_channels(
                make_divisible(scale * exp)
                if return_list else inplanes, i + 1, feature_maps)

        if self.with_extra_blocks:
            self.extra_block_list = []
            extra_out_c = make_divisible(scale * self.cfg[-1][1])
            lr_idx = min(i // 3, len(lr_mult_list) - 1)
            lr_mult = lr_mult_list[lr_idx]

            # 1x1 transition conv before the SSDLite extra blocks.
            conv_extra = self.add_sublayer(
                "conv" + str(i + 2),
                sublayer=ConvBNLayer(
                    in_c=inplanes,
                    out_c=extra_out_c,
                    filter_size=1,
                    stride=1,
                    padding=0,
                    num_groups=1,
                    act="hard_swish",
                    lr_mult=lr_mult,
                    conv_decay=conv_decay,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    name="conv" + str(i + 2)))
            self.extra_block_list.append(conv_extra)
            i += 1
            self._update_out_channels(extra_out_c, i + 1, feature_maps)

            for j, block_filter in enumerate(self.extra_block_filters):
                in_c = extra_out_c if j == 0 else self.extra_block_filters[j -
                                                                           1][1]
                conv_extra = self.add_sublayer(
                    "conv" + str(i + 2),
                    sublayer=ExtraBlockDW(
                        in_c,
                        block_filter[0],
                        block_filter[1],
                        stride=2,
                        lr_mult=lr_mult,
                        conv_decay=conv_decay,
                        norm_type=norm_type,
                        norm_decay=norm_decay,
                        freeze_norm=freeze_norm,
                        name='conv' + str(i + 2)))
                self.extra_block_list.append(conv_extra)
                i += 1
                self._update_out_channels(block_filter[1], i + 1, feature_maps)

    def _update_out_channels(self, channel, feature_idx, feature_maps):
        # Record the channel count if this index is a requested output.
        if feature_idx in feature_maps:
            self._out_channels.append(channel)

    def forward(self, inputs):
        """Run the backbone on inputs['image'] and return selected features."""
        x = self.conv1(inputs['image'])
        outs = []
        for idx, block in enumerate(self.block_list):
            x = block(x)
            if idx + 2 in self.feature_maps:
                # A list result means [expand features, block output]
                # (see ResidualUnit.return_list).
                if isinstance(x, list):
                    outs.append(x[0])
                    x = x[1]
                else:
                    outs.append(x)

        if not self.with_extra_blocks:
            return outs

        for i, block in enumerate(self.extra_block_list):
            idx = i + len(self.block_list)
            x = block(x)
            if idx + 2 in self.feature_maps:
                outs.append(x)
        return outs

    @property
    def out_shape(self):
        # Shape specs (channels only) of the returned feature maps.
        return [ShapeSpec(channels=c) for c in self._out_channels]
|
|
@ -0,0 +1,69 @@
|
|||
class NameAdapter(object):
    """Fix the backbones variable names for pretrained weight"""

    def __init__(self, model):
        super(NameAdapter, self).__init__()
        self.model = model

    @property
    def model_type(self):
        # Backbone family tag; empty string when the model does not set it.
        return getattr(self.model, '_model_type', '')

    @property
    def variant(self):
        # Backbone variant letter ('a'/'b'/...); empty string if unset.
        return getattr(self.model, 'variant', '')

    def fix_conv_norm_name(self, name):
        """Return the batch-norm parameter name matching conv `name`."""
        # the naming rule is same as pretrained weight
        if self.model_type == 'SEResNeXt':
            return name + "_bn"
        return "bn_" + name if name == "conv1" else "bn" + name[3:]

    def fix_shortcut_name(self, name):
        """Return the projection-shortcut conv name."""
        if self.model_type == 'SEResNeXt':
            return 'conv' + name + '_prj'
        return name

    def fix_bottleneck_name(self, name):
        """Names for the three bottleneck convs and the shortcut conv."""
        if self.model_type == 'SEResNeXt':
            return ('conv' + name + '_x1', 'conv' + name + '_x2',
                    'conv' + name + '_x3', name)
        return (name + "_branch2a", name + "_branch2b", name + "_branch2c",
                name + "_branch1")

    def fix_basicblock_name(self, name):
        """Names for the two basic-block convs and the shortcut conv."""
        if self.model_type == 'SEResNeXt':
            return 'conv' + name + '_x1', 'conv' + name + '_x2', name
        return name + "_branch2a", name + "_branch2b", name + "_branch1"

    def fix_layer_warp_name(self, stage_num, count, i):
        """Name of block `i` in stage `stage_num` (`count` blocks total)."""
        base = 'res' + str(stage_num)
        # Very deep stage-4 (e.g. ResNet-101+) uses the a/b1/b2/... scheme.
        if count > 10 and stage_num == 4:
            conv_name = base + ("a" if i == 0 else "b" + str(i))
        else:
            conv_name = base + chr(ord("a") + i)
        if self.model_type == 'SEResNeXt':
            conv_name = str(stage_num + 2) + '_' + str(i + 1)
        return conv_name

    def fix_c1_stage_name(self):
        """Name of the stem conv stage."""
        return "res_conv1" if self.model_type == 'ResNeXt' else "conv1"
|
|
@ -0,0 +1,357 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from numbers import Integral
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from ppdet.core.workspace import register, serializable
|
||||
from ..shape_spec import ShapeSpec
|
||||
from .resnet import ConvNormLayer
|
||||
|
||||
__all__ = ['Res2Net', 'Res2NetC5']
|
||||
|
||||
Res2Net_cfg = {
|
||||
50: [3, 4, 6, 3],
|
||||
101: [3, 4, 23, 3],
|
||||
152: [3, 8, 36, 3],
|
||||
200: [3, 12, 48, 3]
|
||||
}
|
||||
|
||||
|
||||
class BottleNeck(nn.Layer):
    """Res2Net bottleneck block.

    Like a ResNet bottleneck, but the middle 3x3 stage splits its channels
    into `scales` groups processed hierarchically: each group's 3x3 conv
    receives its split plus the previous group's output, giving multi-scale
    receptive fields within one block (see Res2Net, arXiv:1904.01169).
    """

    def __init__(self,
                 ch_in,
                 ch_out,
                 stride,
                 shortcut,
                 width,
                 scales=4,
                 variant='b',
                 groups=1,
                 lr=1.0,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=True,
                 dcn_v2=False):
        super(BottleNeck, self).__init__()

        self.shortcut = shortcut
        self.scales = scales
        self.stride = stride
        if not shortcut:
            # Projection shortcut. Variant 'd' downsamples with avg-pool
            # then 1x1 conv instead of a strided 1x1 conv.
            if variant == 'd' and stride == 2:
                self.branch1 = nn.Sequential()
                self.branch1.add_sublayer(
                    'pool',
                    nn.AvgPool2D(
                        kernel_size=2, stride=2, padding=0, ceil_mode=True))
                self.branch1.add_sublayer(
                    'conv',
                    ConvNormLayer(
                        ch_in=ch_in,
                        ch_out=ch_out,
                        filter_size=1,
                        stride=1,
                        norm_type=norm_type,
                        norm_decay=norm_decay,
                        freeze_norm=freeze_norm,
                        lr=lr))
            else:
                self.branch1 = ConvNormLayer(
                    ch_in=ch_in,
                    ch_out=ch_out,
                    filter_size=1,
                    stride=stride,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    lr=lr)

        # 1x1 reduce to width*scales channels; variant 'a' strides here.
        self.branch2a = ConvNormLayer(
            ch_in=ch_in,
            ch_out=width * scales,
            filter_size=1,
            stride=stride if variant == 'a' else 1,
            groups=1,
            act='relu',
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            lr=lr)

        # One 3x3 conv per scale group except the last (identity/pool path).
        self.branch2b = nn.LayerList([
            ConvNormLayer(
                ch_in=width,
                ch_out=width,
                filter_size=3,
                stride=1 if variant == 'a' else stride,
                groups=groups,
                act='relu',
                norm_type=norm_type,
                norm_decay=norm_decay,
                freeze_norm=freeze_norm,
                lr=lr,
                dcn_v2=dcn_v2) for _ in range(self.scales - 1)
        ])

        # 1x1 expand back to ch_out channels (no activation before the add).
        self.branch2c = ConvNormLayer(
            ch_in=width * scales,
            ch_out=ch_out,
            filter_size=1,
            stride=1,
            groups=1,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            lr=lr)

    def forward(self, inputs):
        """Hierarchical multi-scale bottleneck with residual add."""

        out = self.branch2a(inputs)
        # Split the reduced features into `scales` channel groups.
        feature_split = paddle.split(out, self.scales, 1)
        out_split = []
        for i in range(self.scales - 1):
            # First group (or any group when striding) gets no carry-over;
            # later groups add the previous group's output first.
            if i == 0 or self.stride == 2:
                out_split.append(self.branch2b[i](feature_split[i]))
            else:
                out_split.append(self.branch2b[i](paddle.add(feature_split[i],
                                                             out_split[-1])))
        # Last group passes through unchanged (stride 1) or avg-pooled.
        if self.stride == 1:
            out_split.append(feature_split[-1])
        else:
            out_split.append(F.avg_pool2d(feature_split[-1], 3, self.stride, 1))
        out = self.branch2c(paddle.concat(out_split, 1))

        if self.shortcut:
            short = inputs
        else:
            short = self.branch1(inputs)

        out = paddle.add(out, short)
        out = F.relu(out)

        return out
|
||||
|
||||
|
||||
class Blocks(nn.Layer):
    """One Res2Net stage: a sequence of `count` BottleNeck blocks.

    Only the first block may change channels and stride (stride 2 for
    every stage except stage 2); the rest use identity shortcuts.
    """

    def __init__(self,
                 ch_in,
                 ch_out,
                 count,
                 stage_num,
                 width,
                 scales=4,
                 variant='b',
                 groups=1,
                 lr=1.0,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=True,
                 dcn_v2=False):
        super(Blocks, self).__init__()

        self.blocks = nn.Sequential()
        # Per-scale width doubles with each deeper stage.
        stage_width = width * (2**(stage_num - 2))
        for idx in range(count):
            is_first = idx == 0
            self.blocks.add_sublayer(
                str(idx),
                BottleNeck(
                    ch_in=ch_in if is_first else ch_out,
                    ch_out=ch_out,
                    stride=2 if is_first and stage_num != 2 else 1,
                    shortcut=not is_first,
                    width=stage_width,
                    scales=scales,
                    variant=variant,
                    groups=groups,
                    lr=lr,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    dcn_v2=dcn_v2))

    def forward(self, inputs):
        """Run the whole stage sequentially."""
        return self.blocks(inputs)
|
||||
|
||||
|
||||
@register
@serializable
class Res2Net(nn.Layer):
    """
    Res2Net, see https://arxiv.org/abs/1904.01169
    Args:
        depth (int): Res2Net depth, should be 50, 101, 152, 200.
        width (int): Res2Net width
        scales (int): Res2Net scale
        variant (str): Res2Net variant, supports 'a', 'b', 'c', 'd' currently
        lr_mult_list (list): learning rate ratio of different resnet stages(2,3,4,5),
                             lower learning rate ratio is need for pretrained model
                             got using distillation(default as [1.0, 1.0, 1.0, 1.0]).
        groups (int): The groups number of the Conv Layer.
        norm_type (str): normalization type, 'bn' or 'sync_bn'
        norm_decay (float): weight decay for normalization layer weights
        freeze_norm (bool): freeze normalization layers
        freeze_at (int): freeze the backbone at which stage
        return_idx (list): index of stages whose feature maps are returned,
                           index 0 stands for res2
        dcn_v2_stages (list): index of stages who select deformable conv v2
        num_stages (int): number of stages created
    """
    __shared__ = ['norm_type']

    # NOTE: list defaults are kept as-is because ppdet's config/serialization
    # machinery reads __init__ defaults; they are never mutated here.
    def __init__(self,
                 depth=50,
                 width=26,
                 scales=4,
                 variant='b',
                 lr_mult_list=[1.0, 1.0, 1.0, 1.0],
                 groups=1,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=True,
                 freeze_at=0,
                 return_idx=[0, 1, 2, 3],
                 dcn_v2_stages=[-1],
                 num_stages=4):
        super(Res2Net, self).__init__()

        self._model_type = 'Res2Net' if groups == 1 else 'Res2NeXt'

        # FIX: the message previously contained a bare "{}" placeholder that
        # was never formatted with the offending depth value.
        assert depth in [50, 101, 152, 200], \
            "depth {} not in [50, 101, 152, 200]".format(depth)
        assert variant in ['a', 'b', 'c', 'd'], "invalid Res2Net variant"
        assert num_stages >= 1 and num_stages <= 4

        self.depth = depth
        self.variant = variant
        self.norm_type = norm_type
        self.norm_decay = norm_decay
        self.freeze_norm = freeze_norm
        self.freeze_at = freeze_at
        if isinstance(return_idx, Integral):
            return_idx = [return_idx]
        assert max(return_idx) < num_stages, \
            'the maximum return index must smaller than num_stages, ' \
            'but received maximum return index is {} and num_stages ' \
            'is {}'.format(max(return_idx), num_stages)
        self.return_idx = return_idx
        self.num_stages = num_stages
        assert len(lr_mult_list) == 4, \
            "lr_mult_list length must be 4 but got {}".format(len(lr_mult_list))
        if isinstance(dcn_v2_stages, Integral):
            dcn_v2_stages = [dcn_v2_stages]
        assert max(dcn_v2_stages) < num_stages
        self.dcn_v2_stages = dcn_v2_stages

        block_nums = Res2Net_cfg[depth]

        # C1 stage: variants 'c'/'d' replace the 7x7 stem with three 3x3 convs.
        if self.variant in ['c', 'd']:
            conv_def = [
                [3, 32, 3, 2, "conv1_1"],
                [32, 32, 3, 1, "conv1_2"],
                [32, 64, 3, 1, "conv1_3"],
            ]
        else:
            conv_def = [[3, 64, 7, 2, "conv1"]]
        self.res1 = nn.Sequential()
        for (c_in, c_out, k, s, _name) in conv_def:
            self.res1.add_sublayer(
                _name,
                ConvNormLayer(
                    ch_in=c_in,
                    ch_out=c_out,
                    filter_size=k,
                    stride=s,
                    groups=1,
                    act='relu',
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    lr=1.0))

        self._in_channels = [64, 256, 512, 1024]
        self._out_channels = [256, 512, 1024, 2048]
        self._out_strides = [4, 8, 16, 32]

        # C2-C5 stages
        # NOTE(review): `variant` is not forwarded to Blocks here, so stages
        # always use Blocks' default variant 'b' — confirm this is intended.
        self.res_layers = []
        for i in range(num_stages):
            lr_mult = lr_mult_list[i]
            stage_num = i + 2
            self.res_layers.append(
                self.add_sublayer(
                    "res{}".format(stage_num),
                    Blocks(
                        self._in_channels[i],
                        self._out_channels[i],
                        count=block_nums[i],
                        stage_num=stage_num,
                        width=width,
                        scales=scales,
                        groups=groups,
                        lr=lr_mult,
                        norm_type=norm_type,
                        norm_decay=norm_decay,
                        freeze_norm=freeze_norm,
                        dcn_v2=(i in self.dcn_v2_stages))))

    @property
    def out_shape(self):
        """ShapeSpec (channels, stride) of each returned stage."""
        return [
            ShapeSpec(
                channels=self._out_channels[i], stride=self._out_strides[i])
            for i in self.return_idx
        ]

    def forward(self, inputs):
        """Run the backbone on inputs['image']; return the selected stages."""
        x = inputs['image']
        res1 = self.res1(x)
        x = F.max_pool2d(res1, kernel_size=3, stride=2, padding=1)
        outs = []
        for idx, stage in enumerate(self.res_layers):
            x = stage(x)
            if idx == self.freeze_at:
                # Detach so this stage and everything before it stops training.
                x.stop_gradient = True
            if idx in self.return_idx:
                outs.append(x)
        return outs
|
||||
|
||||
|
||||
@register
class Res2NetC5(nn.Layer):
    """Standalone res5 stage of Res2Net, used as an RoI feature head."""

    def __init__(self, depth=50, width=26, scales=4, variant='b'):
        super(Res2NetC5, self).__init__()
        feat_in, feat_out = 1024, 2048
        self.res5 = Blocks(
            feat_in,
            feat_out,
            count=3,
            stage_num=5,
            width=width,
            scales=scales,
            variant=variant)
        self.feat_out = feat_out

    @property
    def out_shape(self):
        # res5 emits 2048 channels at an overall stride of 32.
        return [ShapeSpec(
            channels=self.feat_out,
            stride=32, )]

    def forward(self, roi_feat, stage=0):
        return self.res5(roi_feat)
|
|
@ -0,0 +1,606 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import math
|
||||
from numbers import Integral
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from ppdet.core.workspace import register, serializable
|
||||
from paddle.regularizer import L2Decay
|
||||
from paddle.nn.initializer import Uniform
|
||||
from paddle import ParamAttr
|
||||
from paddle.nn.initializer import Constant
|
||||
from paddle.vision.ops import DeformConv2D
|
||||
from .name_adapter import NameAdapter
|
||||
from ..shape_spec import ShapeSpec
|
||||
|
||||
__all__ = ['ResNet', 'Res5Head', 'Blocks', 'BasicBlock', 'BottleNeck']
|
||||
|
||||
# Number of residual blocks in stages res2-res5, keyed by network depth.
ResNet_cfg = {
    18: [2, 2, 2, 2],
    34: [3, 4, 6, 3],
    50: [3, 4, 6, 3],
    101: [3, 4, 23, 3],
    152: [3, 8, 36, 3],
}
|
||||
|
||||
|
||||
class ConvNormLayer(nn.Layer):
    """Conv2D (or DeformConv2D when dcn_v2=True) + batch norm + optional act.

    The convolution carries no bias (the norm's bias takes its place).
    With freeze_norm=True the norm's scale/bias get zero learning rate,
    no weight decay, and stop_gradient, and BatchNorm uses global stats.
    """

    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size,
                 stride,
                 groups=1,
                 act=None,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=True,
                 lr=1.0,
                 dcn_v2=False):
        super(ConvNormLayer, self).__init__()
        assert norm_type in ['bn', 'sync_bn']
        self.norm_type = norm_type
        # Name of a paddle.nn.functional op (e.g. 'relu'), applied last.
        self.act = act
        self.dcn_v2 = dcn_v2

        if not self.dcn_v2:
            self.conv = nn.Conv2D(
                in_channels=ch_in,
                out_channels=ch_out,
                kernel_size=filter_size,
                stride=stride,
                padding=(filter_size - 1) // 2,
                groups=groups,
                weight_attr=ParamAttr(learning_rate=lr),
                bias_attr=False)
        else:
            # DCNv2: an auxiliary conv predicts 2*k*k offsets + k*k masks.
            self.offset_channel = 2 * filter_size**2
            self.mask_channel = filter_size**2

            # Zero-initialized so the deformable conv starts out behaving
            # like a regular convolution.
            self.conv_offset = nn.Conv2D(
                in_channels=ch_in,
                out_channels=3 * filter_size**2,
                kernel_size=filter_size,
                stride=stride,
                padding=(filter_size - 1) // 2,
                weight_attr=ParamAttr(initializer=Constant(0.)),
                bias_attr=ParamAttr(initializer=Constant(0.)))
            self.conv = DeformConv2D(
                in_channels=ch_in,
                out_channels=ch_out,
                kernel_size=filter_size,
                stride=stride,
                padding=(filter_size - 1) // 2,
                dilation=1,
                groups=groups,
                weight_attr=ParamAttr(learning_rate=lr),
                bias_attr=False)

        # Frozen norm: zero learning rate plus non-trainable scale/bias.
        norm_lr = 0. if freeze_norm else lr
        param_attr = ParamAttr(
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay),
            trainable=False if freeze_norm else True)
        bias_attr = ParamAttr(
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay),
            trainable=False if freeze_norm else True)

        # Frozen norm also uses running (global) statistics during training.
        global_stats = True if freeze_norm else False
        if norm_type == 'sync_bn':
            self.norm = nn.SyncBatchNorm(
                ch_out, weight_attr=param_attr, bias_attr=bias_attr)
        else:
            self.norm = nn.BatchNorm(
                ch_out,
                act=None,
                param_attr=param_attr,
                bias_attr=bias_attr,
                use_global_stats=global_stats)
        norm_params = self.norm.parameters()

        if freeze_norm:
            for param in norm_params:
                param.stop_gradient = True

    def forward(self, inputs):
        if not self.dcn_v2:
            out = self.conv(inputs)
        else:
            # Predict offsets/masks, then run the deformable convolution.
            offset_mask = self.conv_offset(inputs)
            offset, mask = paddle.split(
                offset_mask,
                num_or_sections=[self.offset_channel, self.mask_channel],
                axis=1)
            mask = F.sigmoid(mask)
            out = self.conv(inputs, offset, mask=mask)

        if self.norm_type in ['bn', 'sync_bn']:
            out = self.norm(out)
        if self.act:
            out = getattr(F, self.act)(out)
        return out
|
||||
|
||||
|
||||
class SELayer(nn.Layer):
    """Squeeze-and-Excitation channel attention, see
    https://arxiv.org/abs/1709.01507.
    """

    def __init__(self, ch, reduction_ratio=16):
        super(SELayer, self).__init__()
        self.pool = nn.AdaptiveAvgPool2D(1)

        reduced = ch // reduction_ratio
        bound = 1.0 / math.sqrt(ch)
        # FC that squeezes channels down by `reduction_ratio`.
        self.squeeze = nn.Linear(
            ch,
            reduced,
            weight_attr=paddle.ParamAttr(initializer=Uniform(-bound, bound)),
            bias_attr=True)

        bound = 1.0 / math.sqrt(reduced)
        # FC that expands back to the original channel count.
        self.extract = nn.Linear(
            reduced,
            ch,
            weight_attr=paddle.ParamAttr(initializer=Uniform(-bound, bound)),
            bias_attr=True)

    def forward(self, inputs):
        # Squeeze: global average pool to a per-channel vector [N, C].
        gate = self.pool(inputs)
        gate = paddle.squeeze(gate, axis=[2, 3])
        # Excite: bottleneck MLP yielding per-channel weights in (0, 1).
        gate = F.relu(self.squeeze(gate))
        gate = F.sigmoid(self.extract(gate))
        # Rescale the input feature map channel-wise.
        gate = paddle.unsqueeze(gate, axis=[2, 3])
        return gate * inputs
|
||||
|
||||
|
||||
class BasicBlock(nn.Layer):
    """Two-3x3-conv residual block used by ResNet-18/34 (no bottleneck)."""

    # Output channels = ch_out * expansion.
    expansion = 1

    def __init__(self,
                 ch_in,
                 ch_out,
                 stride,
                 shortcut,
                 variant='b',
                 groups=1,
                 base_width=64,
                 lr=1.0,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=True,
                 dcn_v2=False,
                 std_senet=False):
        super(BasicBlock, self).__init__()
        assert dcn_v2 is False, "Not implemented yet."
        assert groups == 1 and base_width == 64, 'BasicBlock only supports groups=1 and base_width=64'

        # shortcut=True means identity skip; otherwise a projection is built.
        self.shortcut = shortcut
        if not shortcut:
            if variant == 'd' and stride == 2:
                # Variant 'd': downsample with avg-pool, then 1x1 conv,
                # instead of a strided 1x1 conv (preserves information).
                self.short = nn.Sequential()
                self.short.add_sublayer(
                    'pool',
                    nn.AvgPool2D(
                        kernel_size=2, stride=2, padding=0, ceil_mode=True))
                self.short.add_sublayer(
                    'conv',
                    ConvNormLayer(
                        ch_in=ch_in,
                        ch_out=ch_out,
                        filter_size=1,
                        stride=1,
                        norm_type=norm_type,
                        norm_decay=norm_decay,
                        freeze_norm=freeze_norm,
                        lr=lr))
            else:
                # Plain (possibly strided) 1x1 projection shortcut.
                self.short = ConvNormLayer(
                    ch_in=ch_in,
                    ch_out=ch_out,
                    filter_size=1,
                    stride=stride,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    lr=lr)

        # First 3x3 conv carries the stride and the activation.
        self.branch2a = ConvNormLayer(
            ch_in=ch_in,
            ch_out=ch_out,
            filter_size=3,
            stride=stride,
            act='relu',
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            lr=lr)

        # Second 3x3 conv: no activation until after the residual add.
        self.branch2b = ConvNormLayer(
            ch_in=ch_out,
            ch_out=ch_out,
            filter_size=3,
            stride=1,
            act=None,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            lr=lr)

        # Optional SE attention on the residual branch (SE-ResNet style).
        self.std_senet = std_senet
        if self.std_senet:
            self.se = SELayer(ch_out)

    def forward(self, inputs):
        out = self.branch2a(inputs)
        out = self.branch2b(out)
        if self.std_senet:
            out = self.se(out)

        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)

        out = paddle.add(x=out, y=short)
        out = F.relu(out)

        return out
|
||||
|
||||
|
||||
class BottleNeck(nn.Layer):
    """1x1 -> 3x3 -> 1x1 bottleneck residual block for ResNet-50/101/152
    (supports ResNeXt grouping, DCNv2 on the 3x3 conv, and optional SE).
    """

    # Output channels = ch_out * expansion.
    expansion = 4

    def __init__(self,
                 ch_in,
                 ch_out,
                 stride,
                 shortcut,
                 variant='b',
                 groups=1,
                 base_width=4,
                 lr=1.0,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=True,
                 dcn_v2=False,
                 std_senet=False):
        # NOTE(review): base_width defaults to 4 here while ResNet passes 64
        # explicitly; with groups=1, base_width=64 keeps width == ch_out.
        super(BottleNeck, self).__init__()
        # Variant 'a' puts the stride on the first 1x1 conv; all other
        # variants put it on the 3x3 conv.
        if variant == 'a':
            stride1, stride2 = stride, 1
        else:
            stride1, stride2 = 1, stride

        # ResNeXt
        width = int(ch_out * (base_width / 64.)) * groups

        self.shortcut = shortcut
        if not shortcut:
            if variant == 'd' and stride == 2:
                # Variant 'd': avg-pool + 1x1 conv projection shortcut.
                self.short = nn.Sequential()
                self.short.add_sublayer(
                    'pool',
                    nn.AvgPool2D(
                        kernel_size=2, stride=2, padding=0, ceil_mode=True))
                self.short.add_sublayer(
                    'conv',
                    ConvNormLayer(
                        ch_in=ch_in,
                        ch_out=ch_out * self.expansion,
                        filter_size=1,
                        stride=1,
                        norm_type=norm_type,
                        norm_decay=norm_decay,
                        freeze_norm=freeze_norm,
                        lr=lr))
            else:
                self.short = ConvNormLayer(
                    ch_in=ch_in,
                    ch_out=ch_out * self.expansion,
                    filter_size=1,
                    stride=stride,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    lr=lr)

        # 1x1 reduce.
        self.branch2a = ConvNormLayer(
            ch_in=ch_in,
            ch_out=width,
            filter_size=1,
            stride=stride1,
            groups=1,
            act='relu',
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            lr=lr)

        # 3x3 (grouped) conv; DCNv2 applies here when enabled.
        self.branch2b = ConvNormLayer(
            ch_in=width,
            ch_out=width,
            filter_size=3,
            stride=stride2,
            groups=groups,
            act='relu',
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            lr=lr,
            dcn_v2=dcn_v2)

        # 1x1 expand; no activation until after the residual add.
        self.branch2c = ConvNormLayer(
            ch_in=width,
            ch_out=ch_out * self.expansion,
            filter_size=1,
            stride=1,
            groups=1,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            lr=lr)

        # Optional SE attention on the residual branch.
        self.std_senet = std_senet
        if self.std_senet:
            self.se = SELayer(ch_out * self.expansion)

    def forward(self, inputs):

        out = self.branch2a(inputs)
        out = self.branch2b(out)
        out = self.branch2c(out)

        if self.std_senet:
            out = self.se(out)

        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)

        out = paddle.add(x=out, y=short)
        out = F.relu(out)

        return out
|
||||
|
||||
|
||||
class Blocks(nn.Layer):
    """One ResNet stage: `count` residual units of the given block class.

    Sublayer names come from `name_adapter` so they match the parameter
    names of pretrained checkpoints — do not rename them.
    """

    def __init__(self,
                 block,
                 ch_in,
                 ch_out,
                 count,
                 name_adapter,
                 stage_num,
                 variant='b',
                 groups=1,
                 base_width=64,
                 lr=1.0,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=True,
                 dcn_v2=False,
                 std_senet=False):
        super(Blocks, self).__init__()

        self.blocks = []
        for i in range(count):
            conv_name = name_adapter.fix_layer_warp_name(stage_num, count, i)
            layer = self.add_sublayer(
                conv_name,
                block(
                    ch_in=ch_in,
                    ch_out=ch_out,
                    # Only the first unit downsamples (never in stage res2).
                    stride=2 if i == 0 and stage_num != 2 else 1,
                    shortcut=False if i == 0 else True,
                    variant=variant,
                    groups=groups,
                    base_width=base_width,
                    lr=lr,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    dcn_v2=dcn_v2,
                    std_senet=std_senet))
            self.blocks.append(layer)
            if i == 0:
                # After the first unit the channel count is expanded.
                ch_in = ch_out * block.expansion

    def forward(self, inputs):
        block_out = inputs
        for block in self.blocks:
            block_out = block(block_out)
        return block_out
|
||||
|
||||
|
||||
@register
@serializable
class ResNet(nn.Layer):
    __shared__ = ['norm_type']

    # NOTE: list defaults are kept as-is because ppdet's config/serialization
    # machinery reads __init__ defaults; they are never mutated here.
    def __init__(self,
                 depth=50,
                 ch_in=64,
                 variant='b',
                 lr_mult_list=[1.0, 1.0, 1.0, 1.0],
                 groups=1,
                 base_width=64,
                 norm_type='bn',
                 norm_decay=0,
                 freeze_norm=True,
                 freeze_at=0,
                 return_idx=[0, 1, 2, 3],
                 dcn_v2_stages=[-1],
                 num_stages=4,
                 std_senet=False):
        """
        Residual Network, see https://arxiv.org/abs/1512.03385

        Args:
            depth (int): ResNet depth, should be 18, 34, 50, 101, 152.
            ch_in (int): output channel of first stage, default 64
            variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently
            lr_mult_list (list): learning rate ratio of different resnet stages(2,3,4,5),
                                 lower learning rate ratio is need for pretrained model
                                 got using distillation(default as [1.0, 1.0, 1.0, 1.0]).
            groups (int): group convolution cardinality
            base_width (int): base width of each group convolution
            norm_type (str): normalization type, 'bn', 'sync_bn' or 'affine_channel'
            norm_decay (float): weight decay for normalization layer weights
            freeze_norm (bool): freeze normalization layers
            freeze_at (int): freeze the backbone at which stage
            return_idx (list): index of the stages whose feature maps are returned
            dcn_v2_stages (list): index of stages who select deformable conv v2
            num_stages (int): total num of stages
            std_senet (bool): whether use senet, default False
        """
        super(ResNet, self).__init__()
        self._model_type = 'ResNet' if groups == 1 else 'ResNeXt'
        assert num_stages >= 1 and num_stages <= 4
        self.depth = depth
        self.variant = variant
        self.groups = groups
        self.base_width = base_width
        self.norm_type = norm_type
        self.norm_decay = norm_decay
        self.freeze_norm = freeze_norm
        self.freeze_at = freeze_at
        if isinstance(return_idx, Integral):
            return_idx = [return_idx]
        assert max(return_idx) < num_stages, \
            'the maximum return index must smaller than num_stages, ' \
            'but received maximum return index is {} and num_stages ' \
            'is {}'.format(max(return_idx), num_stages)
        self.return_idx = return_idx
        self.num_stages = num_stages
        assert len(lr_mult_list) == 4, \
            "lr_mult_list length must be 4 but got {}".format(len(lr_mult_list))
        # FIX: this normalization + assert appeared twice verbatim in the
        # original; one copy suffices.
        if isinstance(dcn_v2_stages, Integral):
            dcn_v2_stages = [dcn_v2_stages]
        assert max(dcn_v2_stages) < num_stages
        self.dcn_v2_stages = dcn_v2_stages

        block_nums = ResNet_cfg[depth]
        na = NameAdapter(self)

        # C1 stage: variants 'c'/'d' replace the 7x7 stem with three 3x3 convs.
        conv1_name = na.fix_c1_stage_name()
        if variant in ['c', 'd']:
            conv_def = [
                [3, ch_in // 2, 3, 2, "conv1_1"],
                [ch_in // 2, ch_in // 2, 3, 1, "conv1_2"],
                [ch_in // 2, ch_in, 3, 1, "conv1_3"],
            ]
        else:
            conv_def = [[3, ch_in, 7, 2, conv1_name]]
        self.conv1 = nn.Sequential()
        for (c_in, c_out, k, s, _name) in conv_def:
            self.conv1.add_sublayer(
                _name,
                ConvNormLayer(
                    ch_in=c_in,
                    ch_out=c_out,
                    filter_size=k,
                    stride=s,
                    groups=1,
                    act='relu',
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    lr=1.0))

        self.ch_in = ch_in
        ch_out_list = [64, 128, 256, 512]
        # Deep networks use the 4x-expanding bottleneck block.
        block = BottleNeck if depth >= 50 else BasicBlock

        self._out_channels = [block.expansion * v for v in ch_out_list]
        self._out_strides = [4, 8, 16, 32]

        # C2-C5 stages.
        self.res_layers = []
        for i in range(num_stages):
            lr_mult = lr_mult_list[i]
            stage_num = i + 2
            res_name = "res{}".format(stage_num)
            res_layer = self.add_sublayer(
                res_name,
                Blocks(
                    block,
                    self.ch_in,
                    ch_out_list[i],
                    count=block_nums[i],
                    name_adapter=na,
                    stage_num=stage_num,
                    variant=variant,
                    groups=groups,
                    base_width=base_width,
                    lr=lr_mult,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    dcn_v2=(i in self.dcn_v2_stages),
                    std_senet=std_senet))
            self.res_layers.append(res_layer)
            self.ch_in = self._out_channels[i]

    @property
    def out_shape(self):
        """ShapeSpec (channels, stride) of each returned stage."""
        return [
            ShapeSpec(
                channels=self._out_channels[i], stride=self._out_strides[i])
            for i in self.return_idx
        ]

    def forward(self, inputs):
        """Run the backbone on inputs['image']; return the selected stages."""
        x = inputs['image']
        conv1 = self.conv1(x)
        x = F.max_pool2d(conv1, kernel_size=3, stride=2, padding=1)
        outs = []
        for idx, stage in enumerate(self.res_layers):
            x = stage(x)
            if idx == self.freeze_at:
                # Detach so this stage and everything before it stops training.
                x.stop_gradient = True
            if idx in self.return_idx:
                outs.append(x)
        return outs
|
||||
|
||||
|
||||
@register
class Res5Head(nn.Layer):
    """Standalone res5 stage of ResNet, used as an RoI feature head."""

    def __init__(self, depth=50):
        super(Res5Head, self).__init__()
        # Defaults for bottleneck nets; shallow nets (18/34) enter with 256.
        feat_in, feat_out = [1024, 512]
        if depth < 50:
            feat_in = 256
        na = NameAdapter(self)
        block = BottleNeck if depth >= 50 else BasicBlock
        self.res5 = Blocks(
            block, feat_in, feat_out, count=3, name_adapter=na, stage_num=5)
        # Bottleneck blocks expand channels 4x; basic blocks do not.
        self.feat_out = feat_out if depth < 50 else feat_out * 4

    @property
    def out_shape(self):
        # NOTE(review): stride=16 assumes the RoI features come from a
        # stride-16 map — confirm against the RoI extractor configuration.
        return [ShapeSpec(
            channels=self.feat_out,
            stride=16, )]

    def forward(self, roi_feat, stage=0):
        y = self.res5(roi_feat)
        return y
|
|
@ -0,0 +1,139 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import paddle.nn as nn

from ppdet.core.workspace import register, serializable
from .name_adapter import NameAdapter
from .resnet import ResNet, Blocks, BasicBlock, BottleNeck
from ..shape_spec import ShapeSpec
|
||||
|
||||
__all__ = ['SENet', 'SERes5Head']
|
||||
|
||||
|
||||
@register
@serializable
class SENet(ResNet):
    """SE-ResNet backbone: a ResNet with SE blocks enabled and a 128-channel
    deep stem. All construction is delegated to the ResNet base class.
    """

    __shared__ = ['norm_type']

    def __init__(self,
                 depth=50,
                 variant='b',
                 lr_mult_list=[1.0, 1.0, 1.0, 1.0],
                 groups=1,
                 base_width=64,
                 norm_type='bn',
                 norm_decay=0,
                 freeze_norm=True,
                 freeze_at=0,
                 return_idx=[0, 1, 2, 3],
                 dcn_v2_stages=[-1],
                 std_senet=True,
                 num_stages=4):
        """
        Squeeze-and-Excitation Networks, see https://arxiv.org/abs/1709.01507

        Args:
            depth (int): SENet depth, should be 50, 101, 152
            variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently
            lr_mult_list (list): learning rate ratio of different resnet stages(2,3,4,5),
                                 lower learning rate ratio is need for pretrained model
                                 got using distillation(default as [1.0, 1.0, 1.0, 1.0]).
            groups (int): group convolution cardinality
            base_width (int): base width of each group convolution
            norm_type (str): normalization type, 'bn', 'sync_bn' or 'affine_channel'
            norm_decay (float): weight decay for normalization layer weights
            freeze_norm (bool): freeze normalization layers
            freeze_at (int): freeze the backbone at which stage
            return_idx (list): index of the stages whose feature maps are returned
            dcn_v2_stages (list): index of stages who select deformable conv v2
            std_senet (bool): whether use senet, default True
            num_stages (int): total num of stages
        """

        # ch_in=128: SENet uses a wider deep stem than plain ResNet (64).
        super(SENet, self).__init__(
            depth=depth,
            variant=variant,
            lr_mult_list=lr_mult_list,
            ch_in=128,
            groups=groups,
            base_width=base_width,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            freeze_at=freeze_at,
            return_idx=return_idx,
            dcn_v2_stages=dcn_v2_stages,
            std_senet=std_senet,
            num_stages=num_stages)
|
||||
|
||||
|
||||
@register
class SERes5Head(nn.Layer):
    """Standalone res5 stage with SE blocks, used as an RoI feature head.

    NOTE(review): this module references ``NameAdapter`` and ``ShapeSpec``,
    which must be imported at the top of this file.
    """

    def __init__(self,
                 depth=50,
                 variant='b',
                 lr_mult=1.0,
                 groups=1,
                 base_width=64,
                 norm_type='bn',
                 norm_decay=0,
                 dcn_v2=False,
                 freeze_norm=False,
                 std_senet=True):
        """
        SERes5Head layer

        Args:
            depth (int): SENet depth, should be 50, 101, 152
            variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently
            lr_mult (list): learning rate ratio of SERes5Head, default as 1.0.
            groups (int): group convolution cardinality
            base_width (int): base width of each group convolution
            norm_type (str): normalization type, 'bn', 'sync_bn' or 'affine_channel'
            norm_decay (float): weight decay for normalization layer weights
            dcn_v2_stages (list): index of stages who select deformable conv v2
            std_senet (bool): whether use senet, default True

        """
        super(SERes5Head, self).__init__()
        ch_out = 512
        # Shallow nets (18/34) enter res5 with 256 channels, deep with 1024.
        ch_in = 256 if depth < 50 else 1024
        na = NameAdapter(self)
        block = BottleNeck if depth >= 50 else BasicBlock
        self.res5 = Blocks(
            block,
            ch_in,
            ch_out,
            count=3,
            name_adapter=na,
            stage_num=5,
            variant=variant,
            groups=groups,
            base_width=base_width,
            lr=lr_mult,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            dcn_v2=dcn_v2,
            std_senet=std_senet)
        # Bottleneck blocks expand channels 4x; basic blocks do not.
        self.ch_out = ch_out * block.expansion

    @property
    def out_shape(self):
        return [ShapeSpec(
            channels=self.ch_out,
            stride=16, )]

    def forward(self, roi_feat):
        y = self.res5(roi_feat)
        return y
|
|
@ -0,0 +1,215 @@
|
|||
from __future__ import division
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle import ParamAttr
|
||||
from paddle.nn import Conv2D, MaxPool2D
|
||||
from ppdet.core.workspace import register, serializable
|
||||
from ..shape_spec import ShapeSpec
|
||||
|
||||
__all__ = ['VGG']
|
||||
|
||||
# Number of conv layers in each of the five VGG blocks, keyed by depth.
VGG_cfg = {16: [2, 2, 3, 3, 3], 19: [2, 2, 4, 4, 4]}
|
||||
|
||||
|
||||
class ConvBlock(nn.Layer):
    """A VGG block: `groups` 3x3 conv+ReLU layers followed by a max pool.

    Parameter names (via ParamAttr `name=`) must match pretrained VGG
    checkpoints — do not change the naming scheme.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 groups,
                 pool_size=2,
                 pool_stride=2,
                 pool_padding=0,
                 name=None):
        super(ConvBlock, self).__init__()

        # Here `groups` is the number of convs in the block, not conv groups.
        self.groups = groups
        self.conv0 = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            weight_attr=ParamAttr(name=name + "1_weights"),
            bias_attr=ParamAttr(name=name + "1_bias"))
        # Remaining convs; add_sublayer registers them so the plain list is
        # safe to keep for ordered iteration in forward().
        self.conv_out_list = []
        for i in range(1, groups):
            conv_out = self.add_sublayer(
                'conv{}'.format(i),
                Conv2D(
                    in_channels=out_channels,
                    out_channels=out_channels,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    weight_attr=ParamAttr(
                        name=name + "{}_weights".format(i + 1)),
                    bias_attr=ParamAttr(name=name + "{}_bias".format(i + 1))))
            self.conv_out_list.append(conv_out)

        self.pool = MaxPool2D(
            kernel_size=pool_size,
            stride=pool_stride,
            padding=pool_padding,
            ceil_mode=True)

    def forward(self, inputs):
        """Returns (pre-pool activation, pooled activation) — SSD taps both."""
        out = self.conv0(inputs)
        out = F.relu(out)
        for conv_i in self.conv_out_list:
            out = conv_i(out)
            out = F.relu(out)
        pool = self.pool(out)
        return out, pool
|
||||
|
||||
|
||||
class ExtraBlock(nn.Layer):
    """SSD extra feature block: 1x1 channel-reduce conv followed by a
    kxk conv that may downsample, each with ReLU.
    """

    def __init__(self,
                 in_channels,
                 mid_channels,
                 out_channels,
                 padding,
                 stride,
                 kernel_size,
                 name=None):
        super(ExtraBlock, self).__init__()

        # Pointwise channel-reduction conv.
        self.conv0 = Conv2D(
            in_channels=in_channels,
            out_channels=mid_channels,
            kernel_size=1,
            stride=1,
            padding=0)
        # Spatial conv; downsamples when stride > 1.
        self.conv1 = Conv2D(
            in_channels=mid_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding)

    def forward(self, inputs):
        reduced = F.relu(self.conv0(inputs))
        expanded = F.relu(self.conv1(reduced))
        return expanded
|
||||
|
||||
|
||||
class L2NormScale(nn.Layer):
    """Channel-wise L2 normalization followed by a learnable per-channel
    scale (ParseNet-style, used on SSD's conv4_3 output).

    Args:
        num_channels (int): channel count C of the incoming NCHW tensor.
        scale (float): initial value for every channel's scale parameter.
    """

    def __init__(self, num_channels, scale=1.0):
        super(L2NormScale, self).__init__()
        self.scale = self.create_parameter(
            attr=ParamAttr(initializer=paddle.nn.initializer.Constant(scale)),
            shape=[num_channels])

    def forward(self, inputs):
        # L2-normalize along the channel axis.
        out = F.normalize(inputs, axis=1, epsilon=1e-10)
        # Reshape the [C] scale to [1, C, 1, 1] and rely on broadcasting
        # (the dead expand_as variant was removed).
        out = self.scale.unsqueeze(0).unsqueeze(2).unsqueeze(3) * out
        return out
|
||||
|
||||
|
||||
@register
|
||||
@serializable
|
||||
class VGG(nn.Layer):
|
||||
def __init__(self,
|
||||
depth=16,
|
||||
normalizations=[20., -1, -1, -1, -1, -1],
|
||||
extra_block_filters=[[256, 512, 1, 2, 3], [128, 256, 1, 2, 3],
|
||||
[128, 256, 0, 1, 3],
|
||||
[128, 256, 0, 1, 3]]):
|
||||
super(VGG, self).__init__()
|
||||
|
||||
assert depth in [16, 19], \
|
||||
"depth as 16/19 supported currently, but got {}".format(depth)
|
||||
self.depth = depth
|
||||
self.groups = VGG_cfg[depth]
|
||||
self.normalizations = normalizations
|
||||
self.extra_block_filters = extra_block_filters
|
||||
|
||||
self._out_channels = []
|
||||
|
||||
self.conv_block_0 = ConvBlock(
|
||||
3, 64, self.groups[0], 2, 2, 0, name="conv1_")
|
||||
self.conv_block_1 = ConvBlock(
|
||||
64, 128, self.groups[1], 2, 2, 0, name="conv2_")
|
||||
self.conv_block_2 = ConvBlock(
|
||||
128, 256, self.groups[2], 2, 2, 0, name="conv3_")
|
||||
self.conv_block_3 = ConvBlock(
|
||||
256, 512, self.groups[3], 2, 2, 0, name="conv4_")
|
||||
self.conv_block_4 = ConvBlock(
|
||||
512, 512, self.groups[4], 3, 1, 1, name="conv5_")
|
||||
self._out_channels.append(512)
|
||||
|
||||
self.fc6 = Conv2D(
|
||||
in_channels=512,
|
||||
out_channels=1024,
|
||||
kernel_size=3,
|
||||
stride=1,
|
||||
padding=6,
|
||||
dilation=6)
|
||||
self.fc7 = Conv2D(
|
||||
in_channels=1024,
|
||||
out_channels=1024,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
padding=0)
|
||||
self._out_channels.append(1024)
|
||||
|
||||
# extra block
|
||||
self.extra_convs = []
|
||||
last_channels = 1024
|
||||
for i, v in enumerate(self.extra_block_filters):
|
||||
assert len(v) == 5, "extra_block_filters size not fix"
|
||||
extra_conv = self.add_sublayer("conv{}".format(6 + i),
|
||||
ExtraBlock(last_channels, v[0], v[1],
|
||||
v[2], v[3], v[4]))
|
||||
last_channels = v[1]
|
||||
self.extra_convs.append(extra_conv)
|
||||
self._out_channels.append(last_channels)
|
||||
|
||||
self.norms = []
|
||||
for i, n in enumerate(self.normalizations):
|
||||
if n != -1:
|
||||
norm = self.add_sublayer("norm{}".format(i),
|
||||
L2NormScale(
|
||||
self.extra_block_filters[i][1], n))
|
||||
else:
|
||||
norm = None
|
||||
self.norms.append(norm)
|
||||
|
||||
def forward(self, inputs):
    """Run the VGG backbone on inputs['image'] and return multi-scale features."""
    feats = []

    # Stem: four pooled conv blocks; the conv4 activation is the first output.
    _, pooled = self.conv_block_0(inputs['image'])
    _, pooled = self.conv_block_1(pooled)
    _, pooled = self.conv_block_2(pooled)
    conv4, pooled = self.conv_block_3(pooled)
    feats.append(conv4)

    # conv5 followed by the dilated fc6/fc7 convolutions (second output).
    _, pooled = self.conv_block_4(pooled)
    feat = F.relu(self.fc6(pooled))
    feat = F.relu(self.fc7(feat))
    feats.append(feat)

    if not self.extra_block_filters:
        return feats

    # SSD-style extra blocks stacked on top of fc7, one output each.
    for extra_layer in self.extra_convs:
        feat = extra_layer(feat)
        feats.append(feat)

    # Apply L2 normalization-with-scale where configured (-1 means skip).
    for idx, scale_flag in enumerate(self.normalizations):
        if scale_flag != -1:
            feats[idx] = self.norms[idx](feats[idx])

    return feats
|
||||
|
||||
@property
def out_shape(self):
    """Per-output-feature shape specs (channel counts only)."""
    return [ShapeSpec(channels=ch) for ch in self._out_channels]
|
|
@ -0,0 +1,459 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import math
|
||||
import paddle
|
||||
import numpy as np
|
||||
|
||||
|
||||
def bbox2delta(src_boxes, tgt_boxes, weights):
    """Encode target boxes as weighted (dx, dy, dw, dh) deltas w.r.t. source boxes.

    Boxes are [N, 4] in x1y1x2y2 form; weights is a 4-sequence (wx, wy, ww, wh).
    Returns an [N, 4] tensor of deltas.
    """
    wx, wy, ww, wh = weights

    src_w = src_boxes[:, 2] - src_boxes[:, 0]
    src_h = src_boxes[:, 3] - src_boxes[:, 1]
    src_cx = src_boxes[:, 0] + 0.5 * src_w
    src_cy = src_boxes[:, 1] + 0.5 * src_h

    tgt_w = tgt_boxes[:, 2] - tgt_boxes[:, 0]
    tgt_h = tgt_boxes[:, 3] - tgt_boxes[:, 1]
    tgt_cx = tgt_boxes[:, 0] + 0.5 * tgt_w
    tgt_cy = tgt_boxes[:, 1] + 0.5 * tgt_h

    # Center offsets are normalized by the source size; sizes use log ratios.
    dx = wx * (tgt_cx - src_cx) / src_w
    dy = wy * (tgt_cy - src_cy) / src_h
    dw = ww * paddle.log(tgt_w / src_w)
    dh = wh * paddle.log(tgt_h / src_h)

    return paddle.stack((dx, dy, dw, dh), axis=1)
|
||||
|
||||
|
||||
def delta2bbox(deltas, boxes, weights):
    """Decode weighted (dx, dy, dw, dh) deltas back into absolute x1y1x2y2 boxes.

    `deltas` may carry one delta set per class (stride-4 slicing below);
    `boxes` is [N, 4]; `weights` is (wx, wy, ww, wh).
    """
    clip_scale = math.log(1000.0 / 16)

    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]
    cx = boxes[:, 0] + 0.5 * w
    cy = boxes[:, 1] + 0.5 * h

    wx, wy, ww, wh = weights
    dx = deltas[:, 0::4] / wx
    dy = deltas[:, 1::4] / wy
    # Clamp the size deltas so paddle.exp() below cannot overflow.
    dw = paddle.clip(deltas[:, 2::4] / ww, max=clip_scale)
    dh = paddle.clip(deltas[:, 3::4] / wh, max=clip_scale)

    pred_cx = dx * w.unsqueeze(1) + cx.unsqueeze(1)
    pred_cy = dy * h.unsqueeze(1) + cy.unsqueeze(1)
    pred_w = paddle.exp(dw) * w.unsqueeze(1)
    pred_h = paddle.exp(dh) * h.unsqueeze(1)

    corners = [
        pred_cx - 0.5 * pred_w,
        pred_cy - 0.5 * pred_h,
        pred_cx + 0.5 * pred_w,
        pred_cy + 0.5 * pred_h,
    ]
    return paddle.stack(corners, axis=-1)
|
||||
|
||||
|
||||
def expand_bbox(bboxes, scale):
    """Scale each x1y1x2y2 box about its own center by `scale`.

    Returns a new float32 ndarray of the same shape; the input is untouched.
    """
    half_w = (bboxes[:, 2] - bboxes[:, 0]) * 0.5 * scale
    half_h = (bboxes[:, 3] - bboxes[:, 1]) * 0.5 * scale
    center_x = (bboxes[:, 2] + bboxes[:, 0]) * 0.5
    center_y = (bboxes[:, 3] + bboxes[:, 1]) * 0.5

    expanded = np.zeros(bboxes.shape, dtype=np.float32)
    expanded[:, 0] = center_x - half_w
    expanded[:, 1] = center_y - half_h
    expanded[:, 2] = center_x + half_w
    expanded[:, 3] = center_y + half_h
    return expanded
|
||||
|
||||
|
||||
def clip_bbox(boxes, im_shape):
    """Clamp x1y1x2y2 box coordinates to the image extent (im_shape = [h, w])."""
    height, width = im_shape[0], im_shape[1]
    clipped = [
        boxes[:, 0].clip(0, width),
        boxes[:, 1].clip(0, height),
        boxes[:, 2].clip(0, width),
        boxes[:, 3].clip(0, height),
    ]
    return paddle.stack(clipped, axis=1)
|
||||
|
||||
|
||||
def nonempty_bbox(boxes, min_size=0, return_mask=False):
    """Select boxes whose width AND height both exceed `min_size`.

    Args:
        boxes (Tensor): [N, 4] boxes in x1y1x2y2 form.
        min_size (float): minimum side length for a box to be kept.
        return_mask (bool): if True, return the boolean mask itself
            instead of the kept indices.

    Returns:
        Tensor: boolean mask of shape [N] when return_mask is True,
        otherwise the flattened indices of the non-empty boxes.
    """
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]
    # Bug fix: the original tested `w > min_size` twice and never used h,
    # so zero-height boxes were wrongly kept.
    mask = paddle.logical_and(w > min_size, h > min_size)
    if return_mask:
        return mask
    keep = paddle.nonzero(mask).flatten()
    return keep
|
||||
|
||||
|
||||
def bbox_area(boxes):
    """Area of each [x1, y1, x2, y2] box in `boxes`."""
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    return widths * heights
|
||||
|
||||
|
||||
def bbox_overlaps(boxes1, boxes2):
    """
    Calculate pairwise IoU between two sets of boxes.

    Args:
        boxes1 (Tensor): boxes with shape [M, 4]
        boxes2 (Tensor): boxes with shape [N, 4]

    Return:
        overlaps (Tensor): IoU matrix between boxes1 and boxes2, shape [M, N]
    """
    num_a, num_b = boxes1.shape[0], boxes2.shape[0]
    if num_a * num_b == 0:
        # Degenerate case: one side is empty, nothing to compare.
        return paddle.zeros([num_a, num_b], dtype='float32')

    area_a = bbox_area(boxes1)
    area_b = bbox_area(boxes2)

    # Broadcast boxes1 ([M, 1, 4]) against boxes2 ([N, 4]) for pairwise corners.
    inter_hi = paddle.minimum(
        paddle.unsqueeze(boxes1, 1)[:, :, 2:], boxes2[:, 2:])
    inter_lo = paddle.maximum(
        paddle.unsqueeze(boxes1, 1)[:, :, :2], boxes2[:, :2])
    extents = (inter_hi - inter_lo).clip(min=0)
    inter = extents.prod(axis=2)

    union = paddle.unsqueeze(area_a, 1) + area_b - inter
    # Guard the division: pairs with no overlap get exactly 0.
    return paddle.where(inter > 0, inter / union, paddle.zeros_like(inter))
|
||||
|
||||
|
||||
def xywh2xyxy(box):
    """Convert a [cx, cy, w, h] box to [x1, y1, x2, y2] corner form."""
    center_x, center_y, width, height = box
    half_w = width * 0.5
    half_h = height * 0.5
    return [center_x - half_w, center_y - half_h,
            center_x + half_w, center_y + half_h]
|
||||
|
||||
|
||||
def make_grid(h, w, dtype):
    """Build an [h, w, 2] grid of (x, y) cell coordinates cast to `dtype`."""
    ys, xs = paddle.meshgrid([paddle.arange(h), paddle.arange(w)])
    grid = paddle.stack((xs, ys), 2)
    return grid.cast(dtype=dtype)
|
||||
|
||||
|
||||
def decode_yolo(box, anchor, downsample_ratio):
    """decode yolo box

    Args:
        box (list): [x, y, w, h], all have the shape [b, na, h, w, 1]
        anchor (list): anchor with the shape [na, 2]
        downsample_ratio (int): downsample ratio, default 32
        scale (float): scale, default 1.

    Return:
        box (list): decoded box, [x, y, w, h], all have the shape [b, na, h, w, 1]
    """
    x, y, w, h = box
    na, grid_h, grid_w = x.shape[1:4]
    # Per-cell (x, y) index grid, reshaped to broadcast over [b, na, h, w, 2].
    grid = make_grid(grid_h, grid_w, x.dtype).reshape((1, 1, grid_h, grid_w, 2))
    # Centers: add the cell offset, then normalize to [0, 1] of the feature map.
    x1 = (x + grid[:, :, :, :, 0:1]) / grid_w
    y1 = (y + grid[:, :, :, :, 1:2]) / grid_h

    anchor = paddle.to_tensor(anchor)
    anchor = paddle.cast(anchor, x.dtype)
    anchor = anchor.reshape((1, na, 1, 1, 2))
    # Sizes: exp(pred) scaled by the anchor, normalized by the network input
    # resolution (downsample_ratio * grid size == input pixels).
    w1 = paddle.exp(w) * anchor[:, :, :, :, 0:1] / (downsample_ratio * grid_w)
    h1 = paddle.exp(h) * anchor[:, :, :, :, 1:2] / (downsample_ratio * grid_h)

    return [x1, y1, w1, h1]
|
||||
|
||||
|
||||
def iou_similarity(box1, box2, eps=1e-9):
    """Calculate iou of box1 and box2

    Args:
        box1 (Tensor): box with the shape [N, M1, 4]
        box2 (Tensor): box with the shape [N, M2, 4]

    Return:
        iou (Tensor): iou between box1 and box2 with the shape [N, M1, M2]
    """
    # Broadcast [N, M1, 1, 4] against [N, 1, M2, 4] -> pairwise [N, M1, M2].
    a = box1.unsqueeze(2)
    b = box2.unsqueeze(1)
    a_min, a_max = a[:, :, :, 0:2], a[:, :, :, 2:4]
    b_min, b_max = b[:, :, :, 0:2], b[:, :, :, 2:4]
    inter_lo = paddle.maximum(a_min, b_min)
    inter_hi = paddle.minimum(a_max, b_max)
    overlap = (inter_hi - inter_lo).clip(0).prod(-1)
    area_a = (a_max - a_min).clip(0).prod(-1)
    area_b = (b_max - b_min).clip(0).prod(-1)
    # eps keeps the division finite when both areas are zero.
    return overlap / (area_a + area_b - overlap + eps)
|
||||
|
||||
|
||||
def bbox_iou(box1, box2, giou=False, diou=False, ciou=False, eps=1e-9):
    """calculate the iou of box1 and box2

    Args:
        box1 (list): [x, y, w, h], all have the shape [b, na, h, w, 1]
        box2 (list): [x, y, w, h], all have the shape [b, na, h, w, 1]
        giou (bool): whether use giou or not, default False
        diou (bool): whether use diou or not, default False
        ciou (bool): whether use ciou or not, default False
        eps (float): epsilon to avoid divide by zero

    Return:
        iou (Tensor): iou of box1 and box1, with the shape [b, na, h, w, 1]
    """
    # NOTE(review): despite the docstring's [x, y, w, h], the unpacking below
    # treats both boxes as corner coordinates (x1, y1, x2, y2) — confirm
    # against callers before relying on the docstring.
    px1, py1, px2, py2 = box1
    gx1, gy1, gx2, gy2 = box2
    # Intersection rectangle corners.
    x1 = paddle.maximum(px1, gx1)
    y1 = paddle.maximum(py1, gy1)
    x2 = paddle.minimum(px2, gx2)
    y2 = paddle.minimum(py2, gy2)

    # clip(0) zeroes the overlap for disjoint boxes.
    overlap = ((x2 - x1).clip(0)) * ((y2 - y1).clip(0))

    area1 = (px2 - px1) * (py2 - py1)
    area1 = area1.clip(0)

    area2 = (gx2 - gx1) * (gy2 - gy1)
    area2 = area2.clip(0)

    union = area1 + area2 - overlap + eps
    iou = overlap / union

    if giou or ciou or diou:
        # convex w, h
        cw = paddle.maximum(px2, gx2) - paddle.minimum(px1, gx1)
        ch = paddle.maximum(py2, gy2) - paddle.minimum(py1, gy1)
        if giou:
            # GIoU: penalize by the empty fraction of the enclosing box.
            c_area = cw * ch + eps
            return iou - (c_area - union) / c_area
        else:
            # convex diagonal squared
            c2 = cw**2 + ch**2 + eps
            # center distance
            rho2 = ((px1 + px2 - gx1 - gx2)**2 + (py1 + py2 - gy1 - gy2)**2) / 4
            if diou:
                # DIoU: penalize by normalized center distance.
                return iou - rho2 / c2
            else:
                # CIoU: DIoU plus an aspect-ratio consistency term v,
                # with alpha detached so it acts as a fixed trade-off weight.
                w1, h1 = px2 - px1, py2 - py1 + eps
                w2, h2 = gx2 - gx1, gy2 - gy1 + eps
                delta = paddle.atan(w1 / h1) - paddle.atan(w2 / h2)
                v = (4 / math.pi**2) * paddle.pow(delta, 2)
                alpha = v / (1 + eps - iou + v)
                alpha.stop_gradient = True
                return iou - (rho2 / c2 + v * alpha)
    else:
        return iou
|
||||
|
||||
|
||||
def poly2rbox(polys):
    """
    poly:[x0,y0,x1,y1,x2,y2,x3,y3]
    to
    rotated_boxes:[x_ctr,y_ctr,w,h,angle]
    """
    rotated_boxes = []
    for poly in polys:
        poly = np.array(poly[:8], dtype=np.float32)

        pt1 = (poly[0], poly[1])
        pt2 = (poly[2], poly[3])
        pt3 = (poly[4], poly[5])
        pt4 = (poly[6], poly[7])

        # Lengths of two adjacent sides of the quad: pt1->pt2 and pt2->pt3.
        edge1 = np.sqrt((pt1[0] - pt2[0]) * (pt1[0] - pt2[0]) + (pt1[1] - pt2[
            1]) * (pt1[1] - pt2[1]))
        edge2 = np.sqrt((pt2[0] - pt3[0]) * (pt2[0] - pt3[0]) + (pt2[1] - pt3[
            1]) * (pt2[1] - pt3[1]))

        width = max(edge1, edge2)
        height = min(edge1, edge2)

        # The angle follows the longer edge.
        # Bug fix: `np.float` was deprecated in NumPy 1.20 and removed in
        # 1.24, so the original crashed on modern NumPy; the builtin float
        # is the documented replacement.
        rbox_angle = 0
        if edge1 > edge2:
            rbox_angle = np.arctan2(
                float(pt2[1] - pt1[1]), float(pt2[0] - pt1[0]))
        elif edge2 >= edge1:
            rbox_angle = np.arctan2(
                float(pt4[1] - pt1[1]), float(pt4[0] - pt1[0]))

        def norm_angle(angle, range=[-np.pi / 4, np.pi]):
            # Wrap the angle into [-pi/4, 3*pi/4).
            return (angle - range[0]) % range[1] + range[0]

        rbox_angle = norm_angle(rbox_angle)

        # The center is the midpoint of the pt1-pt3 diagonal.
        x_ctr = float(pt1[0] + pt3[0]) / 2
        y_ctr = float(pt1[1] + pt3[1]) / 2
        rotated_box = np.array([x_ctr, y_ctr, width, height, rbox_angle])
        rotated_boxes.append(rotated_box)
    ret_rotated_boxes = np.array(rotated_boxes)
    assert ret_rotated_boxes.shape[1] == 5
    return ret_rotated_boxes
|
||||
|
||||
|
||||
def cal_line_length(point1, point2):
    """Euclidean distance between two 2-D points.

    The original re-imported `math` on every call; the module is already
    imported at file scope, and `math.hypot` is the standard,
    numerically stable way to compute sqrt(dx**2 + dy**2).
    """
    return math.hypot(point1[0] - point2[0], point1[1] - point2[1])
|
||||
|
||||
|
||||
def get_best_begin_point_single(coordinate):
    """Rotate a quad's vertex order so it begins nearest the top-left corner.

    Args:
        coordinate: flat sequence [x1, y1, x2, y2, x3, y3, x4, y4].

    Returns:
        np.ndarray of shape (8,): the same four vertices, cyclically rotated
        so that the ordering minimizes the total distance to the axis-aligned
        bounding box corners taken in TL, TR, BR, BL order.
    """
    x1, y1, x2, y2, x3, y3, x4, y4 = coordinate
    xmin = min(x1, x2, x3, x4)
    ymin = min(y1, y2, y3, y4)
    xmax = max(x1, x2, x3, x4)
    ymax = max(y1, y2, y3, y4)
    # The four cyclic rotations of the vertex order.
    combinate = [[[x1, y1], [x2, y2], [x3, y3], [x4, y4]],
                 [[x4, y4], [x1, y1], [x2, y2], [x3, y3]],
                 [[x3, y3], [x4, y4], [x1, y1], [x2, y2]],
                 [[x2, y2], [x3, y3], [x4, y4], [x1, y1]]]
    # Axis-aligned bounding-box corners, in TL, TR, BR, BL order.
    dst_coordinate = [[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]]
    force = 100000000.0
    force_flag = 0
    for i in range(4):
        temp_force = cal_line_length(combinate[i][0], dst_coordinate[0]) \
                     + cal_line_length(combinate[i][1], dst_coordinate[1]) \
                     + cal_line_length(combinate[i][2], dst_coordinate[2]) \
                     + cal_line_length(combinate[i][3], dst_coordinate[3])
        if temp_force < force:
            force = temp_force
            force_flag = i
    # Cleanup: the original had a dead `if force_flag != 0: pass` here.
    return np.array(combinate[force_flag]).reshape(8)
|
||||
|
||||
|
||||
def rbox2poly_np(rrects):
    """
    rrect:[x_ctr,y_ctr,w,h,angle]
    to
    poly:[x0,y0,x1,y1,x2,y2,x3,y3]
    """
    polys = []
    for idx in range(rrects.shape[0]):
        rrect = rrects[idx]
        cx, cy = rrect[0], rrect[1]
        w, h = rrect[2], rrect[3]
        theta = rrect[4]

        # Corners of the axis-aligned box centered at the origin, as a
        # 2x4 matrix of (x; y) columns: TL, TR, BR, BL.
        half_w, half_h = w / 2, h / 2
        corners = np.array([[-half_w, half_w, half_w, -half_w],
                            [-half_h, -half_h, half_h, half_h]])
        # Rotate by theta, then translate to the box center.
        rotation = np.array([[np.cos(theta), -np.sin(theta)],
                             [np.sin(theta), np.cos(theta)]])
        rotated = rotation.dot(corners)
        xs = rotated[0, :4] + cx
        ys = rotated[1, :4] + cy
        poly = np.array([xs[0], ys[0], xs[1], ys[1],
                         xs[2], ys[2], xs[3], ys[3]], dtype=np.float32)
        # Canonicalize the vertex order before returning.
        polys.append(get_best_begin_point_single(poly))
    return np.array(polys)
|
||||
|
||||
|
||||
def rbox2poly(rrects):
    """
    rrect:[x_ctr,y_ctr,w,h,angle]
    to
    poly:[x0,y0,x1,y1,x2,y2,x3,y3]
    """
    N = paddle.shape(rrects)[0]

    x_ctr = rrects[:, 0]
    y_ctr = rrects[:, 1]
    width = rrects[:, 2]
    height = rrects[:, 3]
    angle = rrects[:, 4]

    tl_x, tl_y, br_x, br_y = -width * 0.5, -height * 0.5, width * 0.5, height * 0.5

    # Per-box corner template centered at the origin: the 8 stacked rows
    # reshape to [2, 4, N] (x-row then y-row), then transpose to [N, 2, 4].
    normal_rects = paddle.stack(
        [tl_x, br_x, br_x, tl_x, tl_y, tl_y, br_y, br_y], axis=0)
    normal_rects = paddle.reshape(normal_rects, [2, 4, N])
    normal_rects = paddle.transpose(normal_rects, [2, 0, 1])

    sin, cos = paddle.sin(angle), paddle.cos(angle)
    # Per-box 2x2 rotation matrices, built the same way.
    # M.shape=[N,2,2]
    M = paddle.stack([cos, -sin, sin, cos], axis=0)
    M = paddle.reshape(M, [2, 2, N])
    M = paddle.transpose(M, [2, 0, 1])

    # Rotate the corners, then flatten [N, 2, 4] to [N, 8] via the
    # transpose/reshape/transpose sequence so coordinates interleave as
    # x0,y0,x1,y1,... — the order is load-bearing, do not simplify.
    # polys:[N,8]
    polys = paddle.matmul(M, normal_rects)
    polys = paddle.transpose(polys, [2, 1, 0])
    polys = paddle.reshape(polys, [-1, N])
    polys = paddle.transpose(polys, [1, 0])

    # Translate every (x, y) pair to the box center.
    tmp = paddle.stack(
        [x_ctr, y_ctr, x_ctr, y_ctr, x_ctr, y_ctr, x_ctr, y_ctr], axis=1)
    polys = polys + tmp
    return polys
|
||||
|
||||
|
||||
def bbox_iou_np_expand(box1, box2, x1y1x2y2=True, eps=1e-16):
    """
    Calculate the iou of box1 and box2 with numpy.

    Args:
        box1 (ndarray): [N, 4]
        box2 (ndarray): [M, 4], usually N != M
        x1y1x2y2 (bool): whether in x1y1x2y2 stype, default True
        eps (float): epsilon to avoid divide by zero
    Return:
        iou (ndarray): iou of box1 and box2, [N, M]
    """
    n_boxes, m_boxes = len(box1), len(box2)  # usually N != M
    if x1y1x2y2:
        a_x1, a_y1 = box1[:, 0], box1[:, 1]
        a_x2, a_y2 = box1[:, 2], box1[:, 3]
        b_x1, b_y1 = box2[:, 0], box2[:, 1]
        b_x2, b_y2 = box2[:, 2], box2[:, 3]
    else:
        # cxcywh style: derive corner coordinates from centers and sizes.
        a_x1, a_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
        a_y1, a_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
        b_x1, b_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
        b_y1, b_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2

    # Pairwise intersection rectangle, filled one box2 column at a time.
    inter_x1 = np.zeros((n_boxes, m_boxes), dtype=np.float32)
    inter_y1 = np.zeros((n_boxes, m_boxes), dtype=np.float32)
    inter_x2 = np.zeros((n_boxes, m_boxes), dtype=np.float32)
    inter_y2 = np.zeros((n_boxes, m_boxes), dtype=np.float32)
    for col in range(m_boxes):
        inter_x1[:, col] = np.maximum(a_x1, b_x1[col])
        inter_y1[:, col] = np.maximum(a_y1, b_y1[col])
        inter_x2[:, col] = np.minimum(a_x2, b_x2[col])
        inter_y2[:, col] = np.minimum(a_y2, b_y2[col])

    # Intersection area (clamped at zero for disjoint pairs).
    inter_area = np.maximum(inter_x2 - inter_x1, 0) * np.maximum(
        inter_y2 - inter_y1, 0)
    # Broadcast each set's areas to the full [N, M] grid.
    areas_a = np.repeat(
        ((a_x2 - a_x1) * (a_y2 - a_y1)).reshape(-1, 1), m_boxes, axis=-1)
    areas_b = np.repeat(
        ((b_x2 - b_x1) * (b_y2 - b_y1)).reshape(1, -1), n_boxes, axis=0)

    return inter_area / (areas_a + areas_b - inter_area + eps)
|
|
@ -0,0 +1,41 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from . import bbox_head
|
||||
from . import mask_head
|
||||
from . import yolo_head
|
||||
from . import roi_extractor
|
||||
from . import ssd_head
|
||||
from . import fcos_head
|
||||
from . import solov2_head
|
||||
from . import ttf_head
|
||||
from . import cascade_head
|
||||
from . import face_head
|
||||
from . import s2anet_head
|
||||
from . import keypoint_hrhrnet_head
|
||||
from . import centernet_head
|
||||
|
||||
from .bbox_head import *
|
||||
from .mask_head import *
|
||||
from .yolo_head import *
|
||||
from .roi_extractor import *
|
||||
from .ssd_head import *
|
||||
from .fcos_head import *
|
||||
from .solov2_head import *
|
||||
from .ttf_head import *
|
||||
from .cascade_head import *
|
||||
from .face_head import *
|
||||
from .s2anet_head import *
|
||||
from .keypoint_hrhrnet_head import *
|
||||
from .centernet_head import *
|
|
@ -0,0 +1,376 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import numpy as np
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle.nn.initializer import Normal, XavierUniform, KaimingNormal
|
||||
from paddle.regularizer import L2Decay
|
||||
|
||||
from ppdet.core.workspace import register, create
|
||||
from .roi_extractor import RoIAlign
|
||||
from ..shape_spec import ShapeSpec
|
||||
from ..bbox_utils import bbox2delta
|
||||
from ppdet.modeling.layers import ConvNormLayer
|
||||
|
||||
__all__ = ['TwoFCHead', 'XConvNormHead', 'BBoxHead']
|
||||
|
||||
|
||||
@register
class TwoFCHead(nn.Layer):
    """
    RCNN bbox head with two fully-connected layers to extract features.

    Args:
        in_channel (int): Input channel which can be derived by from_config
        out_channel (int): Output channel
        resolution (int): Resolution of input feature map, default 7
    """

    def __init__(self, in_channel=256, out_channel=1024, resolution=7):
        super(TwoFCHead, self).__init__()
        self.in_channel = in_channel
        self.out_channel = out_channel
        # Size of the flattened RoI feature feeding fc6.
        flat_dim = in_channel * resolution * resolution
        self.fc6 = nn.Linear(
            flat_dim,
            out_channel,
            weight_attr=paddle.ParamAttr(
                initializer=XavierUniform(fan_out=flat_dim)))
        self.fc6.skip_quant = True

        self.fc7 = nn.Linear(
            out_channel,
            out_channel,
            weight_attr=paddle.ParamAttr(initializer=XavierUniform()))
        self.fc7.skip_quant = True

    @classmethod
    def from_config(cls, cfg, input_shape):
        # Derive in_channel from the (first) input feature's shape spec.
        shape = input_shape
        if isinstance(shape, (list, tuple)):
            shape = shape[0]
        return {'in_channel': shape.channels}

    @property
    def out_shape(self):
        return [ShapeSpec(channels=self.out_channel, )]

    def forward(self, rois_feat):
        # Flatten each RoI feature map, then apply fc6 -> relu -> fc7 -> relu.
        flat = paddle.flatten(rois_feat, start_axis=1, stop_axis=-1)
        hidden = F.relu(self.fc6(flat))
        return F.relu(self.fc7(hidden))
|
||||
|
||||
|
||||
@register
class XConvNormHead(nn.Layer):
    __shared__ = ['norm_type', 'freeze_norm']
    """
    RCNN bbox head with serveral convolution layers

    Args:
        in_channel (int): Input channels which can be derived by from_config
        num_convs (int): The number of conv layers
        conv_dim (int): The number of channels for the conv layers
        out_channel (int): Output channels
        resolution (int): Resolution of input feature map
        norm_type (string): Norm type, bn, gn, sync_bn are available,
            default `gn`
        freeze_norm (bool): Whether to freeze the norm
        stage_name (string): Prefix name for conv layer, '' by default
    """

    def __init__(self,
                 in_channel=256,
                 num_convs=4,
                 conv_dim=256,
                 out_channel=1024,
                 resolution=7,
                 norm_type='gn',
                 freeze_norm=False,
                 stage_name=''):
        super(XConvNormHead, self).__init__()
        self.in_channel = in_channel
        self.num_convs = num_convs
        self.conv_dim = conv_dim
        self.out_channel = out_channel
        self.norm_type = norm_type
        self.freeze_norm = freeze_norm

        # Stack of 3x3 conv+norm layers; all share one Kaiming initializer
        # sized to the 3x3xconv_dim fan-in.
        self.bbox_head_convs = []
        fan = conv_dim * 3 * 3
        initializer = KaimingNormal(fan_in=fan)
        for i in range(self.num_convs):
            # First conv adapts in_channel; the rest stay at conv_dim.
            in_c = in_channel if i == 0 else conv_dim
            head_conv_name = stage_name + 'bbox_head_conv{}'.format(i)
            head_conv = self.add_sublayer(
                head_conv_name,
                ConvNormLayer(
                    ch_in=in_c,
                    ch_out=conv_dim,
                    filter_size=3,
                    stride=1,
                    norm_type=self.norm_type,
                    freeze_norm=self.freeze_norm,
                    initializer=initializer))
            self.bbox_head_convs.append(head_conv)

        # Final fc over the flattened conv output; bias gets 2x learning
        # rate and no weight decay.
        fan = conv_dim * resolution * resolution
        self.fc6 = nn.Linear(
            conv_dim * resolution * resolution,
            out_channel,
            weight_attr=paddle.ParamAttr(
                initializer=XavierUniform(fan_out=fan)),
            bias_attr=paddle.ParamAttr(
                learning_rate=2., regularizer=L2Decay(0.)))

    @classmethod
    def from_config(cls, cfg, input_shape):
        # Derive in_channel from the (first) input feature's shape spec.
        s = input_shape
        s = s[0] if isinstance(s, (list, tuple)) else s
        return {'in_channel': s.channels}

    @property
    def out_shape(self):
        return [ShapeSpec(channels=self.out_channel, )]

    def forward(self, rois_feat):
        # conv+norm+relu stack, then flatten and project through fc6+relu.
        for i in range(self.num_convs):
            rois_feat = F.relu(self.bbox_head_convs[i](rois_feat))
        rois_feat = paddle.flatten(rois_feat, start_axis=1, stop_axis=-1)
        fc6 = F.relu(self.fc6(rois_feat))
        return fc6
|
||||
|
||||
|
||||
@register
class BBoxHead(nn.Layer):
    __shared__ = ['num_classes']
    __inject__ = ['bbox_assigner', 'bbox_loss']
    """
    RCNN bbox head

    Args:
        head (nn.Layer): Extract feature in bbox head
        in_channel (int): Input channel after RoI extractor
        roi_extractor (object): The module of RoI Extractor
        bbox_assigner (object): The module of Box Assigner, label and sample the
            box.
        with_pool (bool): Whether to use pooling for the RoI feature.
        num_classes (int): The number of classes
        bbox_weight (List[float]): The weight to get the decode box
    """

    def __init__(self,
                 head,
                 in_channel,
                 roi_extractor=RoIAlign().__dict__,
                 bbox_assigner='BboxAssigner',
                 with_pool=False,
                 num_classes=80,
                 bbox_weight=[10., 10., 5., 5.],
                 bbox_loss=None):
        super(BBoxHead, self).__init__()
        self.head = head
        self.roi_extractor = roi_extractor
        # A dict config is materialized into an actual RoIAlign layer.
        if isinstance(roi_extractor, dict):
            self.roi_extractor = RoIAlign(**roi_extractor)
        self.bbox_assigner = bbox_assigner

        self.with_pool = with_pool
        self.num_classes = num_classes
        self.bbox_weight = bbox_weight
        self.bbox_loss = bbox_loss

        # Classification branch: num_classes + 1 (background) logits.
        self.bbox_score = nn.Linear(
            in_channel,
            self.num_classes + 1,
            weight_attr=paddle.ParamAttr(initializer=Normal(
                mean=0.0, std=0.01)))
        self.bbox_score.skip_quant = True

        # Regression branch: 4 deltas per (foreground) class.
        self.bbox_delta = nn.Linear(
            in_channel,
            4 * self.num_classes,
            weight_attr=paddle.ParamAttr(initializer=Normal(
                mean=0.0, std=0.001)))
        self.bbox_delta.skip_quant = True
        # Filled during training forward; read back via the getters below.
        self.assigned_label = None
        self.assigned_rois = None

    @classmethod
    def from_config(cls, cfg, input_shape):
        roi_pooler = cfg['roi_extractor']
        assert isinstance(roi_pooler, dict)
        kwargs = RoIAlign.from_config(cfg, input_shape)
        roi_pooler.update(kwargs)
        kwargs = {'input_shape': input_shape}
        head = create(cfg['head'], **kwargs)
        return {
            'roi_extractor': roi_pooler,
            'head': head,
            'in_channel': head.out_shape[0].channels
        }

    def forward(self, body_feats=None, rois=None, rois_num=None, inputs=None):
        """
        body_feats (list[Tensor]): Feature maps from backbone
        rois (list[Tensor]): RoIs generated from RPN module
        rois_num (Tensor): The number of RoIs in each image
        inputs (dict{Tensor}): The ground-truth of image
        """
        if self.training:
            # Sample/label the proposals and stash them for later retrieval.
            rois, rois_num, targets = self.bbox_assigner(rois, rois_num, inputs)
            self.assigned_rois = (rois, rois_num)
            self.assigned_targets = targets

        rois_feat = self.roi_extractor(body_feats, rois, rois_num)
        bbox_feat = self.head(rois_feat)
        if self.with_pool:
            # Collapse the spatial dims instead of relying on the head
            # having flattened them.
            feat = F.adaptive_avg_pool2d(bbox_feat, output_size=1)
            feat = paddle.squeeze(feat, axis=[2, 3])
        else:
            feat = bbox_feat
        scores = self.bbox_score(feat)
        deltas = self.bbox_delta(feat)

        if self.training:
            loss = self.get_loss(scores, deltas, targets, rois,
                                 self.bbox_weight)
            return loss, bbox_feat
        else:
            pred = self.get_prediction(scores, deltas)
            return pred, self.head

    def get_loss(self, scores, deltas, targets, rois, bbox_weight):
        """
        scores (Tensor): scores from bbox head outputs
        deltas (Tensor): deltas from bbox head outputs
        targets (list[List[Tensor]]): bbox targets containing tgt_labels, tgt_bboxes and tgt_gt_inds
        rois (List[Tensor]): RoIs generated in each batch
        """
        cls_name = 'loss_bbox_cls'
        reg_name = 'loss_bbox_reg'
        loss_bbox = {}

        # TODO: better pass args
        tgt_labels, tgt_bboxes, tgt_gt_inds = targets

        # bbox cls
        tgt_labels = paddle.concat(tgt_labels) if len(
            tgt_labels) > 1 else tgt_labels[0]
        valid_inds = paddle.nonzero(tgt_labels >= 0).flatten()
        if valid_inds.shape[0] == 0:
            # No labeled samples at all: zero classification loss.
            loss_bbox[cls_name] = paddle.zeros([1], dtype='float32')
        else:
            tgt_labels = tgt_labels.cast('int64')
            tgt_labels.stop_gradient = True
            loss_bbox_cls = F.cross_entropy(
                input=scores, label=tgt_labels, reduction='mean')
            loss_bbox[cls_name] = loss_bbox_cls

        # bbox reg

        # A single 4-wide delta output means class-agnostic regression.
        cls_agnostic_bbox_reg = deltas.shape[1] == 4

        # Foreground = labeled and not background (label == num_classes).
        fg_inds = paddle.nonzero(
            paddle.logical_and(tgt_labels >= 0, tgt_labels <
                               self.num_classes)).flatten()

        if fg_inds.numel() == 0:
            # No foreground samples: zero regression loss.
            loss_bbox[reg_name] = paddle.zeros([1], dtype='float32')
            return loss_bbox

        if cls_agnostic_bbox_reg:
            reg_delta = paddle.gather(deltas, fg_inds)
        else:
            # Class-specific deltas: build (row, col) index pairs selecting
            # the 4 delta columns belonging to each sample's GT class.
            fg_gt_classes = paddle.gather(tgt_labels, fg_inds)

            reg_row_inds = paddle.arange(fg_gt_classes.shape[0]).unsqueeze(1)
            reg_row_inds = paddle.tile(reg_row_inds, [1, 4]).reshape([-1, 1])

            reg_col_inds = 4 * fg_gt_classes.unsqueeze(1) + paddle.arange(4)

            reg_col_inds = reg_col_inds.reshape([-1, 1])
            reg_inds = paddle.concat([reg_row_inds, reg_col_inds], axis=1)

            reg_delta = paddle.gather(deltas, fg_inds)
            reg_delta = paddle.gather_nd(reg_delta, reg_inds).reshape([-1, 4])
        rois = paddle.concat(rois) if len(rois) > 1 else rois[0]
        tgt_bboxes = paddle.concat(tgt_bboxes) if len(
            tgt_bboxes) > 1 else tgt_bboxes[0]

        # Regression targets are deltas from the sampled RoIs to their GTs.
        reg_target = bbox2delta(rois, tgt_bboxes, bbox_weight)
        reg_target = paddle.gather(reg_target, fg_inds)
        reg_target.stop_gradient = True

        if self.bbox_loss is not None:
            # Custom loss (e.g. an IoU loss) operates on decoded boxes,
            # so transform deltas back to box space first.
            reg_delta = self.bbox_transform(reg_delta)
            reg_target = self.bbox_transform(reg_target)
            loss_bbox_reg = self.bbox_loss(
                reg_delta, reg_target).sum() / tgt_labels.shape[0]
            loss_bbox_reg *= self.num_classes
        else:
            # Default: L1 on deltas, normalized by the total sample count.
            loss_bbox_reg = paddle.abs(reg_delta - reg_target).sum(
            ) / tgt_labels.shape[0]

        loss_bbox[reg_name] = loss_bbox_reg

        return loss_bbox

    def bbox_transform(self, deltas, weights=[0.1, 0.1, 0.2, 0.2]):
        # Decode weighted deltas into flattened corner coordinates.
        wx, wy, ww, wh = weights

        deltas = paddle.reshape(deltas, shape=(0, -1, 4))

        dx = paddle.slice(deltas, axes=[2], starts=[0], ends=[1]) * wx
        dy = paddle.slice(deltas, axes=[2], starts=[1], ends=[2]) * wy
        dw = paddle.slice(deltas, axes=[2], starts=[2], ends=[3]) * ww
        dh = paddle.slice(deltas, axes=[2], starts=[3], ends=[4]) * wh

        # Clamp size deltas so exp() cannot overflow.
        dw = paddle.clip(dw, -1.e10, np.log(1000. / 16))
        dh = paddle.clip(dh, -1.e10, np.log(1000. / 16))

        pred_ctr_x = dx
        pred_ctr_y = dy
        pred_w = paddle.exp(dw)
        pred_h = paddle.exp(dh)

        x1 = pred_ctr_x - 0.5 * pred_w
        y1 = pred_ctr_y - 0.5 * pred_h
        x2 = pred_ctr_x + 0.5 * pred_w
        y2 = pred_ctr_y + 0.5 * pred_h

        x1 = paddle.reshape(x1, shape=(-1, ))
        y1 = paddle.reshape(y1, shape=(-1, ))
        x2 = paddle.reshape(x2, shape=(-1, ))
        y2 = paddle.reshape(y2, shape=(-1, ))

        return paddle.concat([x1, y1, x2, y2])

    def get_prediction(self, score, delta):
        # Inference path: softmax the class logits; deltas pass through raw.
        bbox_prob = F.softmax(score)
        return delta, bbox_prob

    def get_head(self, ):
        return self.head

    def get_assigned_targets(self, ):
        # Targets produced by the assigner on the last training forward.
        return self.assigned_targets

    def get_assigned_rois(self, ):
        # (rois, rois_num) sampled by the assigner on the last training forward.
        return self.assigned_rois
|
|
@ -0,0 +1,281 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle.nn.initializer import Normal
|
||||
|
||||
from ppdet.core.workspace import register
|
||||
from .bbox_head import BBoxHead, TwoFCHead, XConvNormHead
|
||||
from .roi_extractor import RoIAlign
|
||||
from ..shape_spec import ShapeSpec
|
||||
from ..bbox_utils import delta2bbox, clip_bbox, nonempty_bbox
|
||||
|
||||
__all__ = ['CascadeTwoFCHead', 'CascadeXConvNormHead', 'CascadeHead']
|
||||
|
||||
|
||||
@register
class CascadeTwoFCHead(nn.Layer):
    # Config key shared with other modules via the ppdet workspace.
    __shared__ = ['num_cascade_stage']

    """
    Cascade RCNN bbox head with Two fc layers to extract feature

    Args:
        in_channel (int): Input channel which can be derived by from_config
        out_channel (int): Output channel
        resolution (int): Resolution of input feature map, default 7
        num_cascade_stage (int): The number of cascade stages, default 3
    """

    def __init__(self,
                 in_channel=256,
                 out_channel=1024,
                 resolution=7,
                 num_cascade_stage=3):
        super(CascadeTwoFCHead, self).__init__()

        self.in_channel = in_channel
        self.out_channel = out_channel

        # One independent TwoFCHead per cascade stage. add_sublayer registers
        # parameters with the framework; the plain list preserves stage order
        # for indexed dispatch in forward().
        self.head_list = []
        for stage in range(num_cascade_stage):
            head_per_stage = self.add_sublayer(
                str(stage), TwoFCHead(in_channel, out_channel, resolution))
            self.head_list.append(head_per_stage)

    @classmethod
    def from_config(cls, cfg, input_shape):
        """Derive `in_channel` from the upstream module's output shape
        (first level when a list of shapes is given)."""
        s = input_shape
        s = s[0] if isinstance(s, (list, tuple)) else s
        return {'in_channel': s.channels}

    @property
    def out_shape(self):
        # Only the channel count is meaningful downstream.
        return [ShapeSpec(channels=self.out_channel, )]

    def forward(self, rois_feat, stage=0):
        """Run the head belonging to the requested cascade stage."""
        out = self.head_list[stage](rois_feat)
        return out
|
||||
|
||||
|
||||
@register
class CascadeXConvNormHead(nn.Layer):
    # Config keys shared with other modules via the ppdet workspace.
    __shared__ = ['norm_type', 'freeze_norm', 'num_cascade_stage']

    """
    Cascade RCNN bbox head with several convolution layers

    Args:
        in_channel (int): Input channels which can be derived by from_config
        num_convs (int): The number of conv layers
        conv_dim (int): The number of channels for the conv layers
        out_channel (int): Output channels
        resolution (int): Resolution of input feature map
        norm_type (string): Norm type, bn, gn, sync_bn are available,
            default `gn`
        freeze_norm (bool): Whether to freeze the norm
        num_cascade_stage (int): The number of cascade stages, default 3
    """

    def __init__(self,
                 in_channel=256,
                 num_convs=4,
                 conv_dim=256,
                 out_channel=1024,
                 resolution=7,
                 norm_type='gn',
                 freeze_norm=False,
                 num_cascade_stage=3):
        super(CascadeXConvNormHead, self).__init__()
        self.in_channel = in_channel
        self.out_channel = out_channel

        # One independent XConvNormHead per cascade stage; `stage_name`
        # namespaces the per-stage parameters.
        self.head_list = []
        for stage in range(num_cascade_stage):
            head_per_stage = self.add_sublayer(
                str(stage),
                XConvNormHead(
                    in_channel,
                    num_convs,
                    conv_dim,
                    out_channel,
                    resolution,
                    norm_type,
                    freeze_norm,
                    stage_name='stage{}_'.format(stage)))
            self.head_list.append(head_per_stage)

    @classmethod
    def from_config(cls, cfg, input_shape):
        """Derive `in_channel` from the upstream module's output shape
        (first level when a list of shapes is given)."""
        s = input_shape
        s = s[0] if isinstance(s, (list, tuple)) else s
        return {'in_channel': s.channels}

    @property
    def out_shape(self):
        # Only the channel count is meaningful downstream.
        return [ShapeSpec(channels=self.out_channel, )]

    def forward(self, rois_feat, stage=0):
        """Run the head belonging to the requested cascade stage."""
        out = self.head_list[stage](rois_feat)
        return out
|
||||
|
||||
|
||||
@register
class CascadeHead(BBoxHead):
    # Keys shared with / injected from the global ppdet config.
    __shared__ = ['num_classes', 'num_cascade_stages']
    __inject__ = ['bbox_assigner', 'bbox_loss']

    """
    Cascade RCNN bbox head

    Args:
        head (nn.Layer): Extract feature in bbox head
        in_channel (int): Input channel after RoI extractor
        roi_extractor (object): The module of RoI Extractor
        bbox_assigner (object): The module of Box Assigner, label and sample the
            box.
        num_classes (int): The number of classes
        bbox_weight (List[List[float]]): The weight to get the decode box and the
            length of weight is the number of cascade stages
        num_cascade_stages (int): The number of stages to refine the box
    """

    def __init__(self,
                 head,
                 in_channel,
                 roi_extractor=RoIAlign().__dict__,
                 bbox_assigner='BboxAssigner',
                 num_classes=80,
                 bbox_weight=[[10., 10., 5., 5.], [20.0, 20.0, 10.0, 10.0],
                              [30.0, 30.0, 15.0, 15.0]],
                 num_cascade_stages=3,
                 bbox_loss=None):
        # Intentionally skip BBoxHead.__init__: this class builds its own
        # per-stage score/delta layers instead of the single-stage ones.
        nn.Layer.__init__(self, )
        self.head = head
        self.roi_extractor = roi_extractor
        if isinstance(roi_extractor, dict):
            self.roi_extractor = RoIAlign(**roi_extractor)
        self.bbox_assigner = bbox_assigner

        self.num_classes = num_classes
        self.bbox_weight = bbox_weight
        self.num_cascade_stages = num_cascade_stages
        self.bbox_loss = bbox_loss

        # Per stage: a classification layer over num_classes + 1 (background
        # included) and a class-agnostic 4-value box regression layer.
        self.bbox_score_list = []
        self.bbox_delta_list = []
        for i in range(num_cascade_stages):
            score_name = 'bbox_score_stage{}'.format(i)
            delta_name = 'bbox_delta_stage{}'.format(i)
            bbox_score = self.add_sublayer(
                score_name,
                nn.Linear(
                    in_channel,
                    self.num_classes + 1,
                    weight_attr=paddle.ParamAttr(initializer=Normal(
                        mean=0.0, std=0.01))))

            bbox_delta = self.add_sublayer(
                delta_name,
                nn.Linear(
                    in_channel,
                    4,
                    weight_attr=paddle.ParamAttr(initializer=Normal(
                        mean=0.0, std=0.001))))
            self.bbox_score_list.append(bbox_score)
            self.bbox_delta_list.append(bbox_delta)
        self.assigned_label = None
        self.assigned_rois = None

    def forward(self, body_feats=None, rois=None, rois_num=None, inputs=None):
        """
        body_feats (list[Tensor]): Feature maps from backbone
        rois (Tensor): RoIs generated from RPN module
        rois_num (Tensor): The number of RoIs in each image
        inputs (dict{Tensor}): The ground-truth of image

        Returns (training): (loss dict, bbox_feat of the last stage).
        Returns (inference): ((deltas, averaged scores), self.head).
        """
        targets = []
        if self.training:
            # Stage-0 sampling/labeling of the RPN proposals.
            rois, rois_num, targets = self.bbox_assigner(rois, rois_num, inputs)
            targets_list = [targets]
            self.assigned_rois = (rois, rois_num)
            self.assigned_targets = targets

        pred_bbox = None
        head_out_list = []
        for i in range(self.num_cascade_stages):
            if i > 0:
                # Later stages refine the boxes decoded by the previous stage.
                rois, rois_num = self._get_rois_from_boxes(pred_bbox,
                                                           inputs['im_shape'])
                if self.training:
                    rois, rois_num, targets = self.bbox_assigner(
                        rois, rois_num, inputs, i, is_cascade=True)
                    targets_list.append(targets)

            rois_feat = self.roi_extractor(body_feats, rois, rois_num)
            bbox_feat = self.head(rois_feat, i)
            scores = self.bbox_score_list[i](bbox_feat)
            deltas = self.bbox_delta_list[i](bbox_feat)
            head_out_list.append([scores, deltas, rois])
            # Decode this stage's boxes; they feed the next stage's RoIs.
            pred_bbox = self._get_pred_bbox(deltas, rois, self.bbox_weight[i])

        if self.training:
            loss = {}
            for stage, value in enumerate(zip(head_out_list, targets_list)):
                (scores, deltas, rois), targets = value
                loss_stage = self.get_loss(scores, deltas, targets, rois,
                                           self.bbox_weight[stage])
                # Each stage contributes 1/num_cascade_stages of its loss.
                for k, v in loss_stage.items():
                    loss[k + "_stage{}".format(
                        stage)] = v / self.num_cascade_stages

            return loss, bbox_feat
        else:
            scores, deltas, self.refined_rois = self.get_prediction(
                head_out_list)
            return (deltas, scores), self.head

    def _get_rois_from_boxes(self, boxes, im_shape):
        """Clip per-image decoded boxes to the image and rebuild (rois, rois_num)."""
        rois = []
        for i, boxes_per_image in enumerate(boxes):
            clip_box = clip_bbox(boxes_per_image, im_shape[i])
            if self.training:
                # Drop degenerate boxes; keep index 0 as a fallback so the
                # per-image RoI tensor is never empty.
                keep = nonempty_bbox(clip_box)
                if keep.shape[0] == 0:
                    keep = paddle.zeros([1], dtype='int32')
                clip_box = paddle.gather(clip_box, keep)
            rois.append(clip_box)
        rois_num = paddle.concat([paddle.shape(r)[0] for r in rois])
        return rois, rois_num

    def _get_pred_bbox(self, deltas, proposals, weights):
        """Decode deltas against proposals and split back into per-image lists."""
        pred_proposals = paddle.concat(proposals) if len(
            proposals) > 1 else proposals[0]
        pred_bbox = delta2bbox(deltas, pred_proposals, weights)
        pred_bbox = paddle.reshape(pred_bbox, [-1, deltas.shape[-1]])
        num_prop = [p.shape[0] for p in proposals]
        return pred_bbox.split(num_prop)

    def get_prediction(self, head_out_list):
        """
        head_out_list(List[Tensor]): scores, deltas, rois

        Averages softmaxed scores over all stages; boxes come from the
        last stage only.
        """
        pred_list = []
        scores_list = [F.softmax(head[0]) for head in head_out_list]
        scores = paddle.add_n(scores_list) / self.num_cascade_stages
        # Get deltas and rois from the last stage
        _, deltas, rois = head_out_list[-1]
        return scores, deltas, rois

    def get_refined_rois(self, ):
        """Return the RoIs of the final stage cached by the last inference pass."""
        return self.refined_rois
|
|
@ -0,0 +1,192 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import math
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle.nn.initializer import KaimingUniform
|
||||
from ppdet.core.workspace import register
|
||||
from ppdet.modeling.losses import CTFocalLoss
|
||||
|
||||
|
||||
class ConvLayer(nn.Layer):
    """Conv2D wrapper: Kaiming-uniform weights and, when requested, a bias
    initialized uniformly in [-1/sqrt(fan_in), 1/sqrt(fan_in)]."""

    def __init__(self,
                 ch_in,
                 ch_out,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=False):
        super(ConvLayer, self).__init__()
        param_attr = paddle.ParamAttr(initializer=KaimingUniform())
        if bias:
            # Classic fan-in based uniform bias bound.
            fan_in = ch_in * kernel_size**2
            bound = 1 / math.sqrt(fan_in)
            bias_attr = paddle.ParamAttr(
                initializer=nn.initializer.Uniform(-bound, bound))
        else:
            bias_attr = False
        self.conv = nn.Conv2D(
            in_channels=ch_in,
            out_channels=ch_out,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            weight_attr=param_attr,
            bias_attr=bias_attr)

    def forward(self, inputs):
        """Apply the wrapped convolution."""
        return self.conv(inputs)
|
||||
|
||||
|
||||
@register
class CenterNetHead(nn.Layer):
    """
    Args:
        in_channels (int): the channel number of input to CenterNetHead.
        num_classes (int): the number of classes, 80 by default.
        head_planes (int): the channel number in all head, 256 by default.
        heatmap_weight (float): the weight of heatmap loss, 1 by default.
        regress_ltrb (bool): whether to regress left/top/right/bottom or
            width/height for a box, true by default
        size_weight (float): the weight of box size loss, 0.1 by default.
        offset_weight (float): the weight of center offset loss, 1 by default.

    """

    __shared__ = ['num_classes']

    def __init__(self,
                 in_channels,
                 num_classes=80,
                 head_planes=256,
                 heatmap_weight=1,
                 regress_ltrb=True,
                 size_weight=0.1,
                 offset_weight=1):
        super(CenterNetHead, self).__init__()
        # Per-branch loss weights used in get_loss().
        self.weights = {
            'heatmap': heatmap_weight,
            'size': size_weight,
            'offset': offset_weight
        }
        # Heatmap branch: one score map per class.
        self.heatmap = nn.Sequential(
            ConvLayer(
                in_channels, head_planes, kernel_size=3, padding=1, bias=True),
            nn.ReLU(),
            ConvLayer(
                head_planes,
                num_classes,
                kernel_size=1,
                stride=1,
                padding=0,
                bias=True))
        # Bias prior so initial activations are low: sigmoid(-2.19) ~= 0.1.
        self.heatmap[2].conv.bias[:] = -2.19
        # Size branch: ltrb (4 values) or wh (2 values) per location.
        self.size = nn.Sequential(
            ConvLayer(
                in_channels, head_planes, kernel_size=3, padding=1, bias=True),
            nn.ReLU(),
            ConvLayer(
                head_planes,
                4 if regress_ltrb else 2,
                kernel_size=1,
                stride=1,
                padding=0,
                bias=True))
        # Offset branch: sub-pixel (x, y) correction of the center.
        self.offset = nn.Sequential(
            ConvLayer(
                in_channels, head_planes, kernel_size=3, padding=1, bias=True),
            nn.ReLU(),
            ConvLayer(
                head_planes, 2, kernel_size=1, stride=1, padding=0, bias=True))
        self.focal_loss = CTFocalLoss()

    @classmethod
    def from_config(cls, cfg, input_shape):
        """Derive `in_channels` from the upstream module's output shape."""
        if isinstance(input_shape, (list, tuple)):
            input_shape = input_shape[0]
        return {'in_channels': input_shape.channels}

    def forward(self, feat, inputs):
        """Run the three branches on `feat`.

        Training: returns the loss dict from get_loss().
        Inference: returns raw size/offset maps plus the sigmoid heatmap.
        """
        heatmap = self.heatmap(feat)
        size = self.size(feat)
        offset = self.offset(feat)
        if self.training:
            loss = self.get_loss(heatmap, size, offset, self.weights, inputs)
            return loss
        else:
            heatmap = F.sigmoid(heatmap)
            return {'heatmap': heatmap, 'size': size, 'offset': offset}

    def get_loss(self, heatmap, size, offset, weights, inputs):
        """Compute focal heatmap loss plus masked L1 size/offset losses.

        `inputs` supplies per-image targets: 'heatmap', 'size', 'offset',
        flat location 'index', and 'index_mask' marking valid entries.
        """
        heatmap_target = inputs['heatmap']
        size_target = inputs['size']
        offset_target = inputs['offset']
        index = inputs['index']
        mask = inputs['index_mask']
        # Clip to keep log() inside the focal loss numerically stable.
        heatmap = paddle.clip(F.sigmoid(heatmap), 1e-4, 1 - 1e-4)
        heatmap_loss = self.focal_loss(heatmap, heatmap_target)

        # Flatten NCHW -> (N, H*W, C) so `index` can address locations.
        size = paddle.transpose(size, perm=[0, 2, 3, 1])
        size_n, size_h, size_w, size_c = size.shape
        size = paddle.reshape(size, shape=[size_n, -1, size_c])
        index = paddle.unsqueeze(index, 2)
        # Prepend a batch index so gather_nd selects (batch, location) pairs.
        batch_inds = list()
        for i in range(size_n):
            batch_ind = paddle.full(
                shape=[1, index.shape[1], 1], fill_value=i, dtype='int64')
            batch_inds.append(batch_ind)
        batch_inds = paddle.concat(batch_inds, axis=0)
        index = paddle.concat(x=[batch_inds, index], axis=2)
        pos_size = paddle.gather_nd(size, index=index)
        # Mask out padded (invalid) target slots and average over valid ones.
        mask = paddle.unsqueeze(mask, axis=2)
        size_mask = paddle.expand_as(mask, pos_size)
        size_mask = paddle.cast(size_mask, dtype=pos_size.dtype)
        pos_num = size_mask.sum()
        size_mask.stop_gradient = True
        size_target.stop_gradient = True
        size_loss = F.l1_loss(
            pos_size * size_mask, size_target * size_mask, reduction='sum')
        size_loss = size_loss / (pos_num + 1e-4)

        # Same gather-and-mask scheme for the offset branch (reuses `index`).
        offset = paddle.transpose(offset, perm=[0, 2, 3, 1])
        offset_n, offset_h, offset_w, offset_c = offset.shape
        offset = paddle.reshape(offset, shape=[offset_n, -1, offset_c])
        pos_offset = paddle.gather_nd(offset, index=index)
        offset_mask = paddle.expand_as(mask, pos_offset)
        offset_mask = paddle.cast(offset_mask, dtype=pos_offset.dtype)
        pos_num = offset_mask.sum()
        offset_mask.stop_gradient = True
        offset_target.stop_gradient = True
        offset_loss = F.l1_loss(
            pos_offset * offset_mask,
            offset_target * offset_mask,
            reduction='sum')
        offset_loss = offset_loss / (pos_num + 1e-4)

        # Weighted sum of the three branch losses.
        det_loss = weights['heatmap'] * heatmap_loss + weights[
            'size'] * size_loss + weights['offset'] * offset_loss

        return {
            'det_loss': det_loss,
            'heatmap_loss': heatmap_loss,
            'size_loss': size_loss,
            'offset_loss': offset_loss
        }
|
|
@ -0,0 +1,110 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
|
||||
from ppdet.core.workspace import register
|
||||
from ..layers import AnchorGeneratorSSD
|
||||
|
||||
|
||||
@register
class FaceHead(nn.Layer):
    """
    Head block for Face detection network

    Args:
        num_classes (int): Number of output classes.
        in_channels (int): Number of input channels.
        anchor_generator(object): instance of anchor genertor method.
        kernel_size (int): kernel size of Conv2D in FaceHead.
        padding (int): padding of Conv2D in FaceHead.
        conv_decay (float): weight decay for conv layer weights.
        loss (object): loss of face detection model.
    """
    __shared__ = ['num_classes']
    __inject__ = ['anchor_generator', 'loss']

    def __init__(self,
                 num_classes=80,
                 in_channels=[96, 96],
                 anchor_generator=AnchorGeneratorSSD().__dict__,
                 kernel_size=3,
                 padding=1,
                 conv_decay=0.,
                 loss='SSDLoss'):
        super(FaceHead, self).__init__()
        # add background class
        self.num_classes = num_classes + 1
        self.in_channels = in_channels
        self.anchor_generator = anchor_generator
        self.loss = loss

        if isinstance(anchor_generator, dict):
            self.anchor_generator = AnchorGeneratorSSD(**anchor_generator)

        # One box-regression conv and one score conv per feature level,
        # each predicting `num_prior` anchors per spatial location.
        self.num_priors = self.anchor_generator.num_priors
        self.box_convs = []
        self.score_convs = []
        for i, num_prior in enumerate(self.num_priors):
            box_conv_name = "boxes{}".format(i)
            box_conv = self.add_sublayer(
                box_conv_name,
                nn.Conv2D(
                    in_channels=self.in_channels[i],
                    out_channels=num_prior * 4,
                    kernel_size=kernel_size,
                    padding=padding))
            self.box_convs.append(box_conv)

            score_conv_name = "scores{}".format(i)
            score_conv = self.add_sublayer(
                score_conv_name,
                nn.Conv2D(
                    in_channels=self.in_channels[i],
                    out_channels=num_prior * self.num_classes,
                    kernel_size=kernel_size,
                    padding=padding))
            self.score_convs.append(score_conv)

    @classmethod
    def from_config(cls, cfg, input_shape):
        """Derive per-level `in_channels` from the upstream output shapes."""
        return {'in_channels': [i.channels for i in input_shape], }

    def forward(self, feats, image, gt_bbox=None, gt_class=None):
        """Predict per-level boxes and class scores; gt_* needed in training.

        Training: returns the SSD-style loss.
        Inference: returns ((box_preds, cls_scores), prior_boxes).
        """
        box_preds = []
        cls_scores = []
        prior_boxes = []
        for feat, box_conv, score_conv in zip(feats, self.box_convs,
                                              self.score_convs):
            # NCHW -> NHWC -> (N, anchors, 4); `0` keeps the batch dim.
            box_pred = box_conv(feat)
            box_pred = paddle.transpose(box_pred, [0, 2, 3, 1])
            box_pred = paddle.reshape(box_pred, [0, -1, 4])
            box_preds.append(box_pred)

            cls_score = score_conv(feat)
            cls_score = paddle.transpose(cls_score, [0, 2, 3, 1])
            cls_score = paddle.reshape(cls_score, [0, -1, self.num_classes])
            cls_scores.append(cls_score)

        prior_boxes = self.anchor_generator(feats, image)

        if self.training:
            return self.get_loss(box_preds, cls_scores, gt_bbox, gt_class,
                                 prior_boxes)
        else:
            return (box_preds, cls_scores), prior_boxes

    def get_loss(self, boxes, scores, gt_bbox, gt_class, prior_boxes):
        """Delegate to the injected loss (SSDLoss by default)."""
        return self.loss(boxes, scores, gt_bbox, gt_class, prior_boxes)
|
|
@ -0,0 +1,269 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import math
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle import ParamAttr
|
||||
from paddle.nn.initializer import Normal, Constant
|
||||
|
||||
from ppdet.core.workspace import register
|
||||
from ppdet.modeling.layers import ConvNormLayer
|
||||
|
||||
|
||||
class ScaleReg(nn.Layer):
    """Single learnable scalar (initialized to 1) that rescales the
    regression outputs of one FPN level."""

    def __init__(self):
        super(ScaleReg, self).__init__()
        self.scale_reg = self.create_parameter(
            shape=[1],
            attr=ParamAttr(initializer=Constant(value=1.)),
            dtype="float32")

    def forward(self, inputs):
        """Multiply inputs by the learned scale."""
        return inputs * self.scale_reg
|
||||
|
||||
|
||||
@register
class FCOSFeat(nn.Layer):
    """
    FCOSFeat of FCOS

    Args:
        feat_in (int): The channel number of input Tensor.
        feat_out (int): The channel number of output Tensor.
        num_convs (int): The convolution number of the FCOSFeat.
        norm_type (str): Normalization type, 'bn'/'sync_bn'/'gn'.
        use_dcn (bool): Whether to use dcn in tower or not.
    """

    def __init__(self,
                 feat_in=256,
                 feat_out=256,
                 num_convs=4,
                 norm_type='bn',
                 use_dcn=False):
        super(FCOSFeat, self).__init__()
        self.num_convs = num_convs
        self.norm_type = norm_type
        # Two parallel conv towers: one feeds classification, one regression.
        self.cls_subnet_convs = []
        self.reg_subnet_convs = []
        for i in range(self.num_convs):
            # Only the first conv sees feat_in channels; the rest see feat_out.
            in_c = feat_in if i == 0 else feat_out

            cls_conv_name = 'fcos_head_cls_tower_conv_{}'.format(i)
            cls_conv = self.add_sublayer(
                cls_conv_name,
                ConvNormLayer(
                    ch_in=in_c,
                    ch_out=feat_out,
                    filter_size=3,
                    stride=1,
                    norm_type=norm_type,
                    use_dcn=use_dcn,
                    bias_on=True,
                    lr_scale=2.))
            self.cls_subnet_convs.append(cls_conv)

            reg_conv_name = 'fcos_head_reg_tower_conv_{}'.format(i)
            reg_conv = self.add_sublayer(
                reg_conv_name,
                ConvNormLayer(
                    ch_in=in_c,
                    ch_out=feat_out,
                    filter_size=3,
                    stride=1,
                    norm_type=norm_type,
                    use_dcn=use_dcn,
                    bias_on=True,
                    lr_scale=2.))
            self.reg_subnet_convs.append(reg_conv)

    def forward(self, fpn_feat):
        """Run both towers on one FPN feature map.

        Returns:
            tuple: (cls_feat, reg_feat), each conv+ReLU applied num_convs times.
        """
        cls_feat = fpn_feat
        reg_feat = fpn_feat
        for i in range(self.num_convs):
            cls_feat = F.relu(self.cls_subnet_convs[i](cls_feat))
            reg_feat = F.relu(self.reg_subnet_convs[i](reg_feat))
        return cls_feat, reg_feat
|
||||
|
||||
|
||||
@register
class FCOSHead(nn.Layer):
    """
    FCOSHead
    Args:
        fcos_feat (object): Instance of 'FCOSFeat'
        num_classes (int): Number of classes
        fpn_stride (list): The stride of each FPN Layer
        prior_prob (float): Used to set the bias init for the class prediction layer
        fcos_loss (object): Instance of 'FCOSLoss'
        norm_reg_targets (bool): Normalization the regression target if true
        centerness_on_reg (bool): The prediction of centerness on regression or clssification branch
    """
    __inject__ = ['fcos_feat', 'fcos_loss']
    __shared__ = ['num_classes']

    def __init__(self,
                 fcos_feat,
                 num_classes=80,
                 fpn_stride=[8, 16, 32, 64, 128],
                 prior_prob=0.01,
                 fcos_loss='FCOSLoss',
                 norm_reg_targets=True,
                 centerness_on_reg=True):
        super(FCOSHead, self).__init__()
        self.fcos_feat = fcos_feat
        self.num_classes = num_classes
        self.fpn_stride = fpn_stride
        self.prior_prob = prior_prob
        self.fcos_loss = fcos_loss
        self.norm_reg_targets = norm_reg_targets
        self.centerness_on_reg = centerness_on_reg

        # Classification conv; bias initialized so initial foreground
        # probability equals prior_prob (RetinaNet-style focal-loss init).
        conv_cls_name = "fcos_head_cls"
        bias_init_value = -math.log((1 - self.prior_prob) / self.prior_prob)
        self.fcos_head_cls = self.add_sublayer(
            conv_cls_name,
            nn.Conv2D(
                in_channels=256,
                out_channels=self.num_classes,
                kernel_size=3,
                stride=1,
                padding=1,
                weight_attr=ParamAttr(
                    name=conv_cls_name + "_weights",
                    initializer=Normal(
                        mean=0., std=0.01)),
                bias_attr=ParamAttr(
                    name=conv_cls_name + "_bias",
                    initializer=Constant(value=bias_init_value))))

        # Regression conv: predicts 4 distances (l, t, r, b) per location.
        conv_reg_name = "fcos_head_reg"
        self.fcos_head_reg = self.add_sublayer(
            conv_reg_name,
            nn.Conv2D(
                in_channels=256,
                out_channels=4,
                kernel_size=3,
                stride=1,
                padding=1,
                weight_attr=ParamAttr(
                    name=conv_reg_name + "_weights",
                    initializer=Normal(
                        mean=0., std=0.01)),
                bias_attr=ParamAttr(
                    name=conv_reg_name + "_bias",
                    initializer=Constant(value=0))))

        # Centerness conv: single-channel quality score per location.
        conv_centerness_name = "fcos_head_centerness"
        self.fcos_head_centerness = self.add_sublayer(
            conv_centerness_name,
            nn.Conv2D(
                in_channels=256,
                out_channels=1,
                kernel_size=3,
                stride=1,
                padding=1,
                weight_attr=ParamAttr(
                    name=conv_centerness_name + "_weights",
                    initializer=Normal(
                        mean=0., std=0.01)),
                bias_attr=ParamAttr(
                    name=conv_centerness_name + "_bias",
                    initializer=Constant(value=0))))

        # One learnable scale per FPN level for the regression outputs.
        self.scales_regs = []
        for i in range(len(self.fpn_stride)):
            lvl = int(math.log(int(self.fpn_stride[i]), 2))
            feat_name = 'p{}_feat'.format(lvl)
            scale_reg = self.add_sublayer(feat_name, ScaleReg())
            self.scales_regs.append(scale_reg)

    def _compute_locations_by_level(self, fpn_stride, feature):
        """
        Compute locations of anchor points of each FPN layer
        Args:
            fpn_stride (int): The stride of current FPN feature map
            feature (Tensor): Tensor of current FPN feature map
        Return:
            Anchor points locations of current FPN feature map
        """
        shape_fm = paddle.shape(feature)
        shape_fm.stop_gradient = True
        h, w = shape_fm[2], shape_fm[3]
        # Build the (x, y) grid in input-image coordinates.
        shift_x = paddle.arange(0, w * fpn_stride, fpn_stride)
        shift_y = paddle.arange(0, h * fpn_stride, fpn_stride)
        shift_x = paddle.unsqueeze(shift_x, axis=0)
        shift_y = paddle.unsqueeze(shift_y, axis=1)
        shift_x = paddle.expand(shift_x, shape=[h, w])
        shift_y = paddle.expand(shift_y, shape=[h, w])
        shift_x.stop_gradient = True
        shift_y.stop_gradient = True
        shift_x = paddle.reshape(shift_x, shape=[-1])
        shift_y = paddle.reshape(shift_y, shape=[-1])
        # Shift by half a stride to land on each cell's center.
        location = paddle.stack(
            [shift_x, shift_y], axis=-1) + float(fpn_stride) / 2
        location.stop_gradient = True
        return location

    def forward(self, fpn_feats, is_training):
        """Run the head on every FPN level.

        Training: returns (cls_logits, bboxes_reg, centerness) lists.
        Inference: additionally prepends the per-level anchor locations.
        """
        assert len(fpn_feats) == len(
            self.fpn_stride
        ), "The size of fpn_feats is not equal to size of fpn_stride"
        cls_logits_list = []
        bboxes_reg_list = []
        centerness_list = []
        for scale_reg, fpn_stride, fpn_feat in zip(self.scales_regs,
                                                   self.fpn_stride, fpn_feats):
            fcos_cls_feat, fcos_reg_feat = self.fcos_feat(fpn_feat)
            cls_logits = self.fcos_head_cls(fcos_cls_feat)
            bbox_reg = scale_reg(self.fcos_head_reg(fcos_reg_feat))
            # Centerness branch attaches to reg or cls tower per config.
            if self.centerness_on_reg:
                centerness = self.fcos_head_centerness(fcos_reg_feat)
            else:
                centerness = self.fcos_head_centerness(fcos_cls_feat)
            if self.norm_reg_targets:
                # Targets are stride-normalized: keep outputs non-negative,
                # and rescale to pixels only at inference time.
                bbox_reg = F.relu(bbox_reg)
                if not is_training:
                    bbox_reg = bbox_reg * fpn_stride
            else:
                bbox_reg = paddle.exp(bbox_reg)
            cls_logits_list.append(cls_logits)
            bboxes_reg_list.append(bbox_reg)
            centerness_list.append(centerness)

        if not is_training:
            locations_list = []
            for fpn_stride, feature in zip(self.fpn_stride, fpn_feats):
                location = self._compute_locations_by_level(fpn_stride, feature)
                locations_list.append(location)

            return locations_list, cls_logits_list, bboxes_reg_list, centerness_list
        else:
            return cls_logits_list, bboxes_reg_list, centerness_list

    def get_loss(self, fcos_head_outs, tag_labels, tag_bboxes, tag_centerness):
        """Delegate the three-branch outputs and targets to the FCOS loss."""
        cls_logits, bboxes_reg, centerness = fcos_head_outs
        return self.fcos_loss(cls_logits, bboxes_reg, centerness, tag_labels,
                              tag_bboxes, tag_centerness)
|
|
@ -0,0 +1,108 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
|
||||
from ppdet.core.workspace import register
|
||||
from .. import layers as L
|
||||
from ..backbones.hrnet import BasicBlock
|
||||
|
||||
|
||||
@register
class HrHRNetHead(nn.Layer):
    # Injected by the ppdet workspace from the YAML config.
    __inject__ = ['loss']

    def __init__(self, num_joints, loss='HrHRNetLoss', swahr=False, width=32):
        """
        Head for HigherHRNet network.

        Args:
            num_joints (int): number of keypoints
            loss (object): HrHRNetLoss instance (injected via ``__inject__``)
            swahr (bool): whether to use the SWAHR scale-aware heatmap branch
            width (int): hrnet channel width
        """
        super(HrHRNetHead, self).__init__()
        self.loss = loss

        self.num_joints = num_joints
        # conv1 predicts heatmaps + tagmaps (2 * num_joints channels) at the
        # coarse resolution; conv2 predicts refined heatmaps only at 2x.
        num_featout1 = num_joints * 2
        num_featout2 = num_joints
        self.swahr = swahr
        self.conv1 = L.Conv2d(width, num_featout1, 1, 1, 0, bias=True)
        self.conv2 = L.Conv2d(width, num_featout2, 1, 1, 0, bias=True)
        # Upsample (x2) the concat of the backbone feature and conv1's output.
        self.deconv = nn.Sequential(
            L.ConvTranspose2d(
                num_featout1 + width, width, 4, 2, 1, 0, bias=False),
            L.BatchNorm2d(width),
            L.ReLU())
        # Four residual basic blocks refine the upsampled feature.
        self.blocks = nn.Sequential(*(BasicBlock(
            num_channels=width,
            num_filters=width,
            has_se=False,
            freeze_norm=False,
            name='HrHRNetHead_{}'.format(i)) for i in range(4)))

        self.interpolate = L.Upsample(2, mode='bilinear')
        self.concat = L.Concat(dim=1)
        if swahr:
            # Per-joint scale maps (depthwise 9x9 conv) for SWAHR training.
            self.scalelayer0 = nn.Sequential(
                L.Conv2d(
                    width, num_joints, 1, 1, 0, bias=True),
                L.BatchNorm2d(num_joints),
                L.ReLU(),
                L.Conv2d(
                    num_joints,
                    num_joints,
                    9,
                    1,
                    4,
                    groups=num_joints,
                    bias=True))
            self.scalelayer1 = nn.Sequential(
                L.Conv2d(
                    width, num_joints, 1, 1, 0, bias=True),
                L.BatchNorm2d(num_joints),
                L.ReLU(),
                L.Conv2d(
                    num_joints,
                    num_joints,
                    9,
                    1,
                    4,
                    groups=num_joints,
                    bias=True))

    def forward(self, feats, targets=None):
        """Run the head.

        Args:
            feats (list): backbone features; only ``feats[0]`` is used here.
            targets: ground-truth dict, required in training mode.

        Returns:
            Training: the loss produced by the injected ``loss`` module.
            Inference: ``(avg_heatmap, upsampled_tagmap)``.
        """
        x1 = feats[0]
        xo1 = self.conv1(x1)
        # Second stage: deconv on [feature, coarse prediction], then refine.
        x2 = self.blocks(self.deconv(self.concat((x1, xo1))))
        xo2 = self.conv2(x2)
        num_joints = self.num_joints
        if self.training:
            # xo1 holds heatmaps in the first half, tagmaps in the second.
            heatmap1, tagmap = paddle.split(xo1, 2, axis=1)
            if self.swahr:
                so1 = self.scalelayer0(x1)
                so2 = self.scalelayer1(x2)
                hrhrnet_outputs = ([heatmap1, so1], [xo2, so2], tagmap)
                return self.loss(hrhrnet_outputs, targets)
            else:
                hrhrnet_outputs = (heatmap1, xo2, tagmap)
                return self.loss(hrhrnet_outputs, targets)

        # Inference: average the upsampled coarse heatmap with the refined
        # one; the tagmap is only upsampled.
        upsampled = self.interpolate(xo1)
        avg = (upsampled[:, :num_joints] + xo2[:, :num_joints]) / 2
        return avg, upsampled[:, num_joints:]
|
|
@ -0,0 +1,250 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle.nn.initializer import KaimingNormal
|
||||
|
||||
from ppdet.core.workspace import register, create
|
||||
from ppdet.modeling.layers import ConvNormLayer
|
||||
from .roi_extractor import RoIAlign
|
||||
|
||||
|
||||
@register
class MaskFeat(nn.Layer):
    """
    Feature extraction in Mask head: a stack of 3x3 convs followed by a 2x2
    transposed conv that upsamples the RoI feature by 2x; each layer is
    followed by ReLU.

    Args:
        in_channel (int): Input channels
        out_channel (int): Output channels
        num_convs (int): The number of conv layers, default 4
        norm_type (string | None): Norm type, bn, gn, sync_bn are available,
            default None
    """

    def __init__(self,
                 in_channel=256,
                 out_channel=256,
                 num_convs=4,
                 norm_type=None):
        super(MaskFeat, self).__init__()
        self.num_convs = num_convs
        self.in_channel = in_channel
        self.out_channel = out_channel
        self.norm_type = norm_type
        # Fan-in values for Kaiming init: kernel area * out channels.
        fan_conv = out_channel * 3 * 3
        fan_deconv = out_channel * 2 * 2

        mask_conv = nn.Sequential()
        if norm_type == 'gn':
            # Project ConvNormLayer fuses conv + group norm.
            # NOTE(review): assumes ConvNormLayer pads to preserve spatial
            # size for filter_size=3 — confirm in ppdet/modeling/layers.py.
            for i in range(self.num_convs):
                conv_name = 'mask_inter_feat_{}'.format(i + 1)
                mask_conv.add_sublayer(
                    conv_name,
                    ConvNormLayer(
                        ch_in=in_channel if i == 0 else out_channel,
                        ch_out=out_channel,
                        filter_size=3,
                        stride=1,
                        norm_type=self.norm_type,
                        initializer=KaimingNormal(fan_in=fan_conv),
                        skip_quant=True))
                mask_conv.add_sublayer(conv_name + 'act', nn.ReLU())
        else:
            for i in range(self.num_convs):
                conv_name = 'mask_inter_feat_{}'.format(i + 1)
                conv = nn.Conv2D(
                    in_channels=in_channel if i == 0 else out_channel,
                    out_channels=out_channel,
                    kernel_size=3,
                    padding=1,
                    weight_attr=paddle.ParamAttr(
                        initializer=KaimingNormal(fan_in=fan_conv)))
                # Excluded from quantization passes.
                conv.skip_quant = True
                mask_conv.add_sublayer(conv_name, conv)
                mask_conv.add_sublayer(conv_name + 'act', nn.ReLU())
        # Final 2x upsampling deconv.
        mask_conv.add_sublayer(
            'conv5_mask',
            nn.Conv2DTranspose(
                in_channels=self.in_channel,
                out_channels=self.out_channel,
                kernel_size=2,
                stride=2,
                weight_attr=paddle.ParamAttr(
                    initializer=KaimingNormal(fan_in=fan_deconv))))
        mask_conv.add_sublayer('conv5_mask' + 'act', nn.ReLU())
        self.upsample = mask_conv

    @classmethod
    def from_config(cls, cfg, input_shape):
        # Derive the input channel count from the (first) input shape spec.
        if isinstance(input_shape, (list, tuple)):
            input_shape = input_shape[0]
        return {'in_channel': input_shape.channels, }

    def out_channels(self):
        """Return the number of output channels of this feature extractor."""
        return self.out_channel

    def forward(self, feats):
        """Apply the conv stack + 2x deconv to the RoI features."""
        return self.upsample(feats)
|
||||
|
||||
|
||||
@register
class MaskHead(nn.Layer):
    """
    RCNN mask head.

    Args:
        head (nn.Layer): Extract feature in mask head
        roi_extractor (object): The module of RoI Extractor
        mask_assigner (object): The module of Mask Assigner,
            label and sample the mask
        num_classes (int): The number of classes
        share_bbox_feat (bool): Whether to share the feature from bbox head,
            default false
    """
    __shared__ = ['num_classes']
    __inject__ = ['mask_assigner']

    def __init__(self,
                 head,
                 roi_extractor=RoIAlign().__dict__,
                 mask_assigner='MaskAssigner',
                 num_classes=80,
                 share_bbox_feat=False):
        super(MaskHead, self).__init__()
        self.num_classes = num_classes

        self.roi_extractor = roi_extractor
        # A plain dict means the config carried RoIAlign kwargs; build it.
        if isinstance(roi_extractor, dict):
            self.roi_extractor = RoIAlign(**roi_extractor)
        self.head = head
        self.in_channels = head.out_channels()
        self.mask_assigner = mask_assigner
        self.share_bbox_feat = share_bbox_feat
        self.bbox_head = None

        # Per-class mask logits (1x1 conv).
        self.mask_fcn_logits = nn.Conv2D(
            in_channels=self.in_channels,
            out_channels=self.num_classes,
            kernel_size=1,
            weight_attr=paddle.ParamAttr(initializer=KaimingNormal(
                fan_in=self.num_classes)))
        # Excluded from quantization passes.
        self.mask_fcn_logits.skip_quant = True

    @classmethod
    def from_config(cls, cfg, input_shape):
        """Build constructor kwargs (RoI extractor + feature head) from cfg."""
        roi_pooler = cfg['roi_extractor']
        assert isinstance(roi_pooler, dict)
        kwargs = RoIAlign.from_config(cfg, input_shape)
        roi_pooler.update(kwargs)
        kwargs = {'input_shape': input_shape}
        head = create(cfg['head'], **kwargs)
        return {
            'roi_extractor': roi_pooler,
            'head': head,
        }

    def get_loss(self, mask_logits, mask_label, mask_target, mask_weight):
        """Binary cross-entropy mask loss over the GT-class channel only.

        Args:
            mask_logits: [N, num_classes, H, W] predicted logits.
            mask_label: [N] GT class index per sampled RoI.
            mask_target: [N, H, W] binary GT masks.
            mask_weight: [N] per-RoI loss weights.
        """
        # Select each RoI's logits at its GT class via a one-hot mask.
        mask_label = F.one_hot(mask_label, self.num_classes).unsqueeze([2, 3])
        mask_label = paddle.expand_as(mask_label, mask_logits)
        mask_label.stop_gradient = True
        mask_pred = paddle.gather_nd(mask_logits, paddle.nonzero(mask_label))
        shape = mask_logits.shape
        mask_pred = paddle.reshape(mask_pred, [shape[0], shape[2], shape[3]])

        mask_target = mask_target.cast('float32')
        mask_weight = mask_weight.unsqueeze([1, 2])
        loss_mask = F.binary_cross_entropy_with_logits(
            mask_pred, mask_target, weight=mask_weight, reduction="mean")
        return loss_mask

    def forward_train(self, body_feats, rois, rois_num, inputs, targets,
                      bbox_feat):
        """Training path: sample mask targets, predict logits, return loss.

        Args:
            body_feats (list[Tensor]): Multi-level backbone features
            rois (list[Tensor]): Proposals for each batch with shape [N, 4]
            rois_num (Tensor): The number of proposals for each batch
            inputs (dict): ground truth info
            targets (tuple): (labels, _, gt_inds) from the bbox assigner
            bbox_feat (Tensor): RoI features from the bbox head, used when
                ``share_bbox_feat`` is enabled
        """
        tgt_labels, _, tgt_gt_inds = targets
        rois, rois_num, tgt_classes, tgt_masks, mask_index, tgt_weights = self.mask_assigner(
            rois, tgt_labels, tgt_gt_inds, inputs)

        if self.share_bbox_feat:
            # Reuse the bbox head's RoI features instead of re-pooling.
            rois_feat = paddle.gather(bbox_feat, mask_index)
        else:
            rois_feat = self.roi_extractor(body_feats, rois, rois_num)
        mask_feat = self.head(rois_feat)
        mask_logits = self.mask_fcn_logits(mask_feat)

        loss_mask = self.get_loss(mask_logits, tgt_classes, tgt_masks,
                                  tgt_weights)
        return {'loss_mask': loss_mask}

    def forward_test(self,
                     body_feats,
                     rois,
                     rois_num,
                     scale_factor,
                     feat_func=None):
        """Inference path: predict sigmoid masks for the detected boxes.

        Args:
            body_feats (list[Tensor]): Multi-level backbone features
            rois (Tensor): Prediction from bbox head with shape [N, 6]
            rois_num (Tensor): The number of prediction for each batch
            scale_factor (Tensor): The scale factor from origin size to input size
            feat_func (callable | None): maps pooled RoI features through the
                bbox head's feature extractor when ``share_bbox_feat`` is set
        """
        if rois.shape[0] == 0:
            # Sentinel output for an image with no detections.
            mask_out = paddle.full([1, 1, 1, 1], -1)
        else:
            # rois layout is [class, score, x1, y1, x2, y2].
            bbox = [rois[:, 2:]]
            labels = rois[:, 0].cast('int32')
            rois_feat = self.roi_extractor(body_feats, bbox, rois_num)
            if self.share_bbox_feat:
                assert feat_func is not None
                rois_feat = feat_func(rois_feat)

            mask_feat = self.head(rois_feat)
            mask_logit = self.mask_fcn_logits(mask_feat)
            mask_num_class = mask_logit.shape[1]
            if mask_num_class == 1:
                # Class-agnostic mask: single channel.
                mask_out = F.sigmoid(mask_logit)
            else:
                # Keep only each RoI's predicted-class channel.
                mask_out = []
                # TODO: need to optimize gather
                for i in range(mask_logit.shape[0]):
                    pred_masks = paddle.unsqueeze(
                        mask_logit[i, :, :, :], axis=0)
                    mask = paddle.gather(pred_masks, labels[i], axis=1)
                    mask_out.append(mask)
                mask_out = F.sigmoid(paddle.concat(mask_out))
        return mask_out

    def forward(self,
                body_feats,
                rois,
                rois_num,
                inputs,
                targets=None,
                bbox_feat=None,
                feat_func=None):
        """Dispatch to the training or inference path."""
        if self.training:
            return self.forward_train(body_feats, rois, rois_num, inputs,
                                      targets, bbox_feat)
        else:
            im_scale = inputs['scale_factor']
            return self.forward_test(body_feats, rois, rois_num, im_scale,
                                     feat_func)
|
|
@ -0,0 +1,111 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import paddle
|
||||
from ppdet.core.workspace import register
|
||||
from ppdet.modeling import ops
|
||||
|
||||
|
||||
def _to_list(v):
|
||||
if not isinstance(v, (list, tuple)):
|
||||
return [v]
|
||||
return v
|
||||
|
||||
|
||||
@register
class RoIAlign(object):
    """
    RoI Align module

    For more details, please refer to the document of roi_align in
    ppdet/modeling/ops.py

    Args:
        resolution (int): The output size, default 14
        spatial_scale (float): Multiplicative spatial scale factor to translate
            ROI coords from their input scale to the scale used when pooling.
            default 0.0625
        sampling_ratio (int): The number of sampling points in the interpolation
            grid, default 0
        canconical_level (int): The referring level of FPN layer with
            specified level. default 4
        canonical_size (int): The referring scale of FPN layer with
            specified scale. default 224
        start_level (int): The start level of FPN layer to extract RoI feature,
            default 0
        end_level (int): The end level of FPN layer to extract RoI feature,
            default 3
        aligned (bool): Whether to add offset to rois' coord in roi_align.
            default false
    """

    def __init__(self,
                 resolution=14,
                 spatial_scale=0.0625,
                 sampling_ratio=0,
                 canconical_level=4,
                 canonical_size=224,
                 start_level=0,
                 end_level=3,
                 aligned=False):
        super(RoIAlign, self).__init__()
        self.resolution = resolution
        # One scale per FPN level; a scalar becomes a single-element list.
        self.spatial_scale = _to_list(spatial_scale)
        self.sampling_ratio = sampling_ratio
        # NOTE: 'canconical' is a long-standing typo kept for config
        # compatibility — do not rename.
        self.canconical_level = canconical_level
        self.canonical_size = canonical_size
        self.start_level = start_level
        self.end_level = end_level
        self.aligned = aligned

    @classmethod
    def from_config(cls, cfg, input_shape):
        # Spatial scale of each level is the reciprocal of its stride.
        return {'spatial_scale': [1. / i.stride for i in input_shape]}

    def __call__(self, feats, roi, rois_num):
        """Pool RoI features from single- or multi-level feature maps.

        Args:
            feats (list[Tensor]): feature maps, one per FPN level.
            roi (list[Tensor]): per-image RoIs, concatenated here.
            rois_num (Tensor): number of RoIs per image.

        Returns:
            Tensor: pooled features in the original RoI order.
        """
        roi = paddle.concat(roi) if len(roi) > 1 else roi[0]
        if len(feats) == 1:
            # Single-level: pool directly.
            rois_feat = ops.roi_align(
                feats[self.start_level],
                roi,
                self.resolution,
                self.spatial_scale[0],
                rois_num=rois_num,
                aligned=self.aligned)
        else:
            # Multi-level: assign each RoI to an FPN level by its size,
            # pool per level, then restore the original ordering.
            offset = 2
            k_min = self.start_level + offset
            k_max = self.end_level + offset
            rois_dist, restore_index, rois_num_dist = ops.distribute_fpn_proposals(
                roi,
                k_min,
                k_max,
                self.canconical_level,
                self.canonical_size,
                rois_num=rois_num)
            rois_feat_list = []
            for lvl in range(self.start_level, self.end_level + 1):
                roi_feat = ops.roi_align(
                    feats[lvl],
                    rois_dist[lvl],
                    self.resolution,
                    self.spatial_scale[lvl],
                    sampling_ratio=self.sampling_ratio,
                    rois_num=rois_num_dist[lvl],
                    aligned=self.aligned)
                rois_feat_list.append(roi_feat)
            rois_feat_shuffle = paddle.concat(rois_feat_list)
            # Undo the per-level shuffle introduced by the distribution step.
            rois_feat = paddle.gather(rois_feat_shuffle, restore_index)

        return rois_feat
|
|
@ -0,0 +1,841 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import paddle
|
||||
from paddle import ParamAttr
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle.nn.initializer import Normal, Constant
|
||||
from ppdet.core.workspace import register
|
||||
from ppdet.modeling.proposal_generator.target_layer import RBoxAssigner
|
||||
import numpy as np
|
||||
|
||||
|
||||
class S2ANetAnchorGenerator(nn.Layer):
    """
    AnchorGenerator by paddle: generates axis-aligned base anchors for one
    FPN level and shifts them over the feature map.
    """

    def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None):
        super(S2ANetAnchorGenerator, self).__init__()
        self.base_size = base_size
        self.scales = paddle.to_tensor(scales)
        self.ratios = paddle.to_tensor(ratios)
        self.scale_major = scale_major
        # Optional explicit (x, y) anchor center; defaults to the cell center.
        self.ctr = ctr
        self.base_anchors = self.gen_base_anchors()

    @property
    def num_base_anchors(self):
        # Number of anchors per feature-map location.
        return self.base_anchors.shape[0]

    def gen_base_anchors(self):
        """Build the per-location anchor set as [x1, y1, x2, y2] rows."""
        w = self.base_size
        h = self.base_size
        if self.ctr is None:
            x_ctr = 0.5 * (w - 1)
            y_ctr = 0.5 * (h - 1)
        else:
            x_ctr, y_ctr = self.ctr

        # ratio = h/w, so h scales by sqrt(ratio) and w by 1/sqrt(ratio),
        # keeping the anchor area constant per scale.
        h_ratios = paddle.sqrt(self.ratios)
        w_ratios = 1 / h_ratios
        if self.scale_major:
            ws = (w * w_ratios[:] * self.scales[:]).reshape([-1])
            hs = (h * h_ratios[:] * self.scales[:]).reshape([-1])
        else:
            ws = (w * self.scales[:] * w_ratios[:]).reshape([-1])
            hs = (h * self.scales[:] * h_ratios[:]).reshape([-1])

        base_anchors = paddle.stack(
            [
                x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1),
                x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1)
            ],
            axis=-1)
        base_anchors = paddle.round(base_anchors)
        return base_anchors

    def _meshgrid(self, x, y, row_major=True):
        # Returns flattened coordinate grids; row_major controls (x, y) order.
        yy, xx = paddle.meshgrid(x, y)
        yy = yy.reshape([-1])
        xx = xx.reshape([-1])
        if row_major:
            return xx, yy
        else:
            return yy, xx

    def forward(self, featmap_size, stride=16):
        """Return all anchors for a feature map, in image coordinates.

        Args:
            featmap_size: (feat_h, feat_w) of this level's feature map.
            stride (int): downsampling stride of this level.
        """
        # featmap_size*stride project it to original area
        base_anchors = self.base_anchors

        feat_h = featmap_size[0]
        feat_w = featmap_size[1]
        shift_x = paddle.arange(0, feat_w, 1, 'int32') * stride
        shift_y = paddle.arange(0, feat_h, 1, 'int32') * stride
        shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
        shifts = paddle.stack([shift_xx, shift_yy, shift_xx, shift_yy], axis=-1)

        # NOTE(review): this elementwise add assumes one base anchor per
        # location (shapes [A,4] + [H*W,4] only broadcast when A == 1, which
        # holds for the default scales=[4], ratios=[1.0]) — confirm if more
        # anchors per cell are ever configured.
        all_anchors = base_anchors[:, :] + shifts[:, :]
        all_anchors = all_anchors.reshape([feat_h * feat_w, 4])
        return all_anchors

    def valid_flags(self, featmap_size, valid_size):
        """Mark anchors whose cells lie inside the valid (unpadded) region."""
        feat_h, feat_w = featmap_size
        valid_h, valid_w = valid_size
        assert valid_h <= feat_h and valid_w <= feat_w
        valid_x = paddle.zeros([feat_w], dtype='uint8')
        valid_y = paddle.zeros([feat_h], dtype='uint8')
        valid_x[:valid_w] = 1
        valid_y[:valid_h] = 1
        valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
        valid = valid_xx & valid_yy
        # NOTE(review): `valid.size(0)` is a torch-style call; paddle tensors
        # expose `.shape[0]` instead, so this path likely errors if ever
        # executed — verify before relying on valid_flags.
        valid = valid[:, None].expand(
            [valid.size(0), self.num_base_anchors]).reshape([-1])
        return valid
|
||||
|
||||
|
||||
class AlignConv(nn.Layer):
    """Deformable conv whose offsets align sampling points with rotated
    anchors, so features are extracted along each refined anchor's geometry.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, groups=1):
        super(AlignConv, self).__init__()
        self.kernel_size = kernel_size
        self.align_conv = paddle.vision.ops.DeformConv2D(
            in_channels,
            out_channels,
            kernel_size=self.kernel_size,
            padding=(self.kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
            bias_attr=None)

    @paddle.no_grad()
    def get_offset(self, anchors, featmap_size, stride):
        """Compute DeformConv offsets that move the kernel's sampling grid
        onto each rotated anchor.

        Args:
            anchors: [M,5] xc,yc,w,h,angle
            featmap_size: (feat_h, feat_w)
            stride: 8

        Returns:
            Tensor of shape [1, 2*ks*ks, feat_h, feat_w]: per-location
            (dy, dx) offsets for every kernel tap.
        """
        anchors = paddle.reshape(anchors, [-1, 5])  # (NA,5)
        dtype = anchors.dtype
        feat_h, feat_w = featmap_size
        pad = (self.kernel_size - 1) // 2
        # Kernel tap coordinates relative to the kernel center: -pad..pad.
        idx = paddle.arange(-pad, pad + 1, dtype=dtype)

        yy, xx = paddle.meshgrid(idx, idx)
        xx = paddle.reshape(xx, [-1])
        yy = paddle.reshape(yy, [-1])

        # get sampling locations of default conv
        xc = paddle.arange(0, feat_w, dtype=dtype)
        yc = paddle.arange(0, feat_h, dtype=dtype)
        yc, xc = paddle.meshgrid(yc, xc)

        xc = paddle.reshape(xc, [-1, 1])
        yc = paddle.reshape(yc, [-1, 1])
        x_conv = xc + xx
        y_conv = yc + yy

        # get sampling locations of anchors
        x_ctr = anchors[:, 0]
        y_ctr = anchors[:, 1]
        w = anchors[:, 2]
        h = anchors[:, 3]
        a = anchors[:, 4]

        x_ctr = paddle.reshape(x_ctr, [x_ctr.shape[0], 1])
        y_ctr = paddle.reshape(y_ctr, [y_ctr.shape[0], 1])
        w = paddle.reshape(w, [w.shape[0], 1])
        h = paddle.reshape(h, [h.shape[0], 1])
        a = paddle.reshape(a, [a.shape[0], 1])

        # Anchors are in image coordinates; convert to feature-map units.
        x_ctr = x_ctr / stride
        y_ctr = y_ctr / stride
        w_s = w / stride
        h_s = h / stride
        cos, sin = paddle.cos(a), paddle.sin(a)
        # Spread the ks x ks taps across the anchor box, then rotate by angle.
        dw, dh = w_s / self.kernel_size, h_s / self.kernel_size
        x, y = dw * xx, dh * yy
        xr = cos * x - sin * y
        yr = sin * x + cos * y
        x_anchor, y_anchor = xr + x_ctr, yr + y_ctr
        # Offset = desired anchor-aligned location minus the default conv
        # sampling location.
        offset_x = x_anchor - x_conv
        offset_y = y_anchor - y_conv
        # x, y in anchors is opposite in image coordinates,
        # so we stack them with y, x other than x, y
        offset = paddle.stack([offset_y, offset_x], axis=-1)
        # NA,ks*ks*2
        # [NA, ks, ks, 2] --> [NA, ks*ks*2]
        offset = paddle.reshape(offset, [offset.shape[0], -1])
        # [NA, ks*ks*2] --> [ks*ks*2, NA]
        offset = paddle.transpose(offset, [1, 0])
        # [NA, ks*ks*2] --> [1, ks*ks*2, H, W]
        offset = paddle.reshape(offset, [1, -1, feat_h, feat_w])
        return offset

    def forward(self, x, refine_anchors, stride):
        """Apply the anchor-aligned deformable conv followed by ReLU.

        NOTE(review): offsets are built with batch dim 1, so this presumably
        assumes batch size 1 (or identical anchors per image) — confirm.
        """
        featmap_size = (x.shape[2], x.shape[3])
        offset = self.get_offset(refine_anchors, featmap_size, stride)
        x = F.relu(self.align_conv(x, offset))
        return x
|
||||
|
||||
|
||||
@register
|
||||
class S2ANetHead(nn.Layer):
|
||||
"""
|
||||
S2Anet head
|
||||
Args:
|
||||
stacked_convs (int): number of stacked_convs
|
||||
feat_in (int): input channels of feat
|
||||
feat_out (int): output channels of feat
|
||||
num_classes (int): num_classes
|
||||
anchor_strides (list): stride of anchors
|
||||
anchor_scales (list): scale of anchors
|
||||
anchor_ratios (list): ratios of anchors
|
||||
target_means (list): target_means
|
||||
target_stds (list): target_stds
|
||||
align_conv_type (str): align_conv_type ['Conv', 'AlignConv']
|
||||
align_conv_size (int): kernel size of align_conv
|
||||
use_sigmoid_cls (bool): use sigmoid_cls or not
|
||||
reg_loss_weight (list): loss weight for regression
|
||||
"""
|
||||
__shared__ = ['num_classes']
|
||||
__inject__ = ['anchor_assign']
|
||||
|
||||
    def __init__(self,
                 stacked_convs=2,
                 feat_in=256,
                 feat_out=256,
                 num_classes=15,
                 anchor_strides=[8, 16, 32, 64, 128],
                 anchor_scales=[4],
                 anchor_ratios=[1.0],
                 target_means=0.0,
                 target_stds=1.0,
                 align_conv_type='AlignConv',
                 align_conv_size=3,
                 use_sigmoid_cls=True,
                 anchor_assign=RBoxAssigner().__dict__,
                 reg_loss_weight=[1.0, 1.0, 1.0, 1.0, 1.0],
                 cls_loss_weight=[1.0, 1.0]):
        """Build the S2ANet head: a Feature Alignment Module (FAM) and an
        Oriented Detection Module (ODM) per FPN level, plus one anchor
        generator per stride. See the class docstring for argument details.
        """
        super(S2ANetHead, self).__init__()
        self.stacked_convs = stacked_convs
        self.feat_in = feat_in
        self.feat_out = feat_out
        self.anchor_list = None
        self.anchor_scales = anchor_scales
        self.anchor_ratios = anchor_ratios
        self.anchor_strides = anchor_strides
        # Base anchor size per level equals that level's stride.
        self.anchor_base_sizes = list(anchor_strides)
        self.target_means = target_means
        self.target_stds = target_stds
        assert align_conv_type in ['AlignConv', 'Conv', 'DCN']
        self.align_conv_type = align_conv_type
        self.align_conv_size = align_conv_size

        self.use_sigmoid_cls = use_sigmoid_cls
        self.cls_out_channels = num_classes if self.use_sigmoid_cls else 1
        self.sampling = False
        self.anchor_assign = anchor_assign
        self.reg_loss_weight = reg_loss_weight
        self.cls_loss_weight = cls_loss_weight

        # Filled by forward(); consumed by the loss/post-processing.
        self.s2anet_head_out = None

        # anchor
        self.anchor_generators = []
        for anchor_base in self.anchor_base_sizes:
            self.anchor_generators.append(
                S2ANetAnchorGenerator(anchor_base, anchor_scales,
                                      anchor_ratios))
        self.anchor_generators = paddle.nn.LayerList(self.anchor_generators)
        self.add_sublayer('s2anet_anchor_gen', self.anchor_generators)

        # FAM branch: stacked 3x3 convs for cls and reg towers.
        self.fam_cls_convs = nn.Sequential()
        self.fam_reg_convs = nn.Sequential()

        for i in range(self.stacked_convs):
            chan_in = self.feat_in if i == 0 else self.feat_out

            self.fam_cls_convs.add_sublayer(
                'fam_cls_conv_{}'.format(i),
                nn.Conv2D(
                    in_channels=chan_in,
                    out_channels=self.feat_out,
                    kernel_size=3,
                    padding=1,
                    weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
                    bias_attr=ParamAttr(initializer=Constant(0))))

            self.fam_cls_convs.add_sublayer('fam_cls_conv_{}_act'.format(i),
                                            nn.ReLU())

            self.fam_reg_convs.add_sublayer(
                'fam_reg_conv_{}'.format(i),
                nn.Conv2D(
                    in_channels=chan_in,
                    out_channels=self.feat_out,
                    kernel_size=3,
                    padding=1,
                    weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
                    bias_attr=ParamAttr(initializer=Constant(0))))

            self.fam_reg_convs.add_sublayer('fam_reg_conv_{}_act'.format(i),
                                            nn.ReLU())

        # FAM heads: 5-channel rotated-box deltas and class scores.
        self.fam_reg = nn.Conv2D(
            self.feat_out,
            5,
            1,
            weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
            bias_attr=ParamAttr(initializer=Constant(0)))
        # Focal-loss style bias init so initial foreground prob ~= prior_prob.
        prior_prob = 0.01
        bias_init = float(-np.log((1 - prior_prob) / prior_prob))
        self.fam_cls = nn.Conv2D(
            self.feat_out,
            self.cls_out_channels,
            1,
            weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
            bias_attr=ParamAttr(initializer=Constant(bias_init)))

        # Alignment layer between FAM and ODM.
        if self.align_conv_type == "AlignConv":
            self.align_conv = AlignConv(self.feat_out, self.feat_out,
                                        self.align_conv_size)
        elif self.align_conv_type == "Conv":
            self.align_conv = nn.Conv2D(
                self.feat_out,
                self.feat_out,
                self.align_conv_size,
                padding=(self.align_conv_size - 1) // 2,
                bias_attr=ParamAttr(initializer=Constant(0)))

        elif self.align_conv_type == "DCN":
            # Offsets are predicted from the feature itself.
            self.align_conv_offset = nn.Conv2D(
                self.feat_out,
                2 * self.align_conv_size**2,
                1,
                weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
                bias_attr=ParamAttr(initializer=Constant(0)))

            self.align_conv = paddle.vision.ops.DeformConv2D(
                self.feat_out,
                self.feat_out,
                self.align_conv_size,
                padding=(self.align_conv_size - 1) // 2,
                weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
                bias_attr=False)

        self.or_conv = nn.Conv2D(
            self.feat_out,
            self.feat_out,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
            bias_attr=ParamAttr(initializer=Constant(0)))

        # ODM
        self.odm_cls_convs = nn.Sequential()
        self.odm_reg_convs = nn.Sequential()

        for i in range(self.stacked_convs):
            ch_in = self.feat_out
            # ch_in = int(self.feat_out / 8) if i == 0 else self.feat_out

            self.odm_cls_convs.add_sublayer(
                'odm_cls_conv_{}'.format(i),
                nn.Conv2D(
                    in_channels=ch_in,
                    out_channels=self.feat_out,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
                    bias_attr=ParamAttr(initializer=Constant(0))))

            self.odm_cls_convs.add_sublayer('odm_cls_conv_{}_act'.format(i),
                                            nn.ReLU())

            self.odm_reg_convs.add_sublayer(
                'odm_reg_conv_{}'.format(i),
                nn.Conv2D(
                    in_channels=self.feat_out,
                    out_channels=self.feat_out,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
                    bias_attr=ParamAttr(initializer=Constant(0))))

            self.odm_reg_convs.add_sublayer('odm_reg_conv_{}_act'.format(i),
                                            nn.ReLU())

        # ODM heads: class scores and 5-channel rotated-box deltas.
        self.odm_cls = nn.Conv2D(
            self.feat_out,
            self.cls_out_channels,
            3,
            padding=1,
            weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
            bias_attr=ParamAttr(initializer=Constant(bias_init)))
        self.odm_reg = nn.Conv2D(
            self.feat_out,
            5,
            3,
            padding=1,
            weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
            bias_attr=ParamAttr(initializer=Constant(0)))

        # Per-forward caches (one entry per FPN level), reset each call.
        self.featmap_size_list = []
        self.init_anchors_list = []
        self.rbox_anchors_list = []
        self.refine_anchor_list = []
|
||||
|
||||
    def forward(self, feats):
        """Run FAM then ODM on each FPN level.

        Args:
            feats (list[Tensor]): per-level features, each [N, C, H, W].

        Returns:
            tuple of four lists (one entry per level):
            (fam_cls, fam_reg, odm_cls, odm_reg), where cls entries are
            [N, H*W, cls_out_channels] and reg entries are [N, H*W, 5].
            Also cached on ``self.s2anet_head_out``.
        """
        fam_reg_branch_list = []
        fam_cls_branch_list = []

        odm_reg_branch_list = []
        odm_cls_branch_list = []

        fam_reg1_branch_list = []

        # Reset per-forward caches consumed by the loss / post-processing.
        self.featmap_size_list = []
        self.init_anchors_list = []
        self.rbox_anchors_list = []
        self.refine_anchor_list = []

        for i, feat in enumerate(feats):
            # prepare anchor
            featmap_size = paddle.shape(feat)[-2:]
            self.featmap_size_list.append(featmap_size)
            init_anchors = self.anchor_generators[i](featmap_size,
                                                     self.anchor_strides[i])
            init_anchors = paddle.reshape(
                init_anchors, [featmap_size[0] * featmap_size[1], 4])
            self.init_anchors_list.append(init_anchors)

            # Convert axis-aligned anchors to rotated-box representation.
            rbox_anchors = self.rect2rbox(init_anchors)
            self.rbox_anchors_list.append(rbox_anchors)

            # FAM classification branch.
            fam_cls_feat = self.fam_cls_convs(feat)
            fam_cls = self.fam_cls(fam_cls_feat)
            # [N, CLS, H, W] --> [N, H, W, CLS]
            fam_cls = fam_cls.transpose([0, 2, 3, 1])
            fam_cls_reshape = paddle.reshape(
                fam_cls, [fam_cls.shape[0], -1, self.cls_out_channels])
            fam_cls_branch_list.append(fam_cls_reshape)

            # FAM regression branch.
            fam_reg_feat = self.fam_reg_convs(feat)

            fam_reg = self.fam_reg(fam_reg_feat)
            # [N, 5, H, W] --> [N, H, W, 5]
            fam_reg = fam_reg.transpose([0, 2, 3, 1])
            fam_reg_reshape = paddle.reshape(fam_reg, [fam_reg.shape[0], -1, 5])
            fam_reg_branch_list.append(fam_reg_reshape)

            # refine anchors: decode FAM deltas (detached, so the refinement
            # does not backprop into the FAM branch) into new rotated anchors.
            fam_reg1 = fam_reg.clone()
            fam_reg1.stop_gradient = True
            rbox_anchors.stop_gradient = True
            fam_reg1_branch_list.append(fam_reg1)
            refine_anchor = self.bbox_decode(
                fam_reg1, rbox_anchors, self.target_stds, self.target_means)
            self.refine_anchor_list.append(refine_anchor)

            # Align features to the refined anchors before the ODM.
            if self.align_conv_type == 'AlignConv':
                align_feat = self.align_conv(feat,
                                             refine_anchor.clone(),
                                             self.anchor_strides[i])
            elif self.align_conv_type == 'DCN':
                align_offset = self.align_conv_offset(feat)
                align_feat = self.align_conv(feat, align_offset)
            elif self.align_conv_type == 'Conv':
                align_feat = self.align_conv(feat)

            or_feat = self.or_conv(align_feat)
            odm_reg_feat = or_feat
            odm_cls_feat = or_feat

            odm_reg_feat = self.odm_reg_convs(odm_reg_feat)
            odm_cls_feat = self.odm_cls_convs(odm_cls_feat)

            odm_cls_score = self.odm_cls(odm_cls_feat)
            # [N, CLS, H, W] --> [N, H, W, CLS]
            odm_cls_score = odm_cls_score.transpose([0, 2, 3, 1])
            odm_cls_score_reshape = paddle.reshape(
                odm_cls_score,
                [odm_cls_score.shape[0], -1, self.cls_out_channels])

            odm_cls_branch_list.append(odm_cls_score_reshape)

            odm_bbox_pred = self.odm_reg(odm_reg_feat)
            # [N, 5, H, W] --> [N, H, W, 5]
            odm_bbox_pred = odm_bbox_pred.transpose([0, 2, 3, 1])
            odm_bbox_pred_reshape = paddle.reshape(
                odm_bbox_pred, [odm_bbox_pred.shape[0], -1, 5])
            odm_reg_branch_list.append(odm_bbox_pred_reshape)

        self.s2anet_head_out = (fam_cls_branch_list, fam_reg_branch_list,
                                odm_cls_branch_list, odm_reg_branch_list)
        return self.s2anet_head_out
|
||||
|
||||
def rect2rbox(self, bboxes):
|
||||
"""
|
||||
:param bboxes: shape (n, 4) (xmin, ymin, xmax, ymax)
|
||||
:return: dbboxes: shape (n, 5) (x_ctr, y_ctr, w, h, angle)
|
||||
"""
|
||||
num_boxes = paddle.shape(bboxes)[0]
|
||||
x_ctr = (bboxes[:, 2] + bboxes[:, 0]) / 2.0
|
||||
y_ctr = (bboxes[:, 3] + bboxes[:, 1]) / 2.0
|
||||
edges1 = paddle.abs(bboxes[:, 2] - bboxes[:, 0])
|
||||
edges2 = paddle.abs(bboxes[:, 3] - bboxes[:, 1])
|
||||
|
||||
rbox_w = paddle.maximum(edges1, edges2)
|
||||
rbox_h = paddle.minimum(edges1, edges2)
|
||||
|
||||
# set angle
|
||||
inds = edges1 < edges2
|
||||
inds = paddle.cast(inds, 'int32')
|
||||
inds1 = inds * paddle.arange(0, num_boxes)
|
||||
rboxes_angle = inds1 * np.pi / 2.0
|
||||
|
||||
rboxes = paddle.stack(
|
||||
(x_ctr, y_ctr, rbox_w, rbox_h, rboxes_angle), axis=1)
|
||||
return rboxes
|
||||
|
||||
# deltas to rbox
|
||||
def delta2rbox(self, rrois, deltas, means, stds, wh_ratio_clip=1e-6):
|
||||
"""
|
||||
:param rrois: (cx, cy, w, h, theta)
|
||||
:param deltas: (dx, dy, dw, dh, dtheta)
|
||||
:param means: means of anchor
|
||||
:param stds: stds of anchor
|
||||
:param wh_ratio_clip: clip threshold of wh_ratio
|
||||
:return:
|
||||
"""
|
||||
deltas = paddle.reshape(deltas, [-1, 5])
|
||||
rrois = paddle.reshape(rrois, [-1, 5])
|
||||
pd_means = paddle.ones(shape=[5]) * means
|
||||
pd_stds = paddle.ones(shape=[5]) * stds
|
||||
denorm_deltas = deltas * pd_stds + pd_means
|
||||
|
||||
dx = denorm_deltas[:, 0]
|
||||
dy = denorm_deltas[:, 1]
|
||||
dw = denorm_deltas[:, 2]
|
||||
dh = denorm_deltas[:, 3]
|
||||
dangle = denorm_deltas[:, 4]
|
||||
max_ratio = np.abs(np.log(wh_ratio_clip))
|
||||
dw = paddle.clip(dw, min=-max_ratio, max=max_ratio)
|
||||
dh = paddle.clip(dh, min=-max_ratio, max=max_ratio)
|
||||
|
||||
rroi_x = rrois[:, 0]
|
||||
rroi_y = rrois[:, 1]
|
||||
rroi_w = rrois[:, 2]
|
||||
rroi_h = rrois[:, 3]
|
||||
rroi_angle = rrois[:, 4]
|
||||
|
||||
gx = dx * rroi_w * paddle.cos(rroi_angle) - dy * rroi_h * paddle.sin(
|
||||
rroi_angle) + rroi_x
|
||||
gy = dx * rroi_w * paddle.sin(rroi_angle) + dy * rroi_h * paddle.cos(
|
||||
rroi_angle) + rroi_y
|
||||
gw = rroi_w * dw.exp()
|
||||
gh = rroi_h * dh.exp()
|
||||
ga = np.pi * dangle + rroi_angle
|
||||
ga = (ga + np.pi / 4) % np.pi - np.pi / 4
|
||||
bboxes = paddle.stack([gx, gy, gw, gh, ga], axis=-1)
|
||||
return bboxes
|
||||
|
||||
def bbox_decode(self, bbox_preds, anchors, stds, means, wh_ratio_clip=1e-6):
|
||||
"""decode bbox from deltas
|
||||
Args:
|
||||
bbox_preds: bbox_preds, shape=[N,H,W,5]
|
||||
anchors: anchors, shape=[H,W,5]
|
||||
return:
|
||||
bboxes: return decoded bboxes, shape=[N*H*W,5]
|
||||
"""
|
||||
|
||||
num_imgs, H, W, _ = bbox_preds.shape
|
||||
bbox_delta = paddle.reshape(bbox_preds, [-1, 5])
|
||||
bboxes = self.delta2rbox(anchors, bbox_delta, means, stds,
|
||||
wh_ratio_clip)
|
||||
return bboxes
|
||||
|
||||
def get_prediction(self, nms_pre):
|
||||
refine_anchors = self.refine_anchor_list
|
||||
fam_cls_branch_list, fam_reg_branch_list, odm_cls_branch_list, odm_reg_branch_list = self.s2anet_head_out
|
||||
pred_scores, pred_bboxes = self.get_bboxes(
|
||||
odm_cls_branch_list,
|
||||
odm_reg_branch_list,
|
||||
refine_anchors,
|
||||
nms_pre,
|
||||
cls_out_channels=self.cls_out_channels,
|
||||
use_sigmoid_cls=self.use_sigmoid_cls)
|
||||
|
||||
return pred_scores, pred_bboxes
|
||||
|
||||
def smooth_l1_loss(self, pred, label, delta=1.0 / 9.0):
|
||||
"""
|
||||
Args:
|
||||
pred: pred score
|
||||
label: label
|
||||
delta: delta
|
||||
Returns: loss
|
||||
"""
|
||||
assert pred.shape == label.shape and label.numel() > 0
|
||||
assert delta > 0
|
||||
diff = paddle.abs(pred - label)
|
||||
loss = paddle.where(diff < delta, 0.5 * diff * diff / delta,
|
||||
diff - 0.5 * delta)
|
||||
return loss
|
||||
|
||||
    def get_fam_loss(self, fam_target, s2anet_head_out):
        """Compute FAM (Feature Alignment Module) cls/reg loss for one image/level.

        Args:
            fam_target: tuple of (labels, label_weights, bbox_targets,
                bbox_weights, pos_inds, neg_inds) from anchor assignment.
                NOTE(review): labels/weights appear to be numpy arrays here
                (reshape(-1) / np.repeat are used before to_tensor) — confirm
                against anchor_assign.
            s2anet_head_out: (fam_cls_score, fam_bbox_pred) for this image/level.
        Returns:
            (fam_cls_loss, fam_reg_loss) scalar tensors.
        """
        (feat_labels, feat_label_weights, feat_bbox_targets, feat_bbox_weights,
         pos_inds, neg_inds) = fam_target
        fam_cls_score, fam_bbox_pred = s2anet_head_out

        # step1: sample count used as the focal-loss normalizer (at least 1).
        num_total_samples = len(pos_inds) + len(
            neg_inds) if self.sampling else len(pos_inds)
        num_total_samples = max(1, num_total_samples)

        # step2: calc cls loss
        feat_labels = feat_labels.reshape(-1)
        feat_label_weights = feat_label_weights.reshape(-1)
        fam_cls_score = paddle.squeeze(fam_cls_score, axis=0)
        fam_cls_score1 = fam_cls_score

        # gt_classes 0~14(data), feat_labels 0~14, sigmoid_focal_loss need class>=1
        # Shift labels by one, one-hot encode, then drop the background column.
        feat_labels = feat_labels + 1
        feat_labels = paddle.to_tensor(feat_labels)
        feat_labels_one_hot = F.one_hot(feat_labels, self.cls_out_channels + 1)
        feat_labels_one_hot = feat_labels_one_hot[:, 1:]
        feat_labels_one_hot.stop_gradient = True

        num_total_samples = paddle.to_tensor(
            num_total_samples, dtype='float32', stop_gradient=True)

        fam_cls = F.sigmoid_focal_loss(
            fam_cls_score1,
            feat_labels_one_hot,
            normalizer=num_total_samples,
            reduction='none')

        # Broadcast the per-anchor weight to every class column.
        feat_label_weights = feat_label_weights.reshape(
            feat_label_weights.shape[0], 1)
        feat_label_weights = np.repeat(
            feat_label_weights, self.cls_out_channels, axis=1)
        feat_label_weights = paddle.to_tensor(
            feat_label_weights, stop_gradient=True)

        fam_cls = fam_cls * feat_label_weights
        fam_cls_total = paddle.sum(fam_cls)

        # step3: regression loss (smooth-L1 on the 5 rbox deltas).
        feat_bbox_targets = paddle.to_tensor(
            feat_bbox_targets, dtype='float32', stop_gradient=True)
        feat_bbox_targets = paddle.reshape(feat_bbox_targets, [-1, 5])
        fam_bbox_pred = paddle.squeeze(fam_bbox_pred, axis=0)
        fam_bbox_pred = paddle.reshape(fam_bbox_pred, [-1, 5])
        fam_bbox = self.smooth_l1_loss(fam_bbox_pred, feat_bbox_targets)
        # Per-coordinate weights for (dx, dy, dw, dh, dtheta).
        loss_weight = paddle.to_tensor(
            self.reg_loss_weight, dtype='float32', stop_gradient=True)
        fam_bbox = paddle.multiply(fam_bbox, loss_weight)
        feat_bbox_weights = paddle.to_tensor(
            feat_bbox_weights, stop_gradient=True)
        fam_bbox = fam_bbox * feat_bbox_weights
        fam_bbox_total = paddle.sum(fam_bbox) / num_total_samples

        fam_cls_loss_weight = paddle.to_tensor(
            self.cls_loss_weight[0], dtype='float32', stop_gradient=True)
        fam_cls_loss = fam_cls_total * fam_cls_loss_weight
        # NOTE(review): add_n on a single tensor is an identity pass-through.
        fam_reg_loss = paddle.add_n(fam_bbox_total)
        return fam_cls_loss, fam_reg_loss
|
||||
|
||||
    def get_odm_loss(self, odm_target, s2anet_head_out):
        """Compute ODM (Oriented Detection Module) cls/reg loss for one image/level.

        Mirrors get_fam_loss but operates on the refined-anchor targets and the
        ODM branch outputs.

        Args:
            odm_target: tuple of (labels, label_weights, bbox_targets,
                bbox_weights, pos_inds, neg_inds) from anchor assignment.
            s2anet_head_out: (odm_cls_score, odm_bbox_pred) for this image/level.
        Returns:
            (odm_cls_loss, odm_reg_loss) scalar tensors.
        """
        (feat_labels, feat_label_weights, feat_bbox_targets, feat_bbox_weights,
         pos_inds, neg_inds) = odm_target
        odm_cls_score, odm_bbox_pred = s2anet_head_out

        # step1: sample count used as the focal-loss normalizer (at least 1).
        num_total_samples = len(pos_inds) + len(
            neg_inds) if self.sampling else len(pos_inds)
        num_total_samples = max(1, num_total_samples)

        # step2: calc cls loss
        feat_labels = feat_labels.reshape(-1)
        feat_label_weights = feat_label_weights.reshape(-1)
        odm_cls_score = paddle.squeeze(odm_cls_score, axis=0)
        odm_cls_score1 = odm_cls_score

        # gt_classes 0~14(data), feat_labels 0~14, sigmoid_focal_loss need class>=1
        # for debug 0426
        # Shift labels by one, one-hot encode, then drop the background column.
        feat_labels = feat_labels + 1
        feat_labels = paddle.to_tensor(feat_labels)
        feat_labels_one_hot = F.one_hot(feat_labels, self.cls_out_channels + 1)
        feat_labels_one_hot = feat_labels_one_hot[:, 1:]
        feat_labels_one_hot.stop_gradient = True

        num_total_samples = paddle.to_tensor(
            num_total_samples, dtype='float32', stop_gradient=True)

        odm_cls = F.sigmoid_focal_loss(
            odm_cls_score1,
            feat_labels_one_hot,
            normalizer=num_total_samples,
            reduction='none')

        # Broadcast the per-anchor weight to every class column.
        feat_label_weights = feat_label_weights.reshape(
            feat_label_weights.shape[0], 1)
        feat_label_weights = np.repeat(
            feat_label_weights, self.cls_out_channels, axis=1)
        feat_label_weights = paddle.to_tensor(
            feat_label_weights, stop_gradient=True)

        odm_cls = odm_cls * feat_label_weights
        odm_cls_total = paddle.sum(odm_cls)

        # step3: regression loss (smooth-L1 on the 5 rbox deltas).
        feat_bbox_targets = paddle.to_tensor(
            feat_bbox_targets, dtype='float32', stop_gradient=True)
        feat_bbox_targets = paddle.reshape(feat_bbox_targets, [-1, 5])
        odm_bbox_pred = paddle.squeeze(odm_bbox_pred, axis=0)
        odm_bbox_pred = paddle.reshape(odm_bbox_pred, [-1, 5])
        odm_bbox = self.smooth_l1_loss(odm_bbox_pred, feat_bbox_targets)
        # Per-coordinate weights for (dx, dy, dw, dh, dtheta).
        loss_weight = paddle.to_tensor(
            self.reg_loss_weight, dtype='float32', stop_gradient=True)
        odm_bbox = paddle.multiply(odm_bbox, loss_weight)
        feat_bbox_weights = paddle.to_tensor(
            feat_bbox_weights, stop_gradient=True)
        odm_bbox = odm_bbox * feat_bbox_weights
        odm_bbox_total = paddle.sum(odm_bbox) / num_total_samples

        odm_cls_loss_weight = paddle.to_tensor(
            self.cls_loss_weight[0], dtype='float32', stop_gradient=True)
        odm_cls_loss = odm_cls_total * odm_cls_loss_weight
        # NOTE(review): add_n on a single tensor is an identity pass-through.
        odm_reg_loss = paddle.add_n(odm_bbox_total)
        return odm_cls_loss, odm_reg_loss
|
||||
|
||||
    def get_loss(self, inputs):
        """Compute total S2ANet loss over the batch.

        Iterates images and pyramid levels; for each, assigns targets
        (FAM against the initial rotated anchors, ODM against the refined
        anchors cached in forward()) and accumulates the four loss terms.

        Args:
            inputs (dict): batch dict with keys im_shape, scale_factor,
                gt_rbox, gt_class, is_crowd (each indexed per image).
        Returns:
            dict with fam_cls_loss, fam_reg_loss, odm_cls_loss, odm_reg_loss.
        """
        # inputs: im_id image im_shape scale_factor gt_bbox gt_class is_crowd

        # compute loss
        fam_cls_loss_lst = []
        fam_reg_loss_lst = []
        odm_cls_loss_lst = []
        odm_reg_loss_lst = []

        im_shape = inputs['im_shape']
        for im_id in range(im_shape.shape[0]):
            np_im_shape = inputs['im_shape'][im_id].numpy()
            np_scale_factor = inputs['scale_factor'][im_id].numpy()
            # data_format: (xc, yc, w, h, theta)
            gt_bboxes = inputs['gt_rbox'][im_id].numpy()
            gt_labels = inputs['gt_class'][im_id].numpy()
            is_crowd = inputs['is_crowd'][im_id].numpy()
            # Shift to 1-based class ids; 0 is reserved for background.
            gt_labels = gt_labels + 1

            # FAM: assign targets on the initial rotated anchors, per level.
            for idx, rbox_anchors in enumerate(self.rbox_anchors_list):
                rbox_anchors = rbox_anchors.numpy()
                rbox_anchors = rbox_anchors.reshape(-1, 5)
                im_fam_target = self.anchor_assign(rbox_anchors, gt_bboxes,
                                                   gt_labels, is_crowd)
                # feat: per-image slices of the cached head outputs.
                fam_cls_feat = self.s2anet_head_out[0][idx][im_id]
                fam_reg_feat = self.s2anet_head_out[1][idx][im_id]

                im_s2anet_fam_feat = (fam_cls_feat, fam_reg_feat)
                im_fam_cls_loss, im_fam_reg_loss = self.get_fam_loss(
                    im_fam_target, im_s2anet_fam_feat)
                fam_cls_loss_lst.append(im_fam_cls_loss)
                fam_reg_loss_lst.append(im_fam_reg_loss)

            # ODM: assign targets on the refined anchors, per level.
            for idx, refine_anchors in enumerate(self.refine_anchor_list):
                refine_anchors = refine_anchors.numpy()
                refine_anchors = refine_anchors.reshape(-1, 5)
                im_odm_target = self.anchor_assign(refine_anchors, gt_bboxes,
                                                   gt_labels, is_crowd)

                odm_cls_feat = self.s2anet_head_out[2][idx][im_id]
                odm_reg_feat = self.s2anet_head_out[3][idx][im_id]

                im_s2anet_odm_feat = (odm_cls_feat, odm_reg_feat)
                im_odm_cls_loss, im_odm_reg_loss = self.get_odm_loss(
                    im_odm_target, im_s2anet_odm_feat)
                odm_cls_loss_lst.append(im_odm_cls_loss)
                odm_reg_loss_lst.append(im_odm_reg_loss)

        # Sum across images and levels.
        fam_cls_loss = paddle.add_n(fam_cls_loss_lst)
        fam_reg_loss = paddle.add_n(fam_reg_loss_lst)
        odm_cls_loss = paddle.add_n(odm_cls_loss_lst)
        odm_reg_loss = paddle.add_n(odm_reg_loss_lst)
        return {
            'fam_cls_loss': fam_cls_loss,
            'fam_reg_loss': fam_reg_loss,
            'odm_cls_loss': odm_cls_loss,
            'odm_reg_loss': odm_reg_loss
        }
|
||||
|
||||
    def get_bboxes(self, cls_score_list, bbox_pred_list, mlvl_anchors, nms_pre,
                   cls_out_channels, use_sigmoid_cls):
        """Decode per-level predictions into scores and rotated boxes.

        Args:
            cls_score_list: per-level class scores.
            bbox_pred_list: per-level bbox deltas ([5, H, W] per level —
                transposed to HWC below).
            mlvl_anchors: per-level refined anchors.
            nms_pre (int): if > 0, keep only the top-nms_pre anchors per
                level by max foreground score before decoding.
            cls_out_channels (int): number of foreground classes.
            use_sigmoid_cls (bool): sigmoid scores (no background column)
                vs softmax scores (column 0 is background).
        Returns:
            (mlvl_scores, mlvl_bboxes) concatenated over all levels.
        """
        assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_anchors)

        mlvl_bboxes = []
        mlvl_scores = []

        idx = 0
        for cls_score, bbox_pred, anchors in zip(cls_score_list, bbox_pred_list,
                                                 mlvl_anchors):
            cls_score = paddle.reshape(cls_score, [-1, cls_out_channels])
            if use_sigmoid_cls:
                scores = F.sigmoid(cls_score)
            else:
                scores = F.softmax(cls_score, axis=-1)

            # bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 5)
            bbox_pred = paddle.transpose(bbox_pred, [1, 2, 0])
            bbox_pred = paddle.reshape(bbox_pred, [-1, 5])
            anchors = paddle.reshape(anchors, [-1, 5])

            if nms_pre > 0 and scores.shape[0] > nms_pre:
                # Get maximum scores for foreground classes.
                if use_sigmoid_cls:
                    max_scores = paddle.max(scores, axis=1)
                else:
                    # Skip the softmax background column 0.
                    max_scores = paddle.max(scores[:, 1:], axis=1)

                # Keep the same top-k subset of anchors, deltas and scores.
                topk_val, topk_inds = paddle.topk(max_scores, nms_pre)
                anchors = paddle.gather(anchors, topk_inds)
                bbox_pred = paddle.gather(bbox_pred, topk_inds)
                scores = paddle.gather(scores, topk_inds)

            bboxes = self.delta2rbox(anchors, bbox_pred, self.target_means,
                                     self.target_stds)
            mlvl_bboxes.append(bboxes)
            mlvl_scores.append(scores)

            idx += 1

        mlvl_bboxes = paddle.concat(mlvl_bboxes, axis=0)
        mlvl_scores = paddle.concat(mlvl_scores)
        if use_sigmoid_cls:
            # Add a dummy background class to the front when using sigmoid
            # so downstream NMS sees a consistent (1 + num_classes) layout.
            padding = paddle.zeros(
                [mlvl_scores.shape[0], 1], dtype=mlvl_scores.dtype)
            mlvl_scores = paddle.concat([padding, mlvl_scores], axis=1)

        return mlvl_scores, mlvl_bboxes
|
|
@ -0,0 +1,530 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from paddle import ParamAttr
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle.nn.initializer import Normal, Constant
|
||||
from ppdet.modeling.layers import ConvNormLayer
|
||||
from ppdet.core.workspace import register
|
||||
|
||||
from six.moves import zip
|
||||
import numpy as np
|
||||
|
||||
__all__ = ['SOLOv2Head']
|
||||
|
||||
|
||||
@register
class SOLOv2MaskHead(nn.Layer):
    """
    MaskHead of SOLOv2.

    Fuses multi-level FPN features into a single unified mask feature map:
    each level gets its own conv tower (with upsampling for higher levels,
    and CoordConv channels on the last level), the towers' outputs are
    summed, and a final 1x1 conv produces the mask features.

    Args:
        in_channels (int): The channel number of input Tensor.
        out_channels (int): The channel number of output Tensor.
        start_level (int): The position where the input starts.
        end_level (int): The position where the input ends.
        use_dcn_in_tower (bool): Whether to use dcn in tower or not.
    """

    def __init__(self,
                 in_channels=256,
                 mid_channels=128,
                 out_channels=256,
                 start_level=0,
                 end_level=3,
                 use_dcn_in_tower=False):
        super(SOLOv2MaskHead, self).__init__()
        assert start_level >= 0 and end_level >= start_level
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.mid_channels = mid_channels
        self.use_dcn_in_tower = use_dcn_in_tower
        self.range_level = end_level - start_level + 1
        # TODO: add DeformConvNorm
        conv_type = [ConvNormLayer]
        self.conv_func = conv_type[0]
        if self.use_dcn_in_tower:
            # NOTE(review): conv_type has a single entry, so index 1 would
            # raise IndexError — DCN support is not wired up yet.
            self.conv_func = conv_type[1]
        self.convs_all_levels = []
        for i in range(start_level, end_level + 1):
            # Sublayer names follow the pretrained-weight key layout; do not
            # rename them or checkpoint loading breaks.
            conv_feat_name = 'mask_feat_head.convs_all_levels.{}'.format(i)
            conv_pre_feat = nn.Sequential()
            if i == start_level:
                # Lowest level: a single conv, no upsampling needed.
                conv_pre_feat.add_sublayer(
                    conv_feat_name + '.conv' + str(i),
                    self.conv_func(
                        ch_in=self.in_channels,
                        ch_out=self.mid_channels,
                        filter_size=3,
                        stride=1,
                        norm_type='gn'))
                self.add_sublayer('conv_pre_feat' + str(i), conv_pre_feat)
                self.convs_all_levels.append(conv_pre_feat)
            else:
                # Level i needs i conv+relu+2x-upsample stages to reach the
                # base resolution; the last level's first conv takes 2 extra
                # CoordConv channels.
                for j in range(i):
                    ch_in = 0
                    if j == 0:
                        ch_in = self.in_channels + 2 if i == end_level else self.in_channels
                    else:
                        ch_in = self.mid_channels
                    conv_pre_feat.add_sublayer(
                        conv_feat_name + '.conv' + str(j),
                        self.conv_func(
                            ch_in=ch_in,
                            ch_out=self.mid_channels,
                            filter_size=3,
                            stride=1,
                            norm_type='gn'))
                    conv_pre_feat.add_sublayer(
                        conv_feat_name + '.conv' + str(j) + 'act', nn.ReLU())
                    conv_pre_feat.add_sublayer(
                        'upsample' + str(i) + str(j),
                        nn.Upsample(
                            scale_factor=2, mode='bilinear'))
                self.add_sublayer('conv_pre_feat' + str(i), conv_pre_feat)
                self.convs_all_levels.append(conv_pre_feat)

        # Final 1x1 projection from mid_channels to out_channels.
        conv_pred_name = 'mask_feat_head.conv_pred.0'
        self.conv_pred = self.add_sublayer(
            conv_pred_name,
            self.conv_func(
                ch_in=self.mid_channels,
                ch_out=self.out_channels,
                filter_size=1,
                stride=1,
                norm_type='gn'))

    def forward(self, inputs):
        """
        Get SOLOv2MaskHead output.

        Args:
            inputs(list[Tensor]): feature map from each necks with shape of [N, C, H, W]
        Returns:
            ins_pred(Tensor): Output of SOLOv2MaskHead head
        """
        feat_all_level = F.relu(self.convs_all_levels[0](inputs[0]))
        for i in range(1, self.range_level):
            input_p = inputs[i]
            if i == (self.range_level - 1):
                # Append normalized x/y coordinate channels (CoordConv) to
                # the topmost level before its conv tower.
                input_feat = input_p
                x_range = paddle.linspace(
                    -1, 1, paddle.shape(input_feat)[-1], dtype='float32')
                y_range = paddle.linspace(
                    -1, 1, paddle.shape(input_feat)[-2], dtype='float32')
                y, x = paddle.meshgrid([y_range, x_range])
                x = paddle.unsqueeze(x, [0, 1])
                y = paddle.unsqueeze(y, [0, 1])
                y = paddle.expand(
                    y, shape=[paddle.shape(input_feat)[0], 1, -1, -1])
                x = paddle.expand(
                    x, shape=[paddle.shape(input_feat)[0], 1, -1, -1])
                coord_feat = paddle.concat([x, y], axis=1)
                input_p = paddle.concat([input_p, coord_feat], axis=1)
            # Sum the upsampled tower output into the running fused feature.
            feat_all_level = paddle.add(feat_all_level,
                                        self.convs_all_levels[i](input_p))
        ins_pred = F.relu(self.conv_pred(feat_all_level))

        return ins_pred
|
||||
|
||||
|
||||
@register
|
||||
class SOLOv2Head(nn.Layer):
|
||||
"""
|
||||
Head block for SOLOv2 network
|
||||
|
||||
Args:
|
||||
num_classes (int): Number of output classes.
|
||||
in_channels (int): Number of input channels.
|
||||
seg_feat_channels (int): Num_filters of kernel & categroy branch convolution operation.
|
||||
stacked_convs (int): Times of convolution operation.
|
||||
num_grids (list[int]): List of feature map grids size.
|
||||
kernel_out_channels (int): Number of output channels in kernel branch.
|
||||
dcn_v2_stages (list): Which stage use dcn v2 in tower. It is between [0, stacked_convs).
|
||||
segm_strides (list[int]): List of segmentation area stride.
|
||||
solov2_loss (object): SOLOv2Loss instance.
|
||||
score_threshold (float): Threshold of categroy score.
|
||||
mask_nms (object): MaskMatrixNMS instance.
|
||||
"""
|
||||
__inject__ = ['solov2_loss', 'mask_nms']
|
||||
__shared__ = ['num_classes']
|
||||
|
||||
    def __init__(self,
                 num_classes=80,
                 in_channels=256,
                 seg_feat_channels=256,
                 stacked_convs=4,
                 num_grids=[40, 36, 24, 16, 12],
                 kernel_out_channels=256,
                 dcn_v2_stages=[],
                 segm_strides=[8, 8, 16, 32, 32],
                 solov2_loss=None,
                 score_threshold=0.1,
                 mask_threshold=0.5,
                 mask_nms=None):
        # NOTE(review): mutable defaults (lists) are shared across instances;
        # safe only while they are never mutated.
        super(SOLOv2Head, self).__init__()
        self.num_classes = num_classes
        self.in_channels = in_channels
        self.seg_num_grids = num_grids
        # One output channel per foreground class (no background column).
        self.cate_out_channels = self.num_classes
        self.seg_feat_channels = seg_feat_channels
        self.stacked_convs = stacked_convs
        self.kernel_out_channels = kernel_out_channels
        self.dcn_v2_stages = dcn_v2_stages
        self.segm_strides = segm_strides
        self.solov2_loss = solov2_loss
        self.mask_nms = mask_nms
        self.score_threshold = score_threshold
        self.mask_threshold = mask_threshold

        conv_type = [ConvNormLayer]
        self.conv_func = conv_type[0]
        self.kernel_pred_convs = []
        self.cate_pred_convs = []
        # Build the parallel kernel/category conv towers; sublayer names match
        # the pretrained-weight key layout — do not rename.
        for i in range(self.stacked_convs):
            if i in self.dcn_v2_stages:
                # NOTE(review): conv_type has one entry; index 1 would raise
                # IndexError — DCN v2 support is not wired up yet.
                self.conv_func = conv_type[1]
            # Kernel branch's first conv takes 2 extra CoordConv channels.
            ch_in = self.in_channels + 2 if i == 0 else self.seg_feat_channels
            kernel_conv = self.add_sublayer(
                'bbox_head.kernel_convs.' + str(i),
                self.conv_func(
                    ch_in=ch_in,
                    ch_out=self.seg_feat_channels,
                    filter_size=3,
                    stride=1,
                    norm_type='gn'))
            self.kernel_pred_convs.append(kernel_conv)
            ch_in = self.in_channels if i == 0 else self.seg_feat_channels
            cate_conv = self.add_sublayer(
                'bbox_head.cate_convs.' + str(i),
                self.conv_func(
                    ch_in=ch_in,
                    ch_out=self.seg_feat_channels,
                    filter_size=3,
                    stride=1,
                    norm_type='gn'))
            self.cate_pred_convs.append(cate_conv)

        # Prediction heads: dynamic-kernel regressor and category classifier.
        self.solo_kernel = self.add_sublayer(
            'bbox_head.solo_kernel',
            nn.Conv2D(
                self.seg_feat_channels,
                self.kernel_out_channels,
                kernel_size=3,
                stride=1,
                padding=1,
                weight_attr=ParamAttr(initializer=Normal(
                    mean=0., std=0.01)),
                bias_attr=True))
        self.solo_cate = self.add_sublayer(
            'bbox_head.solo_cate',
            nn.Conv2D(
                self.seg_feat_channels,
                self.cate_out_channels,
                kernel_size=3,
                stride=1,
                padding=1,
                weight_attr=ParamAttr(initializer=Normal(
                    mean=0., std=0.01)),
                # Focal-loss prior bias: start with ~0.01 foreground prob.
                bias_attr=ParamAttr(initializer=Constant(
                    value=float(-np.log((1 - 0.01) / 0.01))))))
|
||||
|
||||
def _points_nms(self, heat, kernel_size=2):
|
||||
hmax = F.max_pool2d(heat, kernel_size=kernel_size, stride=1, padding=1)
|
||||
keep = paddle.cast((hmax[:, :, :-1, :-1] == heat), 'float32')
|
||||
return heat * keep
|
||||
|
||||
def _split_feats(self, feats):
|
||||
return (F.interpolate(
|
||||
feats[0],
|
||||
scale_factor=0.5,
|
||||
align_corners=False,
|
||||
align_mode=0,
|
||||
mode='bilinear'), feats[1], feats[2], feats[3], F.interpolate(
|
||||
feats[4],
|
||||
size=paddle.shape(feats[3])[-2:],
|
||||
mode='bilinear',
|
||||
align_corners=False,
|
||||
align_mode=0))
|
||||
|
||||
def forward(self, input):
|
||||
"""
|
||||
Get SOLOv2 head output
|
||||
|
||||
Args:
|
||||
input (list): List of Tensors, output of backbone or neck stages
|
||||
Returns:
|
||||
cate_pred_list (list): Tensors of each category branch layer
|
||||
kernel_pred_list (list): Tensors of each kernel branch layer
|
||||
"""
|
||||
feats = self._split_feats(input)
|
||||
cate_pred_list = []
|
||||
kernel_pred_list = []
|
||||
for idx in range(len(self.seg_num_grids)):
|
||||
cate_pred, kernel_pred = self._get_output_single(feats[idx], idx)
|
||||
cate_pred_list.append(cate_pred)
|
||||
kernel_pred_list.append(kernel_pred)
|
||||
|
||||
return cate_pred_list, kernel_pred_list
|
||||
|
||||
    def _get_output_single(self, input, idx):
        """Run the category and kernel branches for one pyramid level.

        Args:
            input (Tensor): level feature map, [N, C, H, W].
            idx (int): level index into self.seg_num_grids.
        Returns:
            (cate_pred, kernel_pred) for this level; at inference cate_pred
            is sigmoid-activated, point-NMS'd and transposed to NHWC.
        """
        ins_kernel_feat = input
        # CoordConv: append normalized x/y coordinate channels.
        x_range = paddle.linspace(
            -1, 1, paddle.shape(ins_kernel_feat)[-1], dtype='float32')
        y_range = paddle.linspace(
            -1, 1, paddle.shape(ins_kernel_feat)[-2], dtype='float32')
        y, x = paddle.meshgrid([y_range, x_range])
        x = paddle.unsqueeze(x, [0, 1])
        y = paddle.unsqueeze(y, [0, 1])
        y = paddle.expand(
            y, shape=[paddle.shape(ins_kernel_feat)[0], 1, -1, -1])
        x = paddle.expand(
            x, shape=[paddle.shape(ins_kernel_feat)[0], 1, -1, -1])
        coord_feat = paddle.concat([x, y], axis=1)
        ins_kernel_feat = paddle.concat([ins_kernel_feat, coord_feat], axis=1)

        # kernel branch: resize to the SxS grid for this level.
        kernel_feat = ins_kernel_feat
        seg_num_grid = self.seg_num_grids[idx]
        kernel_feat = F.interpolate(
            kernel_feat,
            size=[seg_num_grid, seg_num_grid],
            mode='bilinear',
            align_corners=False,
            align_mode=0)
        # Category branch drops the 2 coordinate channels appended above.
        cate_feat = kernel_feat[:, :-2, :, :]

        for kernel_layer in self.kernel_pred_convs:
            kernel_feat = F.relu(kernel_layer(kernel_feat))
        kernel_pred = self.solo_kernel(kernel_feat)
        # cate branch
        for cate_layer in self.cate_pred_convs:
            cate_feat = F.relu(cate_layer(cate_feat))
        cate_pred = self.solo_cate(cate_feat)

        if not self.training:
            # Inference-only post-processing; training consumes raw logits.
            cate_pred = self._points_nms(F.sigmoid(cate_pred), kernel_size=2)
            cate_pred = paddle.transpose(cate_pred, [0, 2, 3, 1])
        return cate_pred, kernel_pred
|
||||
|
||||
    def get_loss(self, cate_preds, kernel_preds, ins_pred, ins_labels,
                 cate_labels, grid_order_list, fg_num):
        """
        Get loss of network of SOLOv2.

        Args:
            cate_preds (list): Tensor list of categroy branch output.
            kernel_preds (list): Tensor list of kernel branch output.
            ins_pred (list): Tensor list of instance branch output.
            ins_labels (list): List of instance labels pre batch.
            cate_labels (list): List of categroy labels pre batch.
            grid_order_list (list): List of index in pre grid.
            fg_num (int): Number of positive samples in a mini-batch.
        Returns:
            loss_ins (Tensor): The instance loss Tensor of SOLOv2 network.
            loss_cate (Tensor): The category loss Tensor of SOLOv2 network.
        """
        batch_size = paddle.shape(grid_order_list[0])[0]
        ins_pred_list = []
        # Generate each level's instance masks by convolving the unified mask
        # features with the kernels predicted at the positive grid cells.
        for kernel_preds_level, grid_orders_level in zip(kernel_preds,
                                                         grid_order_list):
            if grid_orders_level.shape[1] == 0:
                # No positives at this level.
                ins_pred_list.append(None)
                continue
            grid_orders_level = paddle.reshape(grid_orders_level, [-1])
            # [N, K, S, S] -> [N*S*S, K] so kernels can be gathered by grid id.
            reshape_pred = paddle.reshape(
                kernel_preds_level,
                shape=(paddle.shape(kernel_preds_level)[0],
                       paddle.shape(kernel_preds_level)[1], -1))
            reshape_pred = paddle.transpose(reshape_pred, [0, 2, 1])
            reshape_pred = paddle.reshape(
                reshape_pred, shape=(-1, paddle.shape(reshape_pred)[2]))
            gathered_pred = paddle.gather(reshape_pred, index=grid_orders_level)
            gathered_pred = paddle.reshape(
                gathered_pred,
                shape=[batch_size, -1, paddle.shape(gathered_pred)[1]])
            cur_ins_pred = ins_pred
            cur_ins_pred = paddle.reshape(
                cur_ins_pred,
                shape=(paddle.shape(cur_ins_pred)[0],
                       paddle.shape(cur_ins_pred)[1], -1))
            # Dynamic convolution realized as a batched matmul:
            # [N, P, K] x [N, K, H*W] -> [N, P, H*W].
            ins_pred_conv = paddle.matmul(gathered_pred, cur_ins_pred)
            cur_ins_pred = paddle.reshape(
                ins_pred_conv,
                shape=(-1, paddle.shape(ins_pred)[-2],
                       paddle.shape(ins_pred)[-1]))
            ins_pred_list.append(cur_ins_pred)

        num_ins = paddle.sum(fg_num)
        # Flatten category predictions to [total_cells, num_classes].
        cate_preds = [
            paddle.reshape(
                paddle.transpose(cate_pred, [0, 2, 3, 1]),
                shape=(-1, self.cate_out_channels)) for cate_pred in cate_preds
        ]
        flatten_cate_preds = paddle.concat(cate_preds)
        new_cate_labels = []
        for cate_label in cate_labels:
            new_cate_labels.append(paddle.reshape(cate_label, shape=[-1]))
        cate_labels = paddle.concat(new_cate_labels)

        loss_ins, loss_cate = self.solov2_loss(
            ins_pred_list, ins_labels, flatten_cate_preds, cate_labels, num_ins)

        return {'loss_ins': loss_ins, 'loss_cate': loss_cate}
|
||||
|
||||
    def get_prediction(self, cate_preds, kernel_preds, seg_pred, im_shape,
                       scale_factor):
        """
        Get prediction result of SOLOv2 network.

        Args:
            cate_preds (list): List of Variables, output of categroy branch.
            kernel_preds (list): List of Variables, output of kernel branch.
            seg_pred (list): List of Variables, output of mask head stages.
            im_shape (Variables): [h, w] for input images.
            scale_factor (Variables): [scale, scale] for input images.
        Returns:
            seg_masks (Tensor): The prediction segmentation.
            cate_labels (Tensor): The prediction categroy label of each segmentation.
            seg_masks (Tensor): The prediction score of each segmentation.
        """
        num_levels = len(cate_preds)
        featmap_size = paddle.shape(seg_pred)[-2:]
        seg_masks_list = []
        cate_labels_list = []
        cate_scores_list = []
        # Multiplying by 1.0 detaches/copies the branch outputs before reshaping.
        cate_preds = [cate_pred * 1.0 for cate_pred in cate_preds]
        kernel_preds = [kernel_pred * 1.0 for kernel_pred in kernel_preds]
        # Currently only supports batch size == 1
        for idx in range(1):
            # Flatten every level to [cells, C] and concatenate across levels.
            cate_pred_list = [
                paddle.reshape(
                    cate_preds[i][idx], shape=(-1, self.cate_out_channels))
                for i in range(num_levels)
            ]
            seg_pred_list = seg_pred
            kernel_pred_list = [
                paddle.reshape(
                    paddle.transpose(kernel_preds[i][idx], [1, 2, 0]),
                    shape=(-1, self.kernel_out_channels))
                for i in range(num_levels)
            ]
            cate_pred_list = paddle.concat(cate_pred_list, axis=0)
            kernel_pred_list = paddle.concat(kernel_pred_list, axis=0)

            seg_masks, cate_labels, cate_scores = self.get_seg_single(
                cate_pred_list, seg_pred_list, kernel_pred_list, featmap_size,
                im_shape[idx], scale_factor[idx][0])
        bbox_num = paddle.shape(cate_labels)[0]
        return seg_masks, cate_labels, cate_scores, bbox_num
|
||||
|
||||
    def get_seg_single(self, cate_preds, seg_preds, kernel_preds, featmap_size,
                       im_shape, scale_factor):
        """
        Decode the SOLOv2 instance masks for a single image.

        Args:
            cate_preds (Tensor): Flattened category scores of all FPN levels,
                shape (num_points, cate_out_channels).
            kernel_preds (Tensor): Per-point dynamic conv kernels,
                shape (num_points, kernel_out_channels).
            seg_preds (Tensor): Shared mask feature map.
            featmap_size (Tensor): [h, w] of the mask feature map.
            im_shape (Tensor): [h, w] of the network-input image.
            scale_factor (Tensor): resize scale from the original image.
        Returns:
            seg_masks (Tensor): uint8 binary masks at the original image size.
            cate_labels (Tensor): category label of each mask.
            cate_scores (Tensor): confidence score of each mask.
        """
        h = paddle.cast(im_shape[0], 'int32')[0]
        w = paddle.cast(im_shape[1], 'int32')[0]
        # The mask branch runs at 1/4 input resolution, hence the 4x factor.
        upsampled_size_out = [featmap_size[0] * 4, featmap_size[1] * 4]

        # Keep (point, class) pairs above the score threshold; the
        # where/nonzero pair emulates boolean-mask indexing.
        y = paddle.zeros(shape=paddle.shape(cate_preds), dtype='float32')
        inds = paddle.where(cate_preds > self.score_threshold, cate_preds, y)
        inds = paddle.nonzero(inds)
        cate_preds = paddle.reshape(cate_preds, shape=[-1])
        # Prevent empty and increase fake data: append one dummy candidate so
        # downstream gather/conv ops never see a zero-size tensor; it carries
        # score 0 and is naturally filtered out later.
        ind_a = paddle.cast(paddle.shape(kernel_preds)[0], 'int64')
        ind_b = paddle.zeros(shape=[1], dtype='int64')
        inds_end = paddle.unsqueeze(paddle.concat([ind_a, ind_b]), 0)
        inds = paddle.concat([inds, inds_end])
        kernel_preds_end = paddle.ones(
            shape=[1, self.kernel_out_channels], dtype='float32')
        kernel_preds = paddle.concat([kernel_preds, kernel_preds_end])
        cate_preds = paddle.concat(
            [cate_preds, paddle.zeros(
                shape=[1], dtype='float32')])

        # cate_labels & kernel_preds
        cate_labels = inds[:, 1]
        kernel_preds = paddle.gather(kernel_preds, index=inds[:, 0])
        # NOTE(review): 80 hard-codes the class count used to flatten the
        # (point, class) grid -- confirm it matches cate_out_channels.
        cate_score_idx = paddle.add(inds[:, 0] * 80, cate_labels)
        cate_scores = paddle.gather(cate_preds, index=cate_score_idx)

        # Each FPN level contributes grid^2 points; record every point's stride.
        size_trans = np.power(self.seg_num_grids, 2)
        strides = []
        for _ind in range(len(self.segm_strides)):
            strides.append(
                paddle.full(
                    shape=[int(size_trans[_ind])],
                    fill_value=self.segm_strides[_ind],
                    dtype="int32"))
        strides = paddle.concat(strides)
        strides = paddle.gather(strides, index=inds[:, 0])

        # mask encoding: each selected kernel acts as a 1x1 dynamic conv
        # filter applied to the shared mask features.
        kernel_preds = paddle.unsqueeze(kernel_preds, [2, 3])
        seg_preds = F.conv2d(seg_preds, kernel_preds)
        seg_preds = F.sigmoid(paddle.squeeze(seg_preds, [0]))
        seg_masks = seg_preds > self.mask_threshold
        seg_masks = paddle.cast(seg_masks, 'float32')
        sum_masks = paddle.sum(seg_masks, axis=[1, 2])

        # Filter tiny masks: keep only those whose area exceeds their stride.
        y = paddle.zeros(shape=paddle.shape(sum_masks), dtype='float32')
        keep = paddle.where(sum_masks > strides, sum_masks, y)
        keep = paddle.nonzero(keep)
        keep = paddle.squeeze(keep, axis=[1])
        # Prevent empty and increase fake data: keep_other appends the dummy
        # candidate's index; keep_scores points one past it, matching the
        # extra zero score concatenated below.
        keep_other = paddle.concat(
            [keep, paddle.cast(paddle.shape(sum_masks)[0] - 1, 'int64')])
        keep_scores = paddle.concat(
            [keep, paddle.cast(paddle.shape(sum_masks)[0], 'int64')])
        cate_scores_end = paddle.zeros(shape=[1], dtype='float32')
        cate_scores = paddle.concat([cate_scores, cate_scores_end])

        seg_masks = paddle.gather(seg_masks, index=keep_other)
        seg_preds = paddle.gather(seg_preds, index=keep_other)
        sum_masks = paddle.gather(sum_masks, index=keep_other)
        cate_labels = paddle.gather(cate_labels, index=keep_other)
        cate_scores = paddle.gather(cate_scores, index=keep_scores)

        # mask scoring: weight the class confidence by the mean in-mask
        # foreground probability.
        seg_mul = paddle.cast(seg_preds * seg_masks, 'float32')
        seg_scores = paddle.sum(seg_mul, axis=[1, 2]) / sum_masks
        cate_scores *= seg_scores
        # Matrix NMS
        seg_preds, cate_scores, cate_labels = self.mask_nms(
            seg_preds, seg_masks, cate_labels, cate_scores, sum_masks=sum_masks)
        # Map back to original image size (+0.5 rounds to nearest int).
        ori_shape = im_shape[:2] / scale_factor + 0.5
        ori_shape = paddle.cast(ori_shape, 'int32')
        seg_preds = F.interpolate(
            paddle.unsqueeze(seg_preds, 0),
            size=upsampled_size_out,
            mode='bilinear',
            align_corners=False,
            align_mode=0)
        # Crop off the padding region before the final resize to ori_shape.
        seg_preds = paddle.slice(
            seg_preds, axes=[2, 3], starts=[0, 0], ends=[h, w])
        seg_masks = paddle.squeeze(
            F.interpolate(
                seg_preds,
                size=ori_shape[:2],
                mode='bilinear',
                align_corners=False,
                align_mode=0),
            axis=[0])
        seg_masks = paddle.cast(seg_masks > self.mask_threshold, 'uint8')
        return seg_masks, cate_labels, cate_scores
|
|
@ -0,0 +1,175 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from ppdet.core.workspace import register
|
||||
from paddle.regularizer import L2Decay
|
||||
from paddle import ParamAttr
|
||||
|
||||
from ..layers import AnchorGeneratorSSD
|
||||
|
||||
|
||||
class SepConvLayer(nn.Layer):
    """Depthwise-separable convolution: depthwise kxk -> BN -> ReLU6 -> pointwise 1x1.

    Args:
        in_channels (int): channels of the input feature map.
        out_channels (int): channels produced by the pointwise conv.
        kernel_size (int): depthwise kernel size. Default 3.
        padding (int): depthwise padding. Default 1.
        conv_decay (float): L2 decay coefficient for both conv weights. Default 0.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=3,
                 padding=1,
                 conv_decay=0):
        super(SepConvLayer, self).__init__()
        # Depthwise conv: one filter per input channel (groups == in_channels),
        # no bias (the following BN supplies the shift).
        self.dw_conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=in_channels,
            kernel_size=kernel_size,
            stride=1,
            padding=padding,
            groups=in_channels,
            weight_attr=ParamAttr(regularizer=L2Decay(conv_decay)),
            bias_attr=False)

        # BN scale/shift are excluded from weight decay.
        self.bn = nn.BatchNorm2D(
            in_channels,
            weight_attr=ParamAttr(regularizer=L2Decay(0.)),
            bias_attr=ParamAttr(regularizer=L2Decay(0.)))

        # Pointwise 1x1 conv mixes channels up/down to out_channels.
        self.pw_conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(regularizer=L2Decay(conv_decay)),
            bias_attr=False)

    def forward(self, x):
        # depthwise -> BN -> ReLU6 -> pointwise
        return self.pw_conv(F.relu6(self.bn(self.dw_conv(x))))
|
||||
|
||||
|
||||
@register
class SSDHead(nn.Layer):
    """
    SSDHead

    Builds one box-regression conv and one class-score conv per input
    feature level, plus prior (anchor) boxes from 'AnchorGeneratorSSD'.

    Args:
        num_classes (int): Number of classes
        in_channels (list): Number of channels per input feature
        anchor_generator (dict): Configuration of 'AnchorGeneratorSSD' instance
        kernel_size (int): Conv kernel size
        padding (int): Conv padding
        use_sepconv (bool): Use SepConvLayer if true
        conv_decay (float): Conv regularization coeff
        loss (object): 'SSDLoss' instance
    """

    __shared__ = ['num_classes']
    __inject__ = ['anchor_generator', 'loss']

    def __init__(self,
                 num_classes=80,
                 in_channels=(512, 1024, 512, 256, 256, 256),
                 anchor_generator=AnchorGeneratorSSD().__dict__,
                 kernel_size=3,
                 padding=1,
                 use_sepconv=False,
                 conv_decay=0.,
                 loss='SSDLoss'):
        super(SSDHead, self).__init__()
        # add background class
        self.num_classes = num_classes + 1
        self.in_channels = in_channels
        self.anchor_generator = anchor_generator
        self.loss = loss

        # When built from a YAML config the generator arrives as a plain dict.
        if isinstance(anchor_generator, dict):
            self.anchor_generator = AnchorGeneratorSSD(**anchor_generator)

        # Number of prior boxes per spatial location, one entry per level.
        self.num_priors = self.anchor_generator.num_priors
        self.box_convs = []
        self.score_convs = []
        for i, num_prior in enumerate(self.num_priors):
            # Box branch: predicts 4 offsets per prior.
            box_conv_name = "boxes{}".format(i)
            if not use_sepconv:
                box_conv = self.add_sublayer(
                    box_conv_name,
                    nn.Conv2D(
                        in_channels=in_channels[i],
                        out_channels=num_prior * 4,
                        kernel_size=kernel_size,
                        padding=padding))
            else:
                box_conv = self.add_sublayer(
                    box_conv_name,
                    SepConvLayer(
                        in_channels=in_channels[i],
                        out_channels=num_prior * 4,
                        kernel_size=kernel_size,
                        padding=padding,
                        conv_decay=conv_decay))
            self.box_convs.append(box_conv)

            # Score branch: predicts num_classes (incl. background) per prior.
            score_conv_name = "scores{}".format(i)
            if not use_sepconv:
                score_conv = self.add_sublayer(
                    score_conv_name,
                    nn.Conv2D(
                        in_channels=in_channels[i],
                        out_channels=num_prior * self.num_classes,
                        kernel_size=kernel_size,
                        padding=padding))
            else:
                score_conv = self.add_sublayer(
                    score_conv_name,
                    SepConvLayer(
                        in_channels=in_channels[i],
                        out_channels=num_prior * self.num_classes,
                        kernel_size=kernel_size,
                        padding=padding,
                        conv_decay=conv_decay))
            self.score_convs.append(score_conv)

    @classmethod
    def from_config(cls, cfg, input_shape):
        # Derive per-level input channels from the upstream neck's output.
        return {'in_channels': [i.channels for i in input_shape], }

    def forward(self, feats, image, gt_bbox=None, gt_class=None):
        box_preds = []
        cls_scores = []
        prior_boxes = []
        for feat, box_conv, score_conv in zip(feats, self.box_convs,
                                              self.score_convs):
            box_pred = box_conv(feat)
            # NCHW -> NHWC, then flatten to (N, num_anchors_level, 4).
            # A 0 in a paddle.reshape target shape keeps that dim unchanged.
            box_pred = paddle.transpose(box_pred, [0, 2, 3, 1])
            box_pred = paddle.reshape(box_pred, [0, -1, 4])
            box_preds.append(box_pred)

            cls_score = score_conv(feat)
            cls_score = paddle.transpose(cls_score, [0, 2, 3, 1])
            cls_score = paddle.reshape(cls_score, [0, -1, self.num_classes])
            cls_scores.append(cls_score)

        prior_boxes = self.anchor_generator(feats, image)

        if self.training:
            return self.get_loss(box_preds, cls_scores, gt_bbox, gt_class,
                                 prior_boxes)
        else:
            return (box_preds, cls_scores), prior_boxes

    def get_loss(self, boxes, scores, gt_bbox, gt_class, prior_boxes):
        # Delegates entirely to the injected SSDLoss instance.
        return self.loss(boxes, scores, gt_bbox, gt_class, prior_boxes)
|
|
@ -0,0 +1,309 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle import ParamAttr
|
||||
from paddle.nn.initializer import Constant, Normal
|
||||
from paddle.regularizer import L2Decay
|
||||
from ppdet.core.workspace import register
|
||||
from ppdet.modeling.layers import DeformableConvV2, LiteConv
|
||||
import numpy as np
|
||||
|
||||
|
||||
@register
class HMHead(nn.Layer):
    """
    Heatmap head of TTFNet: a stack of conv(+act) layers followed by a
    1x1 conv producing one heatmap channel per class.

    Args:
        ch_in (int): The channel number of input Tensor.
        ch_out (int): The channel number of output Tensor.
        num_classes (int): Number of classes.
        conv_num (int): The convolution number of hm_feat.
        dcn_head(bool): whether use dcn in head. False by default.
        lite_head(bool): whether use lite version. False by default.
        norm_type (string): norm type, 'sync_bn', 'bn', 'gn' are optional.
            bn by default

    Return:
        Heatmap head output
    """
    __shared__ = ['num_classes', 'norm_type']

    def __init__(
            self,
            ch_in,
            ch_out=128,
            num_classes=80,
            conv_num=2,
            dcn_head=False,
            lite_head=False,
            norm_type='bn', ):
        super(HMHead, self).__init__()
        head_conv = nn.Sequential()
        for i in range(conv_num):
            name = 'conv.{}'.format(i)
            if lite_head:
                # Lite variant: depthwise-separable conv block + ReLU6.
                lite_name = 'hm.' + name
                head_conv.add_sublayer(
                    lite_name,
                    LiteConv(
                        in_channels=ch_in if i == 0 else ch_out,
                        out_channels=ch_out,
                        norm_type=norm_type))
                head_conv.add_sublayer(lite_name + '.act', nn.ReLU6())
            else:
                if dcn_head:
                    head_conv.add_sublayer(
                        name,
                        DeformableConvV2(
                            in_channels=ch_in if i == 0 else ch_out,
                            out_channels=ch_out,
                            kernel_size=3,
                            weight_attr=ParamAttr(initializer=Normal(0, 0.01))))
                else:
                    # Plain 3x3 conv; bias learns 2x faster and skips decay.
                    head_conv.add_sublayer(
                        name,
                        nn.Conv2D(
                            in_channels=ch_in if i == 0 else ch_out,
                            out_channels=ch_out,
                            kernel_size=3,
                            padding=1,
                            weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
                            bias_attr=ParamAttr(
                                learning_rate=2., regularizer=L2Decay(0.))))
                head_conv.add_sublayer(name + '.act', nn.ReLU())
        self.feat = head_conv
        # Focal-loss style bias init: sigmoid(bias) ~= 0.01 foreground prior.
        bias_init = float(-np.log((1 - 0.01) / 0.01))
        self.head = nn.Conv2D(
            in_channels=ch_out,
            out_channels=num_classes,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
            bias_attr=ParamAttr(
                learning_rate=2.,
                regularizer=L2Decay(0.),
                initializer=Constant(bias_init)))

    def forward(self, feat):
        # Feature stack, then the 1x1 per-class heatmap projection
        # (raw logits; sigmoid is applied by the loss / decoder).
        out = self.feat(feat)
        out = self.head(out)
        return out
|
||||
|
||||
|
||||
@register
class WHHead(nn.Layer):
    """
    Width/height head of TTFNet; mirrors HMHead's conv stack but ends in a
    4-channel 1x1 conv (box offsets) followed by ReLU.

    Args:
        ch_in (int): The channel number of input Tensor.
        ch_out (int): The channel number of output Tensor.
        conv_num (int): The convolution number of wh_feat.
        dcn_head(bool): whether use dcn in head. False by default.
        lite_head(bool): whether use lite version. False by default.
        norm_type (string): norm type, 'sync_bn', 'bn', 'gn' are optional.
            bn by default
    Return:
        Width & Height head output
    """
    __shared__ = ['norm_type']

    def __init__(self,
                 ch_in,
                 ch_out=64,
                 conv_num=2,
                 dcn_head=False,
                 lite_head=False,
                 norm_type='bn'):
        super(WHHead, self).__init__()
        head_conv = nn.Sequential()
        for i in range(conv_num):
            name = 'conv.{}'.format(i)
            if lite_head:
                # Lite variant: depthwise-separable conv block + ReLU6.
                lite_name = 'wh.' + name
                head_conv.add_sublayer(
                    lite_name,
                    LiteConv(
                        in_channels=ch_in if i == 0 else ch_out,
                        out_channels=ch_out,
                        norm_type=norm_type))
                head_conv.add_sublayer(lite_name + '.act', nn.ReLU6())
            else:
                if dcn_head:
                    head_conv.add_sublayer(
                        name,
                        DeformableConvV2(
                            in_channels=ch_in if i == 0 else ch_out,
                            out_channels=ch_out,
                            kernel_size=3,
                            weight_attr=ParamAttr(initializer=Normal(0, 0.01))))
                else:
                    # Plain 3x3 conv; bias learns 2x faster and skips decay.
                    head_conv.add_sublayer(
                        name,
                        nn.Conv2D(
                            in_channels=ch_in if i == 0 else ch_out,
                            out_channels=ch_out,
                            kernel_size=3,
                            padding=1,
                            weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
                            bias_attr=ParamAttr(
                                learning_rate=2., regularizer=L2Decay(0.))))
                head_conv.add_sublayer(name + '.act', nn.ReLU())

        self.feat = head_conv
        self.head = nn.Conv2D(
            in_channels=ch_out,
            out_channels=4,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=Normal(0, 0.001)),
            bias_attr=ParamAttr(
                learning_rate=2., regularizer=L2Decay(0.)))

    def forward(self, feat):
        out = self.feat(feat)
        out = self.head(out)
        # ReLU keeps the predicted extents non-negative.
        out = F.relu(out)
        return out
|
||||
|
||||
|
||||
@register
class TTFHead(nn.Layer):
    """
    TTFHead
    Args:
        in_channels (int): the channel number of input to TTFHead.
        num_classes (int): the number of classes, 80 by default.
        hm_head_planes (int): the channel number in heatmap head,
            128 by default.
        wh_head_planes (int): the channel number in width & height head,
            64 by default.
        hm_head_conv_num (int): the number of convolution in heatmap head,
            2 by default.
        wh_head_conv_num (int): the number of convolution in width & height
            head, 2 by default.
        hm_loss (object): Instance of 'CTFocalLoss'.
        wh_loss (object): Instance of 'GIoULoss'.
        wh_offset_base (float): the base offset of width and height,
            16.0 by default.
        down_ratio (int): the actual down_ratio is calculated by base_down_ratio
            (default 16) and the number of upsample layers.
        lite_head(bool): whether use lite version. False by default.
        norm_type (string): norm type, 'sync_bn', 'bn', 'gn' are optional.
            bn by default
        ags_module(bool): whether use AGS module to reweight location feature.
            false by default.

    """

    __shared__ = ['num_classes', 'down_ratio', 'norm_type']
    __inject__ = ['hm_loss', 'wh_loss']

    def __init__(self,
                 in_channels,
                 num_classes=80,
                 hm_head_planes=128,
                 wh_head_planes=64,
                 hm_head_conv_num=2,
                 wh_head_conv_num=2,
                 hm_loss='CTFocalLoss',
                 wh_loss='GIoULoss',
                 wh_offset_base=16.,
                 down_ratio=4,
                 dcn_head=False,
                 lite_head=False,
                 norm_type='bn',
                 ags_module=False):
        super(TTFHead, self).__init__()
        self.in_channels = in_channels
        # Two parallel sub-heads sharing the same input feature.
        self.hm_head = HMHead(in_channels, hm_head_planes, num_classes,
                              hm_head_conv_num, dcn_head, lite_head, norm_type)
        self.wh_head = WHHead(in_channels, wh_head_planes, wh_head_conv_num,
                              dcn_head, lite_head, norm_type)
        self.hm_loss = hm_loss
        self.wh_loss = wh_loss

        self.wh_offset_base = wh_offset_base
        self.down_ratio = down_ratio
        self.ags_module = ags_module

    @classmethod
    def from_config(cls, cfg, input_shape):
        # The neck may report a list of shapes; TTFHead consumes only one.
        if isinstance(input_shape, (list, tuple)):
            input_shape = input_shape[0]
        return {'in_channels': input_shape.channels, }

    def forward(self, feats):
        hm = self.hm_head(feats)
        # Scale the normalized wh output into pixel extents.
        wh = self.wh_head(feats) * self.wh_offset_base
        return hm, wh

    def filter_box_by_weight(self, pred, target, weight):
        """
        Filter out boxes where ttf_reg_weight is 0, only keep positive samples.
        """
        index = paddle.nonzero(weight > 0)
        index.stop_gradient = True
        weight = paddle.gather_nd(weight, index)
        pred = paddle.gather_nd(pred, index)
        target = paddle.gather_nd(target, index)
        return pred, target, weight

    def filter_loc_by_weight(self, score, weight):
        # Keep location scores only at positions with positive weight.
        index = paddle.nonzero(weight > 0)
        index.stop_gradient = True
        score = paddle.gather_nd(score, index)
        return score

    def get_loss(self, pred_hm, pred_wh, target_hm, box_target, target_weight):
        """Compute the heatmap focal loss and the GIoU box loss.

        Args:
            pred_hm (Tensor): raw heatmap logits from the hm head.
            pred_wh (Tensor): 4-channel box extents from the wh head.
            target_hm (Tensor): ground-truth heatmap.
            box_target (Tensor): per-pixel ground-truth boxes.
            target_weight (Tensor): per-pixel regression weights.
        Returns:
            dict: {'hm_loss': ..., 'wh_loss': ...}
        """
        # Clip the sigmoid to keep the focal-loss logs finite.
        pred_hm = paddle.clip(F.sigmoid(pred_hm), 1e-4, 1 - 1e-4)
        hm_loss = self.hm_loss(pred_hm, target_hm)
        H, W = target_hm.shape[2:]
        mask = paddle.reshape(target_weight, [-1, H, W])
        avg_factor = paddle.sum(mask) + 1e-4

        # Pixel coordinates of every feature-map cell in input-image space.
        base_step = self.down_ratio
        shifts_x = paddle.arange(0, W * base_step, base_step, dtype='int32')
        shifts_y = paddle.arange(0, H * base_step, base_step, dtype='int32')
        shift_y, shift_x = paddle.tensor.meshgrid([shifts_y, shifts_x])
        base_loc = paddle.stack([shift_x, shift_y], axis=0)
        base_loc.stop_gradient = True

        # Decode (l, t, r, b) extents into absolute (x1, y1, x2, y2) boxes.
        pred_boxes = paddle.concat(
            [0 - pred_wh[:, 0:2, :, :] + base_loc, pred_wh[:, 2:4] + base_loc],
            axis=1)
        pred_boxes = paddle.transpose(pred_boxes, [0, 2, 3, 1])
        boxes = paddle.transpose(box_target, [0, 2, 3, 1])
        boxes.stop_gradient = True

        if self.ags_module:
            pred_hm_max = paddle.max(pred_hm, axis=1, keepdim=True)
            # NOTE(review): softmax over axis=1 of a keepdim max leaves a
            # size-1 axis, which softmax maps to all ones -- confirm this is
            # the intended AGS reweighting.
            pred_hm_max_softmax = F.softmax(pred_hm_max, axis=1)
            pred_hm_max_softmax = paddle.transpose(pred_hm_max_softmax,
                                                   [0, 2, 3, 1])
            pred_hm_max_softmax = self.filter_loc_by_weight(pred_hm_max_softmax,
                                                            mask)
        else:
            pred_hm_max_softmax = None

        # Restrict the box loss to positive (weighted) locations only.
        pred_boxes, boxes, mask = self.filter_box_by_weight(pred_boxes, boxes,
                                                            mask)
        mask.stop_gradient = True
        wh_loss = self.wh_loss(
            pred_boxes,
            boxes,
            iou_weight=mask.unsqueeze(1),
            loc_reweight=pred_hm_max_softmax)
        wh_loss = wh_loss / avg_factor

        ttf_loss = {'hm_loss': hm_loss, 'wh_loss': wh_loss}
        return ttf_loss
|
|
@ -0,0 +1,124 @@
|
|||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle import ParamAttr
|
||||
from paddle.regularizer import L2Decay
|
||||
from ppdet.core.workspace import register
|
||||
|
||||
|
||||
def _de_sigmoid(x, eps=1e-7):
    """Numerically-clamped inverse of the sigmoid function.

    Maps probability-like values back to logit space via
    logit(x) = -log(1/x - 1); both the input and the intermediate ratio
    are clipped to [eps, 1/eps] to avoid log(0) and division by zero.
    """
    clamped = paddle.clip(x, eps, 1. / eps)
    ratio = paddle.clip(1. / clamped - 1., eps, 1. / eps)
    return -paddle.log(ratio)
|
||||
|
||||
|
||||
@register
class YOLOv3Head(nn.Layer):
    """Per-level 1x1 prediction convs for YOLOv3, with optional IoU-aware
    confidence rescoring at inference time."""

    __shared__ = ['num_classes', 'data_format']
    __inject__ = ['loss']

    def __init__(self,
                 in_channels=[1024, 512, 256],
                 anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
                          [59, 119], [116, 90], [156, 198], [373, 326]],
                 anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]],
                 num_classes=80,
                 loss='YOLOv3Loss',
                 iou_aware=False,
                 iou_aware_factor=0.4,
                 data_format='NCHW'):
        """
        Head for YOLOv3 network

        Args:
            num_classes (int): number of foreground classes
            anchors (list): anchors
            anchor_masks (list): anchor masks
            loss (object): YOLOv3Loss instance
            iou_aware (bool): whether to use iou_aware
            iou_aware_factor (float): iou aware factor
            data_format (str): data format, NCHW or NHWC
        """
        super(YOLOv3Head, self).__init__()
        assert len(in_channels) > 0, "in_channels length should > 0"
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.loss = loss

        self.iou_aware = iou_aware
        self.iou_aware_factor = iou_aware_factor

        self.parse_anchor(anchors, anchor_masks)
        self.num_outputs = len(self.anchors)
        self.data_format = data_format

        self.yolo_outputs = []
        for i in range(len(self.anchors)):

            # Channels per anchor: 4 box + 1 obj + num_classes,
            # plus 1 extra IoU-prediction channel when iou_aware is on.
            if self.iou_aware:
                num_filters = len(self.anchors[i]) * (self.num_classes + 6)
            else:
                num_filters = len(self.anchors[i]) * (self.num_classes + 5)
            name = 'yolo_output.{}'.format(i)
            conv = nn.Conv2D(
                in_channels=self.in_channels[i],
                out_channels=num_filters,
                kernel_size=1,
                stride=1,
                padding=0,
                data_format=data_format,
                bias_attr=ParamAttr(regularizer=L2Decay(0.)))
            # Keep the final projection in fp32 during quantization.
            conv.skip_quant = True
            yolo_output = self.add_sublayer(name, conv)
            self.yolo_outputs.append(yolo_output)

    def parse_anchor(self, anchors, anchor_masks):
        # self.anchors: per-level anchor (w, h) pairs selected by mask;
        # self.mask_anchors: the same, flattened per level.
        self.anchors = [[anchors[i] for i in mask] for mask in anchor_masks]
        self.mask_anchors = []
        anchor_num = len(anchors)
        for masks in anchor_masks:
            self.mask_anchors.append([])
            for mask in masks:
                assert mask < anchor_num, "anchor mask index overflow"
                self.mask_anchors[-1].extend(anchors[mask])

    def forward(self, feats, targets=None):
        assert len(feats) == len(self.anchors)
        yolo_outputs = []
        for i, feat in enumerate(feats):
            yolo_output = self.yolo_outputs[i](feat)
            # Normalize to NCHW so downstream decoding is layout-agnostic.
            if self.data_format == 'NHWC':
                yolo_output = paddle.transpose(yolo_output, [0, 3, 1, 2])
            yolo_outputs.append(yolo_output)

        if self.training:
            return self.loss(yolo_outputs, targets, self.anchors)
        else:
            if self.iou_aware:
                # Blend the objectness with the predicted IoU:
                # obj' = obj^(1-f) * iou^f, then map back to logit space so
                # the rest of the post-processing stays unchanged.
                y = []
                for i, out in enumerate(yolo_outputs):
                    na = len(self.anchors[i])
                    # First na channels are the per-anchor IoU predictions.
                    ioup, x = out[:, 0:na, :, :], out[:, na:, :, :]
                    b, c, h, w = x.shape
                    no = c // na
                    x = x.reshape((b, na, no, h * w))
                    ioup = ioup.reshape((b, na, 1, h * w))
                    obj = x[:, :, 4:5, :]
                    ioup = F.sigmoid(ioup)
                    obj = F.sigmoid(obj)
                    obj_t = (obj**(1 - self.iou_aware_factor)) * (
                        ioup**self.iou_aware_factor)
                    obj_t = _de_sigmoid(obj_t)
                    loc_t = x[:, :, :4, :]
                    cls_t = x[:, :, 5:, :]
                    y_t = paddle.concat([loc_t, obj_t, cls_t], axis=2)
                    y_t = y_t.reshape((b, c, h, w))
                    y.append(y_t)
                return y
            else:
                return yolo_outputs

    @classmethod
    def from_config(cls, cfg, input_shape):
        # Derive per-level input channels from the upstream neck's output.
        return {'in_channels': [i.channels for i in input_shape], }
|
|
@ -0,0 +1,302 @@
|
|||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
|
||||
def get_affine_mat_kernel(h, w, s, inv=False):
    """Build the affine transform that resizes an (h, w) image so its shorter
    side becomes ``s`` while the longer side is rounded up to a multiple of 64.

    Args:
        h (int): source image height.
        w (int): source image width.
        s (int): target length of the shorter side.
        inv (bool): if True, return the inverse transform (dst -> src).

    Returns:
        tuple: (2x3 affine matrix, (w_, h_) resized size).
    """
    if w < h:
        # Width is the short side: pin it to s, pad height to a 64-multiple.
        w_, scale_w = s, w
        h_ = int(np.ceil((s / w * h) / 64.) * 64)
        scale_h = h_ / w_ * w
    else:
        # Height is the short side: pin it to s, pad width to a 64-multiple.
        h_, scale_h = s, h
        w_ = int(np.ceil((s / h * w) / 64.) * 64)
        scale_w = w_ / h_ * h

    center = np.array([np.round(w / 2.), np.round(h / 2.)])
    size_resized = (w_, h_)
    matrix = get_affine_transform(
        center, np.array([scale_w, scale_h]), 0, size_resized, inv=inv)

    return matrix, size_resized
|
||||
|
||||
|
||||
def get_affine_transform(center,
                         input_size,
                         rot,
                         output_size,
                         shift=(0., 0.),
                         inv=False):
    """Get the affine transform matrix, given the center/scale/rot/output_size.

    Args:
        center (np.ndarray[2, ]): Center of the bounding box (x, y).
        input_size (np.ndarray[2, ]): Scale of the bounding box
            wrt [width, height].
        rot (float): Rotation angle (degree).
        output_size (np.ndarray[2, ]): Size of the destination heatmaps.
        shift (0-100%): Shift translation ratio wrt the width/height.
            Default (0., 0.).
        inv (bool): Option to inverse the affine transform direction.
            (inv=False: src->dst or inv=True: dst->src)

    Returns:
        np.ndarray: The transform matrix (2x3, as produced by
        cv2.getAffineTransform).
    """
    assert len(center) == 2
    assert len(input_size) == 2
    assert len(output_size) == 2
    assert len(shift) == 2

    scale_tmp = input_size

    shift = np.array(shift)
    src_w = scale_tmp[0]
    dst_w = output_size[0]
    dst_h = output_size[1]

    # A direction vector rotated by `rot` fixes the second point pair.
    rot_rad = np.pi * rot / 180
    src_dir = rotate_point([0., src_w * -0.5], rot_rad)
    dst_dir = np.array([0., dst_w * -0.5])

    # Three non-collinear source points: center, center+dir, and a third
    # point perpendicular to them (cv2 needs exactly three pairs).
    src = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center + scale_tmp * shift
    src[1, :] = center + src_dir + scale_tmp * shift
    src[2, :] = _get_3rd_point(src[0, :], src[1, :])

    # Matching destination points in the output coordinate frame.
    dst = np.zeros((3, 2), dtype=np.float32)
    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
    dst[2, :] = _get_3rd_point(dst[0, :], dst[1, :])

    if inv:
        trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
    else:
        trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))

    return trans
|
||||
|
||||
|
||||
def _get_3rd_point(a, b):
|
||||
"""To calculate the affine matrix, three pairs of points are required. This
|
||||
function is used to get the 3rd point, given 2D points a & b.
|
||||
|
||||
The 3rd point is defined by rotating vector `a - b` by 90 degrees
|
||||
anticlockwise, using b as the rotation center.
|
||||
|
||||
Args:
|
||||
a (np.ndarray): point(x,y)
|
||||
b (np.ndarray): point(x,y)
|
||||
|
||||
Returns:
|
||||
np.ndarray: The 3rd point.
|
||||
"""
|
||||
assert len(a) == 2
|
||||
assert len(b) == 2
|
||||
direction = a - b
|
||||
third_pt = b + np.array([-direction[1], direction[0]], dtype=np.float32)
|
||||
|
||||
return third_pt
|
||||
|
||||
|
||||
def rotate_point(pt, angle_rad):
    """Rotate a 2D point anticlockwise about the origin.

    Args:
        pt (list[float]): 2 dimensional point to be rotated
        angle_rad (float): rotation angle in radians

    Returns:
        list[float]: Rotated point.
    """
    assert len(pt) == 2
    sn, cs = np.sin(angle_rad), np.cos(angle_rad)
    # Standard 2D rotation matrix applied componentwise.
    return [pt[0] * cs - pt[1] * sn, pt[0] * sn + pt[1] * cs]
|
||||
|
||||
|
||||
def transpred(kpts, h, w, s):
    """Map predicted keypoints from the resized (network-input) space back to
    the original (h, w) image space using the inverse resize transform.

    Args:
        kpts (np.ndarray[..., >=2]): keypoints; only the first two (x, y)
            components are transformed.
        h (int): original image height.
        w (int): original image width.
        s (int): shorter-side length used when the image was resized.

    Returns:
        np.ndarray[..., 2]: keypoint coordinates in the original image.
    """
    trans, _ = get_affine_mat_kernel(h, w, s, inv=True)

    # .copy() so the caller's keypoint array is not modified.
    return warp_affine_joints(kpts[..., :2].copy(), trans)
|
||||
|
||||
|
||||
def warp_affine_joints(joints, mat):
    """Apply a 2x3 affine matrix to joint coordinates of any leading shape.

    Args:
        joints (np.ndarray[..., 2]): Origin coordinate of joints.
        mat (np.ndarray[2, 3]): The affine matrix.

    Returns:
        np.ndarray[..., 2]: Transformed coordinates, same shape as input.
    """
    joints = np.array(joints)
    original_shape = joints.shape
    flat = joints.reshape(-1, 2)
    # Homogeneous coordinates: append a ones column, then project with mat.
    ones = np.ones((flat.shape[0], 1), dtype=flat.dtype)
    homogeneous = np.concatenate((flat, ones), axis=1)
    return homogeneous.dot(mat.T).reshape(original_shape)
|
||||
|
||||
|
||||
def affine_transform(pt, t):
    """Apply an affine matrix ``t`` (at least 2x3) to a single 2D point.

    Args:
        pt (sequence[float]): point (x, y).
        t (np.ndarray): affine transform matrix.

    Returns:
        np.ndarray: transformed (x, y).
    """
    homogeneous = np.array([pt[0], pt[1], 1.])
    transformed = t.dot(homogeneous)
    return transformed[:2]
|
||||
|
||||
|
||||
def transform_preds(coords, center, scale, output_size):
    """Map heatmap-space predictions back to the original image space.

    Args:
        coords (np.ndarray[N, >=2]): predicted coordinates; only the first
            two columns are transformed.
        center (np.ndarray[2, ]): bbox center used when cropping.
        scale (np.ndarray[2, ]): bbox scale; multiplied by 200 per the
            COCO-keypoint convention of scale units of 200 px.
        output_size (np.ndarray[2, ]): heatmap size.

    Returns:
        np.ndarray: coordinates in the original image, same shape as coords.
    """
    target_coords = np.zeros(coords.shape)
    # inv=1: build the heatmap->image (inverse) transform.
    trans = get_affine_transform(center, scale * 200, 0, output_size, inv=1)
    for p in range(coords.shape[0]):
        target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans)
    return target_coords
|
||||
|
||||
|
||||
def oks_iou(g, d, a_g, a_d, sigmas=None, in_vis_thre=None):
    """Object Keypoint Similarity (OKS) between one instance and n detections.

    Args:
        g (np.ndarray): flattened keypoints of the reference instance,
            layout [x0, y0, v0, x1, y1, v1, ...].
        d (np.ndarray[n, 3*k]): keypoints of the n instances to compare.
        a_g (float): area of the reference instance.
        a_d (np.ndarray[n, ]): areas of the compared instances.
        sigmas (np.ndarray): per-keypoint falloff constants; defaults to the
            17 COCO keypoint sigmas.
        in_vis_thre (float): if set, only keypoints whose visibility score
            exceeds this threshold in BOTH instances contribute.

    Returns:
        np.ndarray[n, ]: OKS values in [0, 1].
    """
    if not isinstance(sigmas, np.ndarray):
        # Default: COCO 17-keypoint sigmas.
        sigmas = np.array([
            .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07,
            .87, .87, .89, .89
        ]) / 10.0
    vars = (sigmas * 2)**2
    xg = g[0::3]
    yg = g[1::3]
    vg = g[2::3]
    ious = np.zeros((d.shape[0]))
    for n_d in range(0, d.shape[0]):
        xd = d[n_d, 0::3]
        yd = d[n_d, 1::3]
        vd = d[n_d, 2::3]
        dx = xd - xg
        dy = yd - yg
        # Squared distance normalized by keypoint variance and mean area;
        # np.spacing(1) guards against division by zero for zero areas.
        e = (dx**2 + dy**2) / vars / ((a_g + a_d[n_d]) / 2 + np.spacing(1)) / 2
        if in_vis_thre is not None:
            # BUGFIX: the original `list(vg > thr) and list(vd > thr)` used
            # the Python `and` operator, which returns the second list when
            # the first is non-empty, silently ignoring the reference
            # instance's visibility mask. Use an element-wise AND instead.
            ind = np.logical_and(vg > in_vis_thre, vd > in_vis_thre)
            e = e[ind]
        ious[n_d] = np.sum(np.exp(-e)) / e.shape[0] if e.shape[0] != 0 else 0.0
    return ious
|
||||
|
||||
|
||||
def oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None):
    """Greedy non-maximum suppression over keypoint detections using OKS.

    Repeatedly keeps the highest-scoring remaining pose and discards every
    other pose whose OKS overlap with it exceeds ``thresh``.

    Args:
        kpts_db (list): Predicted keypoints within the image; each entry is
            a dict with 'score', 'keypoints' and 'area' keys.
        thresh (float): Overlap threshold above which a pose is suppressed.
        sigmas (np.array, optional): Per-keypoint variances for the OKS IoU.
        in_vis_thre (float, optional): Visibility threshold forwarded to
            ``oks_iou``.

    Returns:
        list: Indexes (into ``kpts_db``) of the detections to keep.
    """
    if not kpts_db:
        return []

    scores = np.array([entry['score'] for entry in kpts_db])
    kpts = np.array([entry['keypoints'].flatten() for entry in kpts_db])
    areas = np.array([entry['area'] for entry in kpts_db])

    # Process candidates from highest to lowest score.
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        best = order[0]
        keep.append(best)

        overlaps = oks_iou(kpts[best], kpts[order[1:]], areas[best],
                           areas[order[1:]], sigmas, in_vis_thre)

        # Retain only candidates that do not overlap the kept pose too much.
        survivors = np.where(overlaps <= thresh)[0]
        order = order[survivors + 1]

    return keep
|
||||
|
||||
|
||||
def rescore(overlap, scores, thresh, type='gaussian'):
    """Decay detection scores according to their overlap (soft-NMS).

    Args:
        overlap (np.ndarray): OKS overlap of each detection with the kept one.
        scores (np.ndarray): Current detection scores; the 'linear' branch
            writes into this array in place.
        thresh (float): Overlap threshold ('linear') or gaussian bandwidth.
        type (str): 'linear' for linear decay above ``thresh``; anything
            else selects gaussian decay.

    Returns:
        np.ndarray: The rescored values (same array for 'linear', a new
        array otherwise).
    """
    assert overlap.shape[0] == scores.shape[0]

    if type == 'linear':
        # Only detections overlapping at least `thresh` are penalized.
        hit = np.where(overlap >= thresh)[0]
        scores[hit] = scores[hit] * (1 - overlap[hit])
        return scores

    # Gaussian decay: every detection is penalized smoothly by its overlap.
    return scores * np.exp(-overlap**2 / thresh)
|
||||
|
||||
|
||||
def soft_oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None):
    """Soft non-maximum suppression over keypoint detections using OKS.

    Unlike ``oks_nms``, no pose is discarded outright: after each pick the
    remaining scores are decayed by ``rescore`` according to their OKS
    overlap with the picked pose, and the remaining candidates are
    re-sorted by the decayed scores.

    Args:
        kpts_db (list): The predicted keypoints within the image; each
            entry is a dict with 'score', 'keypoints' and 'area' keys.
        thresh (float): Threshold/bandwidth forwarded to ``rescore``.
        sigmas (np.array): The variance to calculate the oks iou.
            Default: None (``oks_iou`` falls back to the COCO sigmas).
        in_vis_thre (float): The threshold to select the high confidence
            keypoints. Default: None.

    Return:
        keep (np.ndarray): indexes (into ``kpts_db``) to keep, at most 20.
        Note this is a numpy array, while ``oks_nms`` returns a list.
    """

    if len(kpts_db) == 0:
        return []

    scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))])
    kpts = np.array(
        [kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))])
    areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))])

    # `order` holds original indexes sorted by descending score; `scores`
    # is kept permuted in lockstep with `order` throughout the loop.
    order = scores.argsort()[::-1]
    scores = scores[order]

    # Hard cap on the number of kept detections.
    # max_dets = order.size
    max_dets = 20
    keep = np.zeros(max_dets, dtype=np.intp)
    keep_cnt = 0
    while order.size > 0 and keep_cnt < max_dets:
        i = order[0]

        # OKS overlap of the current best pose with all remaining poses.
        oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]],
                          sigmas, in_vis_thre)

        # Drop the picked pose and decay the survivors' scores (soft-NMS).
        order = order[1:]
        scores = rescore(oks_ovr, scores[1:], thresh)

        # Re-sort survivors by their decayed scores, keeping `order` and
        # `scores` aligned.
        tmp = scores.argsort()[::-1]
        order = order[tmp]
        scores = scores[tmp]

        keep[keep_cnt] = i
        keep_cnt += 1

    keep = keep[:keep_cnt]

    return keep
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,35 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from . import yolo_loss
|
||||
from . import iou_aware_loss
|
||||
from . import iou_loss
|
||||
from . import ssd_loss
|
||||
from . import fcos_loss
|
||||
from . import solov2_loss
|
||||
from . import ctfocal_loss
|
||||
from . import keypoint_loss
|
||||
from . import jde_loss
|
||||
from . import fairmot_loss
|
||||
|
||||
from .yolo_loss import *
|
||||
from .iou_aware_loss import *
|
||||
from .iou_loss import *
|
||||
from .ssd_loss import *
|
||||
from .fcos_loss import *
|
||||
from .solov2_loss import *
|
||||
from .ctfocal_loss import *
|
||||
from .keypoint_loss import *
|
||||
from .jde_loss import *
|
||||
from .fairmot_loss import *
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue