第一次提交

2022-06-01 11:18:00 +08:00 · 2022-06-01 11:18:00 +08:00 · 0cab1d8282
parent e753eb708c
commit 0cab1d8282
1246 changed files with 187079 additions and 1 deletions
--- a/README.md
+++ b/README.md
@ -1,2 +1,38 @@
-# PulseFocusPlatform
+# Pulse Focus Platform脉冲聚焦
 ## 0. 软件介绍
 Pulse Focus Platform脉冲聚焦是面向水底物体图像识别的实时检测软件。软件以面向对象的设计理念，采用Python语言编程，基于pyqt、paddle、pyyaml以及ppdet等技术开发，支持多批量图像、长视频等多种本地数据源，预置多种物体识别模型，并提供扩展接口，方便新模型的集成与验证。平台安装简单，运行方便，可选参数丰富，扩展性高，非常适用于相关研究领域的工程技术人员和学生掌握学习侧扫或光学数据等形成的水底图像中关注物体的识别方法。
 脉冲聚焦软件设计了图片和视频两种数据输入下的多物体识别功能。针对图片数据，调用模型进行单张图片预测，随后在前端可视化输出多物体识别结果；针对视频流动态图像数据，首先对视频流数据进行分帧采样，获取采样图片，再针对采样图片进行多物体识别，将采样识别结果进行视频合成，然后在前端可视化输出视频流数据识别结果。为了视频流数据处理的高效性，设计了采样-识别-展示的多线程处理方式，可加快视频流数据处理。
 软件界面简单，易学易用，包含参数的输入选择，程序的运行，算法结果的展示等，源代码公开，算法可修改。
 开发人员：K. Wang、H.P. Yu、J. Li、H.T. Li、Z.Q. Wang、Z.Y. Zhao、L.F. Zhang、G. Chen
 ## 1. 开发环境配置
 运行以下命令：
 ```bash
 conda env create -f create_env.yaml
 ```
 该命令会创建一个名为`Focus`的conda虚拟环境，用`conda activate Focus`即可激活该虚拟环境。
 ## 2. 软件运行
 运行以下命令运行软件：
 ```python
 python main.py
 ```
 ## 3. 一些说明
 1. 使用GPU版本
    参考百度飞桨paddle官方网站安装
    [安装链接](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/windows-pip.html)
 2. 模型文件连接
    由于软件包含已训练好的三个模型，模型参数较大，放在百度网盘自行下载，并放在根目录。
    链接：https://pan.baidu.com/s/1Wf4gjmccgqQYeknm9pX2jQ 
    提取码：2gs7 
--- a/SSS_win.py
+++ b/SSS_win.py
@ -0,0 +1,188 @@
 # -*- coding: utf-8 -*-
 # Form implementation generated from reading ui file 'SSS_win.ui'
 #
 # Created by: PyQt5 UI code generator 5.15.4
 #
 # WARNING: Any manual changes made to this file will be lost when pyuic5 is
 # run again.  Do not edit this file unless you know what you are doing.
 from PyQt5 import QtCore, QtGui, QtWidgets
 class Ui_MainWindow(object):
    def setupUi(self, MainWindow):
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(1353, 1007)
        self.centralwidget = QtWidgets.QWidget(MainWindow)
        self.centralwidget.setObjectName("centralwidget")
        self.verticalLayout_5 = QtWidgets.QVBoxLayout(self.centralwidget)
        self.verticalLayout_5.setObjectName("verticalLayout_5")
        self.verticalLayout = QtWidgets.QVBoxLayout()
        self.verticalLayout.setObjectName("verticalLayout")
        self.horizontalLayout = QtWidgets.QHBoxLayout()
        self.horizontalLayout.setObjectName("horizontalLayout")
        self.verticalLayout_2 = QtWidgets.QVBoxLayout()
        self.verticalLayout_2.setObjectName("verticalLayout_2")
        self.tupiandiaoru = QtWidgets.QPushButton(self.centralwidget)
        self.tupiandiaoru.setObjectName("tupiandiaoru")
        self.verticalLayout_2.addWidget(self.tupiandiaoru)
        self.shipindaoru = QtWidgets.QPushButton(self.centralwidget)
        self.shipindaoru.setObjectName("shipindaoru")
        self.verticalLayout_2.addWidget(self.shipindaoru)
        self.pushButton_xxdaochu = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton_xxdaochu.setObjectName("pushButton_xxdaochu")
        self.verticalLayout_2.addWidget(self.pushButton_xxdaochu)
        self.horizontalLayout.addLayout(self.verticalLayout_2)
        self.formLayout = QtWidgets.QFormLayout()
        self.formLayout.setObjectName("formLayout")
        self.comboBox_tzxuanze = QtWidgets.QComboBox(self.centralwidget)
        self.comboBox_tzxuanze.setEditable(True)
        self.comboBox_tzxuanze.setObjectName("comboBox_tzxuanze")
        self.comboBox_tzxuanze.addItem("")
        self.formLayout.setWidget(0, QtWidgets.QFormLayout.FieldRole, self.comboBox_tzxuanze)
        self.label_2 = QtWidgets.QLabel(self.centralwidget)
        self.label_2.setObjectName("label_2")
        self.formLayout.setWidget(1, QtWidgets.QFormLayout.LabelRole, self.label_2)
        self.comboBox_yclfangfa = QtWidgets.QComboBox(self.centralwidget)
        self.comboBox_yclfangfa.setEditable(True)
        self.comboBox_yclfangfa.setObjectName("comboBox_yclfangfa")
        self.comboBox_yclfangfa.addItem("")
        self.comboBox_yclfangfa.addItem("")
        self.comboBox_yclfangfa.addItem("")
        self.formLayout.setWidget(1, QtWidgets.QFormLayout.FieldRole, self.comboBox_yclfangfa)
        self.label_3 = QtWidgets.QLabel(self.centralwidget)
        self.label_3.setObjectName("label_3")
        self.formLayout.setWidget(2, QtWidgets.QFormLayout.LabelRole, self.label_3)
        self.comboBox_sbsuanfa = QtWidgets.QComboBox(self.centralwidget)
        self.comboBox_sbsuanfa.setEditable(True)
        self.comboBox_sbsuanfa.setObjectName("comboBox_sbsuanfa")
        self.comboBox_sbsuanfa.addItem("")
        self.comboBox_sbsuanfa.addItem("")
        self.comboBox_sbsuanfa.addItem("")
        self.formLayout.setWidget(2, QtWidgets.QFormLayout.FieldRole, self.comboBox_sbsuanfa)
        self.label_4 = QtWidgets.QLabel(self.centralwidget)
        self.label_4.setObjectName("label_4")
        self.formLayout.setWidget(3, QtWidgets.QFormLayout.LabelRole, self.label_4)
        self.comboBox_GPU = QtWidgets.QComboBox(self.centralwidget)
        self.comboBox_GPU.setEditable(True)
        self.comboBox_GPU.setObjectName("comboBox_GPU")
        self.comboBox_GPU.addItem("")
        self.comboBox_GPU.addItem("")
        self.formLayout.setWidget(3, QtWidgets.QFormLayout.FieldRole, self.comboBox_GPU)
        self.label = QtWidgets.QLabel(self.centralwidget)
        self.label.setObjectName("label")
        self.formLayout.setWidget(0, QtWidgets.QFormLayout.LabelRole, self.label)
        self.horizontalLayout.addLayout(self.formLayout)
        self.verticalLayout_4 = QtWidgets.QVBoxLayout()
        self.verticalLayout_4.setObjectName("verticalLayout_4")
        self.shibie = QtWidgets.QPushButton(self.centralwidget)
        self.shibie.setObjectName("shibie")
        self.verticalLayout_4.addWidget(self.shibie)
        self.xunlian = QtWidgets.QPushButton(self.centralwidget)
        self.xunlian.setObjectName("xunlian")
        self.verticalLayout_4.addWidget(self.xunlian)
        self.horizontalLayout.addLayout(self.verticalLayout_4)
        self.verticalLayout_3 = QtWidgets.QVBoxLayout()
        self.verticalLayout_3.setObjectName("verticalLayout_3")
        self.pushButton_jswendang = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton_jswendang.setObjectName("pushButton_jswendang")
        self.verticalLayout_3.addWidget(self.pushButton_jswendang)
        self.pushButton_rjwendang = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton_rjwendang.setObjectName("pushButton_rjwendang")
        self.verticalLayout_3.addWidget(self.pushButton_rjwendang)
        self.pushButton_tuichu = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton_tuichu.setObjectName("pushButton_tuichu")
        self.verticalLayout_3.addWidget(self.pushButton_tuichu)
        self.horizontalLayout.addLayout(self.verticalLayout_3)
        self.verticalLayout.addLayout(self.horizontalLayout)
        self.horizontalLayout_2 = QtWidgets.QHBoxLayout()
        self.horizontalLayout_2.setObjectName("horizontalLayout_2")
        self.label_movie = QtWidgets.QLabel(self.centralwidget)
        self.label_movie.setMinimumSize(QtCore.QSize(1200, 800))
        self.label_movie.setText("")
        self.label_movie.setObjectName("label_movie")
        self.horizontalLayout_2.addWidget(self.label_movie)
        self.formLayout_2 = QtWidgets.QFormLayout()
        self.formLayout_2.setObjectName("formLayout_2")
        self.label_mb1 = QtWidgets.QLabel(self.centralwidget)
        self.label_mb1.setObjectName("label_mb1")
        self.formLayout_2.setWidget(0, QtWidgets.QFormLayout.LabelRole, self.label_mb1)
        self.flv = QtWidgets.QLabel(self.centralwidget)
        self.flv.setObjectName("flv")
        self.formLayout_2.setWidget(0, QtWidgets.QFormLayout.FieldRole, self.flv)
        self.label_mb2 = QtWidgets.QLabel(self.centralwidget)
        self.label_mb2.setObjectName("label_mb2")
        self.formLayout_2.setWidget(1, QtWidgets.QFormLayout.LabelRole, self.label_mb2)
        self.gx = QtWidgets.QLabel(self.centralwidget)
        self.gx.setObjectName("gx")
        self.formLayout_2.setWidget(1, QtWidgets.QFormLayout.FieldRole, self.gx)
        self.label_mb3 = QtWidgets.QLabel(self.centralwidget)
        self.label_mb3.setObjectName("label_mb3")
        self.formLayout_2.setWidget(2, QtWidgets.QFormLayout.LabelRole, self.label_mb3)
        self.mbw = QtWidgets.QLabel(self.centralwidget)
        self.mbw.setObjectName("mbw")
        self.formLayout_2.setWidget(2, QtWidgets.QFormLayout.FieldRole, self.mbw)
        self.label_5 = QtWidgets.QLabel(self.centralwidget)
        self.label_5.setObjectName("label_5")
        self.formLayout_2.setWidget(3, QtWidgets.QFormLayout.LabelRole, self.label_5)
        self.label_object = QtWidgets.QLabel(self.centralwidget)
        self.label_object.setObjectName("label_object")
        self.formLayout_2.setWidget(3, QtWidgets.QFormLayout.FieldRole, self.label_object)
        self.horizontalLayout_2.addLayout(self.formLayout_2)
        self.verticalLayout.addLayout(self.horizontalLayout_2)
        self.label_10 = QtWidgets.QLabel(self.centralwidget)
        self.label_10.setObjectName("label_10")
        self.verticalLayout.addWidget(self.label_10)
        self.verticalLayout_5.addLayout(self.verticalLayout)
        MainWindow.setCentralWidget(self.centralwidget)
        self.statusbar = QtWidgets.QStatusBar(MainWindow)
        self.statusbar.setObjectName("statusbar")
        MainWindow.setStatusBar(self.statusbar)
        self.retranslateUi(MainWindow)
        self.tupiandiaoru.clicked.connect(MainWindow.import_pic)
        self.shibie.clicked.connect(MainWindow.press_shibie)
        self.xunlian.clicked.connect(MainWindow.press_xunlian)
        self.pushButton_tuichu.clicked.connect(MainWindow.exit)
        self.shipindaoru.clicked.connect(MainWindow.press_movie)
        self.comboBox_sbsuanfa.activated.connect(MainWindow.moxingxuanze)
        self.comboBox_GPU.activated.connect(MainWindow.gpu_use)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)
    def retranslateUi(self, MainWindow):
        _translate = QtCore.QCoreApplication.translate
        MainWindow.setWindowTitle(_translate("MainWindow", "脉冲聚焦"))
        self.tupiandiaoru.setText(_translate("MainWindow", "静态图像导入"))
        self.shipindaoru.setText(_translate("MainWindow", "动态图像导入"))
        self.pushButton_xxdaochu.setText(_translate("MainWindow", "信息导出"))
        self.comboBox_tzxuanze.setItemText(0, _translate("MainWindow", "无"))
        self.label_2.setText(_translate("MainWindow", "预处理方法"))
        self.comboBox_yclfangfa.setItemText(0, _translate("MainWindow", "多尺度融合"))
        self.comboBox_yclfangfa.setItemText(1, _translate("MainWindow", "图像增广"))
        self.comboBox_yclfangfa.setItemText(2, _translate("MainWindow", "图像重塑"))
        self.label_3.setText(_translate("MainWindow", "聚焦算法"))
        self.comboBox_sbsuanfa.setCurrentText(_translate("MainWindow", "PPYOLO-BOT"))
        self.comboBox_sbsuanfa.setItemText(0, _translate("MainWindow", "PPYOLO-BOT"))
        self.comboBox_sbsuanfa.setItemText(1, _translate("MainWindow", "YOLOV3"))
        self.comboBox_sbsuanfa.setItemText(2, _translate("MainWindow", "RCNN"))
        self.label_4.setText(_translate("MainWindow", "GPU加速"))
        self.comboBox_GPU.setCurrentText(_translate("MainWindow", "YES"))
        self.comboBox_GPU.setItemText(0, _translate("MainWindow", "YES"))
        self.comboBox_GPU.setItemText(1, _translate("MainWindow", "NO"))
        self.label.setText(_translate("MainWindow", "特征选择"))
        self.shibie.setText(_translate("MainWindow", "聚焦"))
        self.xunlian.setText(_translate("MainWindow", "训练"))
        self.pushButton_jswendang.setText(_translate("MainWindow", "技术文档"))
        self.pushButton_rjwendang.setText(_translate("MainWindow", "软件说明文档"))
        self.pushButton_tuichu.setText(_translate("MainWindow", "退出"))
        self.label_mb1.setText(_translate("MainWindow", "flv"))
        self.flv.setText(_translate("MainWindow", "0"))
        self.label_mb2.setText(_translate("MainWindow", "gx"))
        self.gx.setText(_translate("MainWindow", "0"))
        self.label_mb3.setText(_translate("MainWindow", "mbw"))
        self.mbw.setText(_translate("MainWindow", "0"))
        self.label_5.setText(_translate("MainWindow", "object"))
        self.label_object.setText(_translate("MainWindow", "0"))
        self.label_10.setText(_translate("MainWindow", "TextLabel"))
--- a/SSS_win.ui
+++ b/SSS_win.ui
@ -0,0 +1,417 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <ui version="4.0">
 <class>MainWindow</class>
 <widget class="QMainWindow" name="MainWindow">
  <property name="geometry">
   <rect>
    <x>0</x>
    <y>0</y>
    <width>1353</width>
    <height>1007</height>
   </rect>
  </property>
  <property name="windowTitle">
   <string>MainWindow</string>
  </property>
  <widget class="QWidget" name="centralwidget">
   <layout class="QVBoxLayout" name="verticalLayout_5">
    <item>
     <layout class="QVBoxLayout" name="verticalLayout">
      <item>
       <layout class="QHBoxLayout" name="horizontalLayout">
        <item>
         <layout class="QVBoxLayout" name="verticalLayout_2">
          <item>
           <widget class="QPushButton" name="tupiandiaoru">
            <property name="text">
             <string>静态图像导入</string>
            </property>
           </widget>
          </item>
          <item>
           <widget class="QPushButton" name="shipindaoru">
            <property name="text">
             <string>动态图像导入</string>
            </property>
           </widget>
          </item>
          <item>
           <widget class="QPushButton" name="pushButton_xxdaochu">
            <property name="text">
             <string>信息导出</string>
            </property>
           </widget>
          </item>
         </layout>
        </item>
        <item>
         <layout class="QFormLayout" name="formLayout">
          <item row="0" column="1">
           <widget class="QComboBox" name="comboBox_tzxuanze">
            <property name="editable">
             <bool>true</bool>
            </property>
            <item>
             <property name="text">
              <string>无</string>
             </property>
            </item>
           </widget>
          </item>
          <item row="1" column="0">
           <widget class="QLabel" name="label_2">
            <property name="text">
             <string>预处理方法</string>
            </property>
           </widget>
          </item>
          <item row="1" column="1">
           <widget class="QComboBox" name="comboBox_yclfangfa">
            <property name="editable">
             <bool>true</bool>
            </property>
            <item>
             <property name="text">
              <string>多尺度融合</string>
             </property>
            </item>
            <item>
             <property name="text">
              <string>图像增广</string>
             </property>
            </item>
            <item>
             <property name="text">
              <string>图像重塑</string>
             </property>
            </item>
           </widget>
          </item>
          <item row="2" column="0">
           <widget class="QLabel" name="label_3">
            <property name="text">
             <string>识别算法</string>
            </property>
           </widget>
          </item>
          <item row="2" column="1">
           <widget class="QComboBox" name="comboBox_sbsuanfa">
            <property name="editable">
             <bool>true</bool>
            </property>
            <property name="currentText">
             <string>PPYOLO-BOT</string>
            </property>
            <item>
             <property name="text">
              <string>PPYOLO-BOT</string>
             </property>
            </item>
            <item>
             <property name="text">
              <string>YOLOV3</string>
             </property>
            </item>
            <item>
             <property name="text">
              <string>RCNN</string>
             </property>
            </item>
           </widget>
          </item>
          <item row="3" column="0">
           <widget class="QLabel" name="label_4">
            <property name="text">
             <string>GPU加速</string>
            </property>
           </widget>
          </item>
          <item row="3" column="1">
           <widget class="QComboBox" name="comboBox_GPU">
            <property name="editable">
             <bool>true</bool>
            </property>
            <property name="currentText">
             <string>YES</string>
            </property>
            <item>
             <property name="text">
              <string>YES</string>
             </property>
            </item>
            <item>
             <property name="text">
              <string>NO</string>
             </property>
            </item>
           </widget>
          </item>
          <item row="0" column="0">
           <widget class="QLabel" name="label">
            <property name="text">
             <string>特征选择</string>
            </property>
           </widget>
          </item>
         </layout>
        </item>
        <item>
         <layout class="QVBoxLayout" name="verticalLayout_4">
          <item>
           <widget class="QPushButton" name="shibie">
            <property name="text">
             <string>识别</string>
            </property>
           </widget>
          </item>
          <item>
           <widget class="QPushButton" name="xunlian">
            <property name="text">
             <string>训练</string>
            </property>
           </widget>
          </item>
         </layout>
        </item>
        <item>
         <layout class="QVBoxLayout" name="verticalLayout_3">
          <item>
           <widget class="QPushButton" name="pushButton_jswendang">
            <property name="text">
             <string>技术文档</string>
            </property>
           </widget>
          </item>
          <item>
           <widget class="QPushButton" name="pushButton_rjwendang">
            <property name="text">
             <string>软件说明文档</string>
            </property>
           </widget>
          </item>
          <item>
           <widget class="QPushButton" name="pushButton_tuichu">
            <property name="text">
             <string>退出</string>
            </property>
           </widget>
          </item>
         </layout>
        </item>
       </layout>
      </item>
      <item>
       <layout class="QHBoxLayout" name="horizontalLayout_2">
        <item>
         <widget class="QLabel" name="label_movie">
          <property name="minimumSize">
           <size>
            <width>1200</width>
            <height>800</height>
           </size>
          </property>
          <property name="text">
           <string/>
          </property>
         </widget>
        </item>
        <item>
         <layout class="QFormLayout" name="formLayout_2">
          <item row="0" column="0">
           <widget class="QLabel" name="label_mb1">
            <property name="text">
             <string>flv</string>
            </property>
           </widget>
          </item>
          <item row="0" column="1">
           <widget class="QLabel" name="flv">
            <property name="text">
             <string>0</string>
            </property>
           </widget>
          </item>
          <item row="1" column="0">
           <widget class="QLabel" name="label_mb2">
            <property name="text">
             <string>gx</string>
            </property>
           </widget>
          </item>
          <item row="1" column="1">
           <widget class="QLabel" name="gx">
            <property name="text">
             <string>0</string>
            </property>
           </widget>
          </item>
          <item row="2" column="0">
           <widget class="QLabel" name="label_mb3">
            <property name="text">
             <string>mbw</string>
            </property>
           </widget>
          </item>
          <item row="2" column="1">
           <widget class="QLabel" name="mbw">
            <property name="text">
             <string>0</string>
            </property>
           </widget>
          </item>
          <item row="3" column="0">
           <widget class="QLabel" name="label_5">
            <property name="text">
             <string>object</string>
            </property>
           </widget>
          </item>
          <item row="3" column="1">
           <widget class="QLabel" name="label_object">
            <property name="text">
             <string>0</string>
            </property>
           </widget>
          </item>
         </layout>
        </item>
       </layout>
      </item>
      <item>
       <widget class="QLabel" name="label_10">
        <property name="text">
         <string>TextLabel</string>
        </property>
       </widget>
      </item>
     </layout>
    </item>
   </layout>
  </widget>
  <widget class="QStatusBar" name="statusbar"/>
 </widget>
 <resources/>
 <connections>
  <connection>
   <sender>tupiandiaoru</sender>
   <signal>clicked()</signal>
   <receiver>MainWindow</receiver>
   <slot>import_pic()</slot>
   <hints>
    <hint type="sourcelabel">
     <x>255</x>
     <y>42</y>
    </hint>
    <hint type="destinationlabel">
     <x>414</x>
     <y>8</y>
    </hint>
   </hints>
  </connection>
  <connection>
   <sender>shibie</sender>
   <signal>clicked()</signal>
   <receiver>MainWindow</receiver>
   <slot>press_shibie()</slot>
   <hints>
    <hint type="sourcelabel">
     <x>983</x>
     <y>48</y>
    </hint>
    <hint type="destinationlabel">
     <x>1021</x>
     <y>0</y>
    </hint>
   </hints>
  </connection>
  <connection>
   <sender>xunlian</sender>
   <signal>clicked()</signal>
   <receiver>MainWindow</receiver>
   <slot>press_xunlian()</slot>
   <hints>
    <hint type="sourcelabel">
     <x>1101</x>
     <y>115</y>
    </hint>
    <hint type="destinationlabel">
     <x>823</x>
     <y>0</y>
    </hint>
   </hints>
  </connection>
  <connection>
   <sender>pushButton_tuichu</sender>
   <signal>clicked()</signal>
   <receiver>MainWindow</receiver>
   <slot>exit()</slot>
   <hints>
    <hint type="sourcelabel">
     <x>1467</x>
     <y>128</y>
    </hint>
    <hint type="destinationlabel">
     <x>1481</x>
     <y>381</y>
    </hint>
   </hints>
  </connection>
  <connection>
   <sender>shipindaoru</sender>
   <signal>clicked()</signal>
   <receiver>MainWindow</receiver>
   <slot>press_movie()</slot>
   <hints>
    <hint type="sourcelabel">
     <x>362</x>
     <y>89</y>
    </hint>
    <hint type="destinationlabel">
     <x>379</x>
     <y>0</y>
    </hint>
   </hints>
  </connection>
  <connection>
   <sender>comboBox_sbsuanfa</sender>
   <signal>activated(QString)</signal>
   <receiver>MainWindow</receiver>
   <slot>moxingxuanze()</slot>
   <hints>
    <hint type="sourcelabel">
     <x>621</x>
     <y>91</y>
    </hint>
    <hint type="destinationlabel">
     <x>686</x>
     <y>85</y>
    </hint>
   </hints>
  </connection>
  <connection>
   <sender>comboBox_GPU</sender>
   <signal>activated(QString)</signal>
   <receiver>MainWindow</receiver>
   <slot>gpu_use()</slot>
   <hints>
    <hint type="sourcelabel">
     <x>597</x>
     <y>122</y>
    </hint>
    <hint type="destinationlabel">
     <x>656</x>
     <y>122</y>
    </hint>
   </hints>
  </connection>
 </connections>
 <slots>
  <slot>import_pic()</slot>
  <slot>press_shibie()</slot>
  <slot>press_xunlian()</slot>
  <slot>exit()</slot>
  <slot>press_movie()</slot>
  <slot>moxingxuanze()</slot>
  <slot>gpu_use()</slot>
 </slots>
 </ui>
--- a/build/.DS_Store
+++ b/build/.DS_Store
--- a/build/lib/ppdet/init.py
+++ b/build/lib/ppdet/init.py
@ -0,0 +1,16 @@
 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from . import (core, data, engine, modeling, model_zoo, optimizer, metrics,
               utils, slim)
--- a/build/lib/ppdet/core/init.py
+++ b/build/lib/ppdet/core/init.py
@ -0,0 +1,15 @@
 #   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from . import config
--- a/build/lib/ppdet/core/config/init.py
+++ b/build/lib/ppdet/core/config/init.py
@ -0,0 +1,13 @@
 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
--- a/build/lib/ppdet/core/config/schema.py
+++ b/build/lib/ppdet/core/config/schema.py
@ -0,0 +1,248 @@
 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import print_function
 from __future__ import division
 import inspect
 import importlib
 import re
 try:
    from docstring_parser import parse as doc_parse
 except Exception:
    def doc_parse(*args):
        pass
 try:
    from typeguard import check_type
 except Exception:
    def check_type(*args):
        pass
 __all__ = ['SchemaValue', 'SchemaDict', 'SharedConfig', 'extract_schema']
 class SchemaValue(object):
    def __init__(self, name, doc='', type=None):
        super(SchemaValue, self).__init__()
        self.name = name
        self.doc = doc
        self.type = type
    def set_default(self, value):
        self.default = value
    def has_default(self):
        return hasattr(self, 'default')
 class SchemaDict(dict):
    def __init__(self, **kwargs):
        super(SchemaDict, self).__init__()
        self.schema = {}
        self.strict = False
        self.doc = ""
        self.update(kwargs)
    def __setitem__(self, key, value):
        # XXX also update regular dict to SchemaDict??
        if isinstance(value, dict) and key in self and isinstance(self[key],
                                                                  SchemaDict):
            self[key].update(value)
        else:
            super(SchemaDict, self).__setitem__(key, value)
    def __missing__(self, key):
        if self.has_default(key):
            return self.schema[key].default
        elif key in self.schema:
            return self.schema[key]
        else:
            raise KeyError(key)
    def copy(self):
        newone = SchemaDict()
        newone.__dict__.update(self.__dict__)
        newone.update(self)
        return newone
    def set_schema(self, key, value):
        assert isinstance(value, SchemaValue)
        self.schema[key] = value
    def set_strict(self, strict):
        self.strict = strict
    def has_default(self, key):
        return key in self.schema and self.schema[key].has_default()
    def is_default(self, key):
        if not self.has_default(key):
            return False
        if hasattr(self[key], '__dict__'):
            return True
        else:
            return key not in self or self[key] == self.schema[key].default
    def find_default_keys(self):
        return [
            k for k in list(self.keys()) + list(self.schema.keys())
            if self.is_default(k)
        ]
    def mandatory(self):
        return any([k for k in self.schema.keys() if not self.has_default(k)])
    def find_missing_keys(self):
        missing = [
            k for k in self.schema.keys()
            if k not in self and not self.has_default(k)
        ]
        placeholders = [k for k in self if self[k] in ('<missing>', '<value>')]
        return missing + placeholders
    def find_extra_keys(self):
        return list(set(self.keys()) - set(self.schema.keys()))
    def find_mismatch_keys(self):
        mismatch_keys = []
        for arg in self.schema.values():
            if arg.type is not None:
                try:
                    check_type("{}.{}".format(self.name, arg.name),
                               self[arg.name], arg.type)
                except Exception:
                    mismatch_keys.append(arg.name)
        return mismatch_keys
    def validate(self):
        missing_keys = self.find_missing_keys()
        if missing_keys:
            raise ValueError("Missing param for class<{}>: {}".format(
                self.name, ", ".join(missing_keys)))
        extra_keys = self.find_extra_keys()
        if extra_keys and self.strict:
            raise ValueError("Extraneous param for class<{}>: {}".format(
                self.name, ", ".join(extra_keys)))
        mismatch_keys = self.find_mismatch_keys()
        if mismatch_keys:
            raise TypeError("Wrong param type for class<{}>: {}".format(
                self.name, ", ".join(mismatch_keys)))
 class SharedConfig(object):
    """
    Representation class for `__shared__` annotations, which work as follows:
    - if `key` is set for the module in config file, its value will take
      precedence
    - if `key` is not set for the module but present in the config file, its
      value will be used
    - otherwise, use the provided `default_value` as fallback
    Args:
        key: config[key] will be injected
        default_value: fallback value
    """
    def __init__(self, key, default_value=None):
        super(SharedConfig, self).__init__()
        self.key = key
        self.default_value = default_value
 def extract_schema(cls):
    """
    Extract schema from a given class
    Args:
        cls (type): Class from which to extract.
    Returns:
        schema (SchemaDict): Extracted schema.
    """
    ctor = cls.__init__
    # python 2 compatibility
    if hasattr(inspect, 'getfullargspec'):
        argspec = inspect.getfullargspec(ctor)
        annotations = argspec.annotations
        has_kwargs = argspec.varkw is not None
    else:
        argspec = inspect.getfullargspec(ctor)
        # python 2 type hinting workaround, see pep-3107
        # however, since `typeguard` does not support python 2, type checking
        # is still python 3 only for now
        annotations = getattr(ctor, '__annotations__', {})
        has_kwargs = argspec.varkw is not None
    names = [arg for arg in argspec.args if arg != 'self']
    defaults = argspec.defaults
    num_defaults = argspec.defaults is not None and len(argspec.defaults) or 0
    num_required = len(names) - num_defaults
    docs = cls.__doc__
    if docs is None and getattr(cls, '__category__', None) == 'op':
        docs = cls.__call__.__doc__
    try:
        docstring = doc_parse(docs)
    except Exception:
        docstring = None
    if docstring is None:
        comments = {}
    else:
        comments = {}
        for p in docstring.params:
            match_obj = re.match('^([a-zA-Z_]+[a-zA-Z_0-9]*).*', p.arg_name)
            if match_obj is not None:
                comments[match_obj.group(1)] = p.description
    schema = SchemaDict()
    schema.name = cls.__name__
    schema.doc = ""
    if docs is not None:
        start_pos = docs[0] == '\n' and 1 or 0
        schema.doc = docs[start_pos:].split("\n")[0].strip()
    # XXX handle paddle's weird doc convention
    if '**' == schema.doc[:2] and '**' == schema.doc[-2:]:
        schema.doc = schema.doc[2:-2].strip()
    schema.category = hasattr(cls, '__category__') and getattr(
        cls, '__category__') or 'module'
    schema.strict = not has_kwargs
    schema.pymodule = importlib.import_module(cls.__module__)
    schema.inject = getattr(cls, '__inject__', [])
    schema.shared = getattr(cls, '__shared__', [])
    for idx, name in enumerate(names):
        comment = name in comments and comments[name] or name
        if name in schema.inject:
            type_ = None
        else:
            type_ = name in annotations and annotations[name] or None
        value_schema = SchemaValue(name, comment, type_)
        if name in schema.shared:
            assert idx >= num_required, "shared config must have default value"
            default = defaults[idx - num_required]
            value_schema.set_default(SharedConfig(name, default))
        elif idx >= num_required:
            default = defaults[idx - num_required]
            value_schema.set_default(default)
        schema.set_schema(name, value_schema)
    return schema
--- a/build/lib/ppdet/core/config/yaml_helpers.py
+++ b/build/lib/ppdet/core/config/yaml_helpers.py
@ -0,0 +1,118 @@
 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import importlib
 import inspect
 import yaml
 from .schema import SharedConfig
 __all__ = ['serializable', 'Callable']
 def represent_dictionary_order(self, dict_data):
    return self.represent_mapping('tag:yaml.org,2002:map', dict_data.items())
 def setup_orderdict():
    from collections import OrderedDict
    yaml.add_representer(OrderedDict, represent_dictionary_order)
 def _make_python_constructor(cls):
    def python_constructor(loader, node):
        if isinstance(node, yaml.SequenceNode):
            args = loader.construct_sequence(node, deep=True)
            return cls(*args)
        else:
            kwargs = loader.construct_mapping(node, deep=True)
            try:
                return cls(**kwargs)
            except Exception as ex:
                print("Error when construct {} instance from yaml config".
                      format(cls.__name__))
                raise ex
    return python_constructor
 def _make_python_representer(cls):
    # python 2 compatibility
    if hasattr(inspect, 'getfullargspec'):
        argspec = inspect.getfullargspec(cls)
    else:
        argspec = inspect.getfullargspec(cls.__init__)
    argnames = [arg for arg in argspec.args if arg != 'self']
    def python_representer(dumper, obj):
        if argnames:
            data = {name: getattr(obj, name) for name in argnames}
        else:
            data = obj.__dict__
        if '_id' in data:
            del data['_id']
        return dumper.represent_mapping(u'!{}'.format(cls.__name__), data)
    return python_representer
 def serializable(cls):
    """
    Add loader and dumper for given class, which must be
    "trivially serializable"
    Args:
        cls: class to be serialized
    Returns: cls
    """
    yaml.add_constructor(u'!{}'.format(cls.__name__),
                         _make_python_constructor(cls))
    yaml.add_representer(cls, _make_python_representer(cls))
    return cls
 yaml.add_representer(SharedConfig,
                     lambda d, o: d.represent_data(o.default_value))
@serializable
 class Callable(object):
    """
    Helper to be used in Yaml for creating arbitrary class objects
    Args:
        full_type (str): the full module path to target function
    """
    def __init__(self, full_type, args=[], kwargs={}):
        super(Callable, self).__init__()
        self.full_type = full_type
        self.args = args
        self.kwargs = kwargs
    def __call__(self):
        if '.' in self.full_type:
            idx = self.full_type.rfind('.')
            module = importlib.import_module(self.full_type[:idx])
            func_name = self.full_type[idx + 1:]
        else:
            try:
                module = importlib.import_module('builtins')
            except Exception:
                module = importlib.import_module('__builtin__')
            func_name = self.full_type
        func = getattr(module, func_name)
        return func(*self.args, **self.kwargs)
--- a/build/lib/ppdet/core/workspace.py
+++ b/build/lib/ppdet/core/workspace.py
@ -0,0 +1,275 @@
 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import print_function
 from __future__ import division
 import importlib
 import os
 import sys
 import yaml
 import collections
 try:
    collectionsAbc = collections.abc
 except AttributeError:
    collectionsAbc = collections
 from .config.schema import SchemaDict, SharedConfig, extract_schema
 from .config.yaml_helpers import serializable
 __all__ = [
    'global_config',
    'load_config',
    'merge_config',
    'get_registered_modules',
    'create',
    'register',
    'serializable',
    'dump_value',
 ]
 def dump_value(value):
    # XXX this is hackish, but collections.abc is not available in python 2
    if hasattr(value, '__dict__') or isinstance(value, (dict, tuple, list)):
        value = yaml.dump(value, default_flow_style=True)
        value = value.replace('\n', '')
        value = value.replace('...', '')
        return "'{}'".format(value)
    else:
        # primitive types
        return str(value)
 class AttrDict(dict):
    """Single level attribute dict, NOT recursive"""
    def __init__(self, **kwargs):
        super(AttrDict, self).__init__()
        super(AttrDict, self).update(kwargs)
    def __getattr__(self, key):
        if key in self:
            return self[key]
        raise AttributeError("object has no attribute '{}'".format(key))
 global_config = AttrDict()
 BASE_KEY = '_BASE_'
 # parse and load _BASE_ recursively
 def _load_config_with_base(file_path):
    with open(file_path) as f:
        file_cfg = yaml.load(f, Loader=yaml.Loader)
    # NOTE: cfgs outside have higher priority than cfgs in _BASE_
    if BASE_KEY in file_cfg:
        all_base_cfg = AttrDict()
        base_ymls = list(file_cfg[BASE_KEY])
        for base_yml in base_ymls:
            if base_yml.startswith("~"):
                base_yml = os.path.expanduser(base_yml)
            if not base_yml.startswith('/'):
                base_yml = os.path.join(os.path.dirname(file_path), base_yml)
            with open(base_yml) as f:
                base_cfg = _load_config_with_base(base_yml)
                all_base_cfg = merge_config(base_cfg, all_base_cfg)
        del file_cfg[BASE_KEY]
        return merge_config(file_cfg, all_base_cfg)
    return file_cfg
 def load_config(file_path):
    """
    Load config from file.
    Args:
        file_path (str): Path of the config file to be loaded.
    Returns: global config
    """
    _, ext = os.path.splitext(file_path)
    assert ext in ['.yml', '.yaml'], "only support yaml files for now"
    # load config from file and merge into global config
    cfg = _load_config_with_base(file_path)
    cfg['filename'] = os.path.splitext(os.path.split(file_path)[-1])[0]
    merge_config(cfg)
    return global_config
 def dict_merge(dct, merge_dct):
    """ Recursive dict merge. Inspired by :meth:``dict.update()``, instead of
    updating only top-level keys, dict_merge recurses down into dicts nested
    to an arbitrary depth, updating keys. The ``merge_dct`` is merged into
    ``dct``.
    Args:
        dct: dict onto which the merge is executed
        merge_dct: dct merged into dct
    Returns: dct
    """
    for k, v in merge_dct.items():
        if (k in dct and isinstance(dct[k], dict) and
                isinstance(merge_dct[k], collectionsAbc.Mapping)):
            dict_merge(dct[k], merge_dct[k])
        else:
            dct[k] = merge_dct[k]
    return dct
 def merge_config(config, another_cfg=None):
    """
    Merge config into global config or another_cfg.
    Args:
        config (dict): Config to be merged.
    Returns: global config
    """
    global global_config
    dct = another_cfg or global_config
    return dict_merge(dct, config)
 def get_registered_modules():
    return {k: v for k, v in global_config.items() if isinstance(v, SchemaDict)}
 def make_partial(cls):
    op_module = importlib.import_module(cls.__op__.__module__)
    op = getattr(op_module, cls.__op__.__name__)
    cls.__category__ = getattr(cls, '__category__', None) or 'op'
    def partial_apply(self, *args, **kwargs):
        kwargs_ = self.__dict__.copy()
        kwargs_.update(kwargs)
        return op(*args, **kwargs_)
    if getattr(cls, '__append_doc__', True):  # XXX should default to True?
        if sys.version_info[0] > 2:
            cls.__doc__ = "Wrapper for `{}` OP".format(op.__name__)
            cls.__init__.__doc__ = op.__doc__
            cls.__call__ = partial_apply
            cls.__call__.__doc__ = op.__doc__
        else:
            # XXX work around for python 2
            partial_apply.__doc__ = op.__doc__
            cls.__call__ = partial_apply
    return cls
 def register(cls):
    """
    Register a given module class.
    Args:
        cls (type): Module class to be registered.
    Returns: cls
    """
    if cls.__name__ in global_config:
        raise ValueError("Module class already registered: {}".format(
            cls.__name__))
    if hasattr(cls, '__op__'):
        cls = make_partial(cls)
    global_config[cls.__name__] = extract_schema(cls)
    return cls
 def create(cls_or_name, **kwargs):
    """
    Create an instance of given module class.
    Args:
        cls_or_name (type or str): Class of which to create instance.
    Returns: instance of type `cls_or_name`
    """
    assert type(cls_or_name) in [type, str
                                 ], "should be a class or name of a class"
    name = type(cls_or_name) == str and cls_or_name or cls_or_name.__name__
    assert name in global_config and \
        isinstance(global_config[name], SchemaDict), \
        "the module {} is not registered".format(name)
    config = global_config[name]
    cls = getattr(config.pymodule, name)
    cls_kwargs = {}
    cls_kwargs.update(global_config[name])
    # parse `shared` annoation of registered modules
    if getattr(config, 'shared', None):
        for k in config.shared:
            target_key = config[k]
            shared_conf = config.schema[k].default
            assert isinstance(shared_conf, SharedConfig)
            if target_key is not None and not isinstance(target_key,
                                                         SharedConfig):
                continue  # value is given for the module
            elif shared_conf.key in global_config:
                # `key` is present in config
                cls_kwargs[k] = global_config[shared_conf.key]
            else:
                cls_kwargs[k] = shared_conf.default_value
    # parse `inject` annoation of registered modules
    if getattr(cls, 'from_config', None):
        cls_kwargs.update(cls.from_config(config, **kwargs))
    if getattr(config, 'inject', None):
        for k in config.inject:
            target_key = config[k]
            # optional dependency
            if target_key is None:
                continue
            if isinstance(target_key, dict) or hasattr(target_key, '__dict__'):
                if 'name' not in target_key.keys():
                    continue
                inject_name = str(target_key['name'])
                if inject_name not in global_config:
                    raise ValueError(
                        "Missing injection name {} and check it's name in cfg file".
                        format(k))
                target = global_config[inject_name]
                for i, v in target_key.items():
                    if i == 'name':
                        continue
                    target[i] = v
                if isinstance(target, SchemaDict):
                    cls_kwargs[k] = create(inject_name)
            elif isinstance(target_key, str):
                if target_key not in global_config:
                    raise ValueError("Missing injection config:", target_key)
                target = global_config[target_key]
                if isinstance(target, SchemaDict):
                    cls_kwargs[k] = create(target_key)
                elif hasattr(target, '__dict__'):  # serialized object
                    cls_kwargs[k] = target
            else:
                raise ValueError("Unsupported injection type:", target_key)
    # prevent modification of global config values of reference types
    # (e.g., list, dict) from within the created module instances
    #kwargs = copy.deepcopy(kwargs)
    return cls(**cls_kwargs)
--- a/build/lib/ppdet/data/init.py
+++ b/build/lib/ppdet/data/init.py
@ -0,0 +1,21 @@
 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 from . import source
 from . import transform
 from . import reader
 from .source import *
 from .transform import *
 from .reader import *
--- a/build/lib/ppdet/data/reader.py
+++ b/build/lib/ppdet/data/reader.py
@ -0,0 +1,303 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
 import traceback
 import six
 import sys
 if sys.version_info >= (3, 0):
    pass
 else:
    pass
 import numpy as np
 from paddle.io import DataLoader, DistributedBatchSampler
 from paddle.fluid.dataloader.collate import default_collate_fn
 from ppdet.core.workspace import register
 from . import transform
 from .shm_utils import _get_shared_memory_size_in_M
 from ppdet.utils.logger import setup_logger
 logger = setup_logger('reader')
 MAIN_PID = os.getpid()
 class Compose(object):
    def __init__(self, transforms, num_classes=80):
        self.transforms = transforms
        self.transforms_cls = []
        for t in self.transforms:
            for k, v in t.items():
                op_cls = getattr(transform, k)
                f = op_cls(**v)
                if hasattr(f, 'num_classes'):
                    f.num_classes = num_classes
                self.transforms_cls.append(f)
    def __call__(self, data):
        for f in self.transforms_cls:
            try:
                data = f(data)
            except Exception as e:
                stack_info = traceback.format_exc()
                logger.warning("fail to map sample transform [{}] "
                               "with error: {} and stack:\n{}".format(
                                   f, e, str(stack_info)))
                raise e
        return data
 class BatchCompose(Compose):
    def __init__(self, transforms, num_classes=80, collate_batch=True):
        super(BatchCompose, self).__init__(transforms, num_classes)
        self.collate_batch = collate_batch
    def __call__(self, data):
        for f in self.transforms_cls:
            try:
                data = f(data)
            except Exception as e:
                stack_info = traceback.format_exc()
                logger.warning("fail to map batch transform [{}] "
                               "with error: {} and stack:\n{}".format(
                                   f, e, str(stack_info)))
                raise e
        # remove keys which is not needed by model
        extra_key = ['h', 'w', 'flipped']
        for k in extra_key:
            for sample in data:
                if k in sample:
                    sample.pop(k)
        # batch data, if user-define batch function needed
        # use user-defined here
        if self.collate_batch:
            batch_data = default_collate_fn(data)
        else:
            batch_data = {}
            for k in data[0].keys():
                tmp_data = []
                for i in range(len(data)):
                    tmp_data.append(data[i][k])
                if not 'gt_' in k and not 'is_crowd' in k and not 'difficult' in k:
                    tmp_data = np.stack(tmp_data, axis=0)
                batch_data[k] = tmp_data
        return batch_data
 class BaseDataLoader(object):
    """
    Base DataLoader implementation for detection models
    Args:
        sample_transforms (list): a list of transforms to perform
                                  on each sample
        batch_transforms (list): a list of transforms to perform
                                 on batch
        batch_size (int): batch size for batch collating, default 1.
        shuffle (bool): whether to shuffle samples
        drop_last (bool): whether to drop the last incomplete,
                          default False
        num_classes (int): class number of dataset, default 80
        collate_batch (bool): whether to collate batch in dataloader.
            If set to True, the samples will collate into batch according
            to the batch size. Otherwise, the ground-truth will not collate,
            which is used when the number of ground-truch is different in 
            samples.
        use_shared_memory (bool): whether to use shared memory to
                accelerate data loading, enable this only if you
                are sure that the shared memory size of your OS
                is larger than memory cost of input datas of model.
                Note that shared memory will be automatically
                disabled if the shared memory of OS is less than
                1G, which is not enough for detection models.
                Default False.
    """
    def __init__(self,
                 sample_transforms=[],
                 batch_transforms=[],
                 batch_size=1,
                 shuffle=False,
                 drop_last=False,
                 num_classes=80,
                 collate_batch=True,
                 use_shared_memory=False,
                 **kwargs):
        # sample transform
        self._sample_transforms = Compose(
            sample_transforms, num_classes=num_classes)
        # batch transfrom 
        self._batch_transforms = BatchCompose(batch_transforms, num_classes,
                                              collate_batch)
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.drop_last = drop_last
        self.use_shared_memory = use_shared_memory
        self.kwargs = kwargs
    def __call__(self,
                 dataset,
                 worker_num,
                 batch_sampler=None,
                 return_list=False):
        self.dataset = dataset
        self.dataset.check_or_download_dataset()
        self.dataset.parse_dataset()
        # get data
        self.dataset.set_transform(self._sample_transforms)
        # set kwargs
        self.dataset.set_kwargs(**self.kwargs)
        # batch sampler
        if batch_sampler is None:
            self._batch_sampler = DistributedBatchSampler(
                self.dataset,
                batch_size=self.batch_size,
                shuffle=self.shuffle,
                drop_last=self.drop_last)
        else:
            self._batch_sampler = batch_sampler
        # DataLoader do not start sub-process in Windows and Mac
        # system, do not need to use shared memory
        use_shared_memory = self.use_shared_memory and \
                            sys.platform not in ['win32', 'darwin']
        # check whether shared memory size is bigger than 1G(1024M)
        if use_shared_memory:
            shm_size = _get_shared_memory_size_in_M()
            if shm_size is not None and shm_size < 1024.:
                logger.warning("Shared memory size is less than 1G, "
                               "disable shared_memory in DataLoader")
                use_shared_memory = False
        self.dataloader = DataLoader(
            dataset=self.dataset,
            batch_sampler=self._batch_sampler,
            collate_fn=self._batch_transforms,
            num_workers=worker_num,
            return_list=return_list,
            use_shared_memory=use_shared_memory)
        self.loader = iter(self.dataloader)
        return self
    def __len__(self):
        return len(self._batch_sampler)
    def __iter__(self):
        return self
    def __next__(self):
        try:
            return next(self.loader)
        except StopIteration:
            self.loader = iter(self.dataloader)
            six.reraise(*sys.exc_info())
    def next(self):
        # python2 compatibility
        return self.__next__()
@register
 class TrainReader(BaseDataLoader):
    __shared__ = ['num_classes']
    def __init__(self,
                 sample_transforms=[],
                 batch_transforms=[],
                 batch_size=1,
                 shuffle=True,
                 drop_last=True,
                 num_classes=80,
                 collate_batch=True,
                 **kwargs):
        super(TrainReader, self).__init__(sample_transforms, batch_transforms,
                                          batch_size, shuffle, drop_last,
                                          num_classes, collate_batch, **kwargs)
@register
 class EvalReader(BaseDataLoader):
    __shared__ = ['num_classes']
    def __init__(self,
                 sample_transforms=[],
                 batch_transforms=[],
                 batch_size=1,
                 shuffle=False,
                 drop_last=True,
                 num_classes=80,
                 **kwargs):
        super(EvalReader, self).__init__(sample_transforms, batch_transforms,
                                         batch_size, shuffle, drop_last,
                                         num_classes, **kwargs)
@register
 class TestReader(BaseDataLoader):
    __shared__ = ['num_classes']
    def __init__(self,
                 sample_transforms=[],
                 batch_transforms=[],
                 batch_size=1,
                 shuffle=False,
                 drop_last=False,
                 num_classes=80,
                 **kwargs):
        super(TestReader, self).__init__(sample_transforms, batch_transforms,
                                         batch_size, shuffle, drop_last,
                                         num_classes, **kwargs)
@register
 class EvalMOTReader(BaseDataLoader):
    __shared__ = ['num_classes']
    def __init__(self,
                 sample_transforms=[],
                 batch_transforms=[],
                 batch_size=1,
                 shuffle=False,
                 drop_last=False,
                 num_classes=1,
                 **kwargs):
        super(EvalMOTReader, self).__init__(sample_transforms, batch_transforms,
                                            batch_size, shuffle, drop_last,
                                            num_classes, **kwargs)
@register
 class TestMOTReader(BaseDataLoader):
    __shared__ = ['num_classes']
    def __init__(self,
                 sample_transforms=[],
                 batch_transforms=[],
                 batch_size=1,
                 shuffle=False,
                 drop_last=False,
                 num_classes=1,
                 **kwargs):
        super(TestMOTReader, self).__init__(sample_transforms, batch_transforms,
                                            batch_size, shuffle, drop_last,
                                            num_classes, **kwargs)
--- a/build/lib/ppdet/data/shm_utils.py
+++ b/build/lib/ppdet/data/shm_utils.py
@ -0,0 +1,67 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
 SIZE_UNIT = ['K', 'M', 'G', 'T']
 SHM_QUERY_CMD = 'df -h'
 SHM_KEY = 'shm'
 SHM_DEFAULT_MOUNT = '/dev/shm'
 # [ shared memory size check ]
 # In detection models, image/target data occupies a lot of memory, and
 # will occupy lots of shared memory in multi-process DataLoader, we use
 # following code to get shared memory size and perform a size check to
 # disable shared memory use if shared memory size is not enough.
 # Shared memory getting process as follows:
 # 1. use `df -h` get all mount info
 # 2. pick up spaces whose mount info contains 'shm'
 # 3. if 'shm' space number is only 1, return its size
 # 4. if there are multiple 'shm' space, try to find the default mount
 #    directory '/dev/shm' is Linux-like system, otherwise return the
 #    biggest space size.
 def _parse_size_in_M(size_str):
    num, unit = size_str[:-1], size_str[-1]
    assert unit in SIZE_UNIT, \
            "unknown shm size unit {}".format(unit)
    return float(num) * \
            (1024 ** (SIZE_UNIT.index(unit) - 1))
 def _get_shared_memory_size_in_M():
    try:
        df_infos = os.popen(SHM_QUERY_CMD).readlines()
    except:
        return None
    else:
        shm_infos = []
        for df_info in df_infos:
            info = df_info.strip()
            if info.find(SHM_KEY) >= 0:
                shm_infos.append(info.split())
        if len(shm_infos) == 0:
            return None
        elif len(shm_infos) == 1:
            return _parse_size_in_M(shm_infos[0][3])
        else:
            default_mount_infos = [
                si for si in shm_infos if si[-1] == SHM_DEFAULT_MOUNT
            ]
            if default_mount_infos:
                return _parse_size_in_M(default_mount_infos[0][3])
            else:
                return max([_parse_size_in_M(si[3]) for si in shm_infos])
--- a/build/lib/ppdet/data/source/init.py
+++ b/build/lib/ppdet/data/source/init.py
@ -0,0 +1,27 @@
 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from . import coco
 from . import voc
 from . import widerface
 from . import category
 from . import keypoint_coco
 from . import mot
 from .coco import *
 from .voc import *
 from .widerface import *
 from .category import *
 from .keypoint_coco import *
 from .mot import *
--- a/build/lib/ppdet/data/source/category.py
+++ b/build/lib/ppdet/data/source/category.py
@ -0,0 +1,823 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import os
 from ppdet.data.source.voc import pascalvoc_label
 from ppdet.data.source.widerface import widerface_label
 from ppdet.utils.logger import setup_logger
 logger = setup_logger(__name__)
 __all__ = ['get_categories']
 def get_categories(metric_type, anno_file=None, arch=None):
    """
    Get class id to category id map and category id
    to category name map from annotation file.
    Args:
        metric_type (str): metric type, currently support 'coco', 'voc', 'oid'
            and 'widerface'.
        anno_file (str): annotation file path
    """
    if arch == 'keypoint_arch':
        return (None, {'id': 'keypoint'})
    if metric_type.lower() == 'coco':
        if anno_file and os.path.isfile(anno_file):
            # lazy import pycocotools here
            from pycocotools.coco import COCO
            coco = COCO(anno_file)
            cats = coco.loadCats(coco.getCatIds())
            clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)}
            catid2name = {cat['id']: cat['name'] for cat in cats}
            return clsid2catid, catid2name
        # anno file not exist, load default categories of COCO17
        else:
            return _coco17_category()
    elif metric_type.lower() == 'voc':
        if anno_file and os.path.isfile(anno_file):
            cats = []
            with open(anno_file) as f:
                for line in f.readlines():
                    cats.append(line.strip())
            if cats[0] == 'background':
                cats = cats[1:]
            clsid2catid = {i: i for i in range(len(cats))}
            catid2name = {i: name for i, name in enumerate(cats)}
            return clsid2catid, catid2name
        # anno file not exist, load default categories of
        # VOC all 20 categories
        else:
            return _vocall_category()
    elif metric_type.lower() == 'oid':
        if anno_file and os.path.isfile(anno_file):
            logger.warning("only default categories support for OID19")
        return _oid19_category()
    elif metric_type.lower() == 'widerface':
        return _widerface_category()
    elif metric_type.lower() == 'keypointtopdowncocoeval':
        return (None, {'id': 'keypoint'})
    elif metric_type.lower() in ['mot', 'motdet', 'reid']:
        return _mot_category()
    else:
        raise ValueError("unknown metric type {}".format(metric_type))
 def _mot_category():
    """
    Get class id to category id map and category id
    to category name map of mot dataset
    """
    label_map = {'person': 0}
    label_map = sorted(label_map.items(), key=lambda x: x[1])
    cats = [l[0] for l in label_map]
    clsid2catid = {i: i for i in range(len(cats))}
    catid2name = {i: name for i, name in enumerate(cats)}
    return clsid2catid, catid2name
 def _coco17_category():
    """
    Get class id to category id map and category id
    to category name map of COCO2017 dataset
    """
    clsid2catid = {
        1: 1,
        2: 2,
        3: 3,
        4: 4,
        5: 5,
        6: 6,
        7: 7,
        8: 8,
        9: 9,
        10: 10,
        11: 11,
        12: 13,
        13: 14,
        14: 15,
        15: 16,
        16: 17,
        17: 18,
        18: 19,
        19: 20,
        20: 21,
        21: 22,
        22: 23,
        23: 24,
        24: 25,
        25: 27,
        26: 28,
        27: 31,
        28: 32,
        29: 33,
        30: 34,
        31: 35,
        32: 36,
        33: 37,
        34: 38,
        35: 39,
        36: 40,
        37: 41,
        38: 42,
        39: 43,
        40: 44,
        41: 46,
        42: 47,
        43: 48,
        44: 49,
        45: 50,
        46: 51,
        47: 52,
        48: 53,
        49: 54,
        50: 55,
        51: 56,
        52: 57,
        53: 58,
        54: 59,
        55: 60,
        56: 61,
        57: 62,
        58: 63,
        59: 64,
        60: 65,
        61: 67,
        62: 70,
        63: 72,
        64: 73,
        65: 74,
        66: 75,
        67: 76,
        68: 77,
        69: 78,
        70: 79,
        71: 80,
        72: 81,
        73: 82,
        74: 84,
        75: 85,
        76: 86,
        77: 87,
        78: 88,
        79: 89,
        80: 90
    }
    catid2name = {
        0: 'background',
        1: 'person',
        2: 'bicycle',
        3: 'car',
        4: 'motorcycle',
        5: 'airplane',
        6: 'bus',
        7: 'train',
        8: 'truck',
        9: 'boat',
        10: 'traffic light',
        11: 'fire hydrant',
        13: 'stop sign',
        14: 'parking meter',
        15: 'bench',
        16: 'bird',
        17: 'cat',
        18: 'dog',
        19: 'horse',
        20: 'sheep',
        21: 'cow',
        22: 'elephant',
        23: 'bear',
        24: 'zebra',
        25: 'giraffe',
        27: 'backpack',
        28: 'umbrella',
        31: 'handbag',
        32: 'tie',
        33: 'suitcase',
        34: 'frisbee',
        35: 'skis',
        36: 'snowboard',
        37: 'sports ball',
        38: 'kite',
        39: 'baseball bat',
        40: 'baseball glove',
        41: 'skateboard',
        42: 'surfboard',
        43: 'tennis racket',
        44: 'bottle',
        46: 'wine glass',
        47: 'cup',
        48: 'fork',
        49: 'knife',
        50: 'spoon',
        51: 'bowl',
        52: 'banana',
        53: 'apple',
        54: 'sandwich',
        55: 'orange',
        56: 'broccoli',
        57: 'carrot',
        58: 'hot dog',
        59: 'pizza',
        60: 'donut',
        61: 'cake',
        62: 'chair',
        63: 'couch',
        64: 'potted plant',
        65: 'bed',
        67: 'dining table',
        70: 'toilet',
        72: 'tv',
        73: 'laptop',
        74: 'mouse',
        75: 'remote',
        76: 'keyboard',
        77: 'cell phone',
        78: 'microwave',
        79: 'oven',
        80: 'toaster',
        81: 'sink',
        82: 'refrigerator',
        84: 'book',
        85: 'clock',
        86: 'vase',
        87: 'scissors',
        88: 'teddy bear',
        89: 'hair drier',
        90: 'toothbrush'
    }
    clsid2catid = {k - 1: v for k, v in clsid2catid.items()}
    catid2name.pop(0)
    return clsid2catid, catid2name
 def _vocall_category():
    """
    Get class id to category id map and category id
    to category name map of mixup voc dataset
    """
    label_map = pascalvoc_label()
    label_map = sorted(label_map.items(), key=lambda x: x[1])
    cats = [l[0] for l in label_map]
    clsid2catid = {i: i for i in range(len(cats))}
    catid2name = {i: name for i, name in enumerate(cats)}
    return clsid2catid, catid2name
 def _widerface_category():
    label_map = widerface_label()
    label_map = sorted(label_map.items(), key=lambda x: x[1])
    cats = [l[0] for l in label_map]
    clsid2catid = {i: i for i in range(len(cats))}
    catid2name = {i: name for i, name in enumerate(cats)}
    return clsid2catid, catid2name
 def _oid19_category():
    clsid2catid = {k: k + 1 for k in range(500)}
    catid2name = {
        0: "background",
        1: "Infant bed",
        2: "Rose",
        3: "Flag",
        4: "Flashlight",
        5: "Sea turtle",
        6: "Camera",
        7: "Animal",
        8: "Glove",
        9: "Crocodile",
        10: "Cattle",
        11: "House",
        12: "Guacamole",
        13: "Penguin",
        14: "Vehicle registration plate",
        15: "Bench",
        16: "Ladybug",
        17: "Human nose",
        18: "Watermelon",
        19: "Flute",
        20: "Butterfly",
        21: "Washing machine",
        22: "Raccoon",
        23: "Segway",
        24: "Taco",
        25: "Jellyfish",
        26: "Cake",
        27: "Pen",
        28: "Cannon",
        29: "Bread",
        30: "Tree",
        31: "Shellfish",
        32: "Bed",
        33: "Hamster",
        34: "Hat",
        35: "Toaster",
        36: "Sombrero",
        37: "Tiara",
        38: "Bowl",
        39: "Dragonfly",
        40: "Moths and butterflies",
        41: "Antelope",
        42: "Vegetable",
        43: "Torch",
        44: "Building",
        45: "Power plugs and sockets",
        46: "Blender",
        47: "Billiard table",
        48: "Cutting board",
        49: "Bronze sculpture",
        50: "Turtle",
        51: "Broccoli",
        52: "Tiger",
        53: "Mirror",
        54: "Bear",
        55: "Zucchini",
        56: "Dress",
        57: "Volleyball",
        58: "Guitar",
        59: "Reptile",
        60: "Golf cart",
        61: "Tart",
        62: "Fedora",
        63: "Carnivore",
        64: "Car",
        65: "Lighthouse",
        66: "Coffeemaker",
        67: "Food processor",
        68: "Truck",
        69: "Bookcase",
        70: "Surfboard",
        71: "Footwear",
        72: "Bench",
        73: "Necklace",
        74: "Flower",
        75: "Radish",
        76: "Marine mammal",
        77: "Frying pan",
        78: "Tap",
        79: "Peach",
        80: "Knife",
        81: "Handbag",
        82: "Laptop",
        83: "Tent",
        84: "Ambulance",
        85: "Christmas tree",
        86: "Eagle",
        87: "Limousine",
        88: "Kitchen & dining room table",
        89: "Polar bear",
        90: "Tower",
        91: "Football",
        92: "Willow",
        93: "Human head",
        94: "Stop sign",
        95: "Banana",
        96: "Mixer",
        97: "Binoculars",
        98: "Dessert",
        99: "Bee",
        100: "Chair",
        101: "Wood-burning stove",
        102: "Flowerpot",
        103: "Beaker",
        104: "Oyster",
        105: "Woodpecker",
        106: "Harp",
        107: "Bathtub",
        108: "Wall clock",
        109: "Sports uniform",
        110: "Rhinoceros",
        111: "Beehive",
        112: "Cupboard",
        113: "Chicken",
        114: "Man",
        115: "Blue jay",
        116: "Cucumber",
        117: "Balloon",
        118: "Kite",
        119: "Fireplace",
        120: "Lantern",
        121: "Missile",
        122: "Book",
        123: "Spoon",
        124: "Grapefruit",
        125: "Squirrel",
        126: "Orange",
        127: "Coat",
        128: "Punching bag",
        129: "Zebra",
        130: "Billboard",
        131: "Bicycle",
        132: "Door handle",
        133: "Mechanical fan",
        134: "Ring binder",
        135: "Table",
        136: "Parrot",
        137: "Sock",
        138: "Vase",
        139: "Weapon",
        140: "Shotgun",
        141: "Glasses",
        142: "Seahorse",
        143: "Belt",
        144: "Watercraft",
        145: "Window",
        146: "Giraffe",
        147: "Lion",
        148: "Tire",
        149: "Vehicle",
        150: "Canoe",
        151: "Tie",
        152: "Shelf",
        153: "Picture frame",
        154: "Printer",
        155: "Human leg",
        156: "Boat",
        157: "Slow cooker",
        158: "Croissant",
        159: "Candle",
        160: "Pancake",
        161: "Pillow",
        162: "Coin",
        163: "Stretcher",
        164: "Sandal",
        165: "Woman",
        166: "Stairs",
        167: "Harpsichord",
        168: "Stool",
        169: "Bus",
        170: "Suitcase",
        171: "Human mouth",
        172: "Juice",
        173: "Skull",
        174: "Door",
        175: "Violin",
        176: "Chopsticks",
        177: "Digital clock",
        178: "Sunflower",
        179: "Leopard",
        180: "Bell pepper",
        181: "Harbor seal",
        182: "Snake",
        183: "Sewing machine",
        184: "Goose",
        185: "Helicopter",
        186: "Seat belt",
        187: "Coffee cup",
        188: "Microwave oven",
        189: "Hot dog",
        190: "Countertop",
        191: "Serving tray",
        192: "Dog bed",
        193: "Beer",
        194: "Sunglasses",
        195: "Golf ball",
        196: "Waffle",
        197: "Palm tree",
        198: "Trumpet",
        199: "Ruler",
        200: "Helmet",
        201: "Ladder",
        202: "Office building",
        203: "Tablet computer",
        204: "Toilet paper",
        205: "Pomegranate",
        206: "Skirt",
        207: "Gas stove",
        208: "Cookie",
        209: "Cart",
        210: "Raven",
        211: "Egg",
        212: "Burrito",
        213: "Goat",
        214: "Kitchen knife",
        215: "Skateboard",
        216: "Salt and pepper shakers",
        217: "Lynx",
        218: "Boot",
        219: "Platter",
        220: "Ski",
        221: "Swimwear",
        222: "Swimming pool",
        223: "Drinking straw",
        224: "Wrench",
        225: "Drum",
        226: "Ant",
        227: "Human ear",
        228: "Headphones",
        229: "Fountain",
        230: "Bird",
        231: "Jeans",
        232: "Television",
        233: "Crab",
        234: "Microphone",
        235: "Home appliance",
        236: "Snowplow",
        237: "Beetle",
        238: "Artichoke",
        239: "Jet ski",
        240: "Stationary bicycle",
        241: "Human hair",
        242: "Brown bear",
        243: "Starfish",
        244: "Fork",
        245: "Lobster",
        246: "Corded phone",
        247: "Drink",
        248: "Saucer",
        249: "Carrot",
        250: "Insect",
        251: "Clock",
        252: "Castle",
        253: "Tennis racket",
        254: "Ceiling fan",
        255: "Asparagus",
        256: "Jaguar",
        257: "Musical instrument",
        258: "Train",
        259: "Cat",
        260: "Rifle",
        261: "Dumbbell",
        262: "Mobile phone",
        263: "Taxi",
        264: "Shower",
        265: "Pitcher",
        266: "Lemon",
        267: "Invertebrate",
        268: "Turkey",
        269: "High heels",
        270: "Bust",
        271: "Elephant",
        272: "Scarf",
        273: "Barrel",
        274: "Trombone",
        275: "Pumpkin",
        276: "Box",
        277: "Tomato",
        278: "Frog",
        279: "Bidet",
        280: "Human face",
        281: "Houseplant",
        282: "Van",
        283: "Shark",
        284: "Ice cream",
        285: "Swim cap",
        286: "Falcon",
        287: "Ostrich",
        288: "Handgun",
        289: "Whiteboard",
        290: "Lizard",
        291: "Pasta",
        292: "Snowmobile",
        293: "Light bulb",
        294: "Window blind",
        295: "Muffin",
        296: "Pretzel",
        297: "Computer monitor",
        298: "Horn",
        299: "Furniture",
        300: "Sandwich",
        301: "Fox",
        302: "Convenience store",
        303: "Fish",
        304: "Fruit",
        305: "Earrings",
        306: "Curtain",
        307: "Grape",
        308: "Sofa bed",
        309: "Horse",
        310: "Luggage and bags",
        311: "Desk",
        312: "Crutch",
        313: "Bicycle helmet",
        314: "Tick",
        315: "Airplane",
        316: "Canary",
        317: "Spatula",
        318: "Watch",
        319: "Lily",
        320: "Kitchen appliance",
        321: "Filing cabinet",
        322: "Aircraft",
        323: "Cake stand",
        324: "Candy",
        325: "Sink",
        326: "Mouse",
        327: "Wine",
        328: "Wheelchair",
        329: "Goldfish",
        330: "Refrigerator",
        331: "French fries",
        332: "Drawer",
        333: "Treadmill",
        334: "Picnic basket",
        335: "Dice",
        336: "Cabbage",
        337: "Football helmet",
        338: "Pig",
        339: "Person",
        340: "Shorts",
        341: "Gondola",
        342: "Honeycomb",
        343: "Doughnut",
        344: "Chest of drawers",
        345: "Land vehicle",
        346: "Bat",
        347: "Monkey",
        348: "Dagger",
        349: "Tableware",
        350: "Human foot",
        351: "Mug",
        352: "Alarm clock",
        353: "Pressure cooker",
        354: "Human hand",
        355: "Tortoise",
        356: "Baseball glove",
        357: "Sword",
        358: "Pear",
        359: "Miniskirt",
        360: "Traffic sign",
        361: "Girl",
        362: "Roller skates",
        363: "Dinosaur",
        364: "Porch",
        365: "Human beard",
        366: "Submarine sandwich",
        367: "Screwdriver",
        368: "Strawberry",
        369: "Wine glass",
        370: "Seafood",
        371: "Racket",
        372: "Wheel",
        373: "Sea lion",
        374: "Toy",
        375: "Tea",
        376: "Tennis ball",
        377: "Waste container",
        378: "Mule",
        379: "Cricket ball",
        380: "Pineapple",
        381: "Coconut",
        382: "Doll",
        383: "Coffee table",
        384: "Snowman",
        385: "Lavender",
        386: "Shrimp",
        387: "Maple",
        388: "Cowboy hat",
        389: "Goggles",
        390: "Rugby ball",
        391: "Caterpillar",
        392: "Poster",
        393: "Rocket",
        394: "Organ",
        395: "Saxophone",
        396: "Traffic light",
        397: "Cocktail",
        398: "Plastic bag",
        399: "Squash",
        400: "Mushroom",
        401: "Hamburger",
        402: "Light switch",
        403: "Parachute",
        404: "Teddy bear",
        405: "Winter melon",
        406: "Deer",
        407: "Musical keyboard",
        408: "Plumbing fixture",
        409: "Scoreboard",
        410: "Baseball bat",
        411: "Envelope",
        412: "Adhesive tape",
        413: "Briefcase",
        414: "Paddle",
        415: "Bow and arrow",
        416: "Telephone",
        417: "Sheep",
        418: "Jacket",
        419: "Boy",
        420: "Pizza",
        421: "Otter",
        422: "Office supplies",
        423: "Couch",
        424: "Cello",
        425: "Bull",
        426: "Camel",
        427: "Ball",
        428: "Duck",
        429: "Whale",
        430: "Shirt",
        431: "Tank",
        432: "Motorcycle",
        433: "Accordion",
        434: "Owl",
        435: "Porcupine",
        436: "Sun hat",
        437: "Nail",
        438: "Scissors",
        439: "Swan",
        440: "Lamp",
        441: "Crown",
        442: "Piano",
        443: "Sculpture",
        444: "Cheetah",
        445: "Oboe",
        446: "Tin can",
        447: "Mango",
        448: "Tripod",
        449: "Oven",
        450: "Mouse",
        451: "Barge",
        452: "Coffee",
        453: "Snowboard",
        454: "Common fig",
        455: "Salad",
        456: "Marine invertebrates",
        457: "Umbrella",
        458: "Kangaroo",
        459: "Human arm",
        460: "Measuring cup",
        461: "Snail",
        462: "Loveseat",
        463: "Suit",
        464: "Teapot",
        465: "Bottle",
        466: "Alpaca",
        467: "Kettle",
        468: "Trousers",
        469: "Popcorn",
        470: "Centipede",
        471: "Spider",
        472: "Sparrow",
        473: "Plate",
        474: "Bagel",
        475: "Personal care",
        476: "Apple",
        477: "Brassiere",
        478: "Bathroom cabinet",
        479: "studio couch",
        480: "Computer keyboard",
        481: "Table tennis racket",
        482: "Sushi",
        483: "Cabinetry",
        484: "Street light",
        485: "Towel",
        486: "Nightstand",
        487: "Rabbit",
        488: "Dolphin",
        489: "Dog",
        490: "Jug",
        491: "Wok",
        492: "Fire hydrant",
        493: "Human eye",
        494: "Skyscraper",
        495: "Backpack",
        496: "Potato",
        497: "Paper towel",
        498: "Lifejacket",
        499: "Bicycle wheel",
        500: "Toilet",
    }
    return clsid2catid, catid2name
--- a/build/lib/ppdet/data/source/coco.py
+++ b/build/lib/ppdet/data/source/coco.py
@ -0,0 +1,241 @@
 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 import os
 import numpy as np
 from ppdet.core.workspace import register, serializable
 from .dataset import DetDataset
 from ppdet.utils.logger import setup_logger
 logger = setup_logger(__name__)
@register
@serializable
 class COCODataSet(DetDataset):
    """
    Load dataset with COCO format.
    Args:
        dataset_dir (str): root directory for dataset.
        image_dir (str): directory for images.
        anno_path (str): coco annotation file path.
        data_fields (list): key name of data dictionary, at least have 'image'.
        sample_num (int): number of samples to load, -1 means all.
        load_crowd (bool): whether to load crowded ground-truth. 
            False as default
        allow_empty (bool): whether to load empty entry. False as default
        empty_ratio (float): the ratio of empty record number to total 
            record's, if empty_ratio is out of [0. ,1.), do not sample the 
            records. 1. as default
    """
    def __init__(self,
                 dataset_dir=None,
                 image_dir=None,
                 anno_path=None,
                 data_fields=['image'],
                 sample_num=-1,
                 load_crowd=False,
                 allow_empty=False,
                 empty_ratio=1.):
        super(COCODataSet, self).__init__(dataset_dir, image_dir, anno_path,
                                          data_fields, sample_num)
        self.load_image_only = False
        self.load_semantic = False
        self.load_crowd = load_crowd
        self.allow_empty = allow_empty
        self.empty_ratio = empty_ratio
    def _sample_empty(self, records, num):
        # if empty_ratio is out of [0. ,1.), do not sample the records
        if self.empty_ratio < 0. or self.empty_ratio >= 1.:
            return records
        import random
        sample_num = int(num * self.empty_ratio / (1 - self.empty_ratio))
        records = random.sample(records, sample_num)
        return records
    def parse_dataset(self):
        anno_path = os.path.join(self.dataset_dir, self.anno_path)
        image_dir = os.path.join(self.dataset_dir, self.image_dir)
        assert anno_path.endswith('.json'), \
            'invalid coco annotation file: ' + anno_path
        from pycocotools.coco import COCO
        coco = COCO(anno_path)
        img_ids = coco.getImgIds()
        img_ids.sort()
        cat_ids = coco.getCatIds()
        records = []
        empty_records = []
        ct = 0
        self.catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)})
        self.cname2cid = dict({
            coco.loadCats(catid)[0]['name']: clsid
            for catid, clsid in self.catid2clsid.items()
        })
        if 'annotations' not in coco.dataset:
            self.load_image_only = True
            logger.warning('Annotation file: {} does not contains ground truth '
                           'and load image information only.'.format(anno_path))
        for img_id in img_ids:
            img_anno = coco.loadImgs([img_id])[0]
            im_fname = img_anno['file_name']
            im_w = float(img_anno['width'])
            im_h = float(img_anno['height'])
            im_path = os.path.join(image_dir,
                                   im_fname) if image_dir else im_fname
            is_empty = False
            if not os.path.exists(im_path):
                logger.warning('Illegal image file: {}, and it will be '
                               'ignored'.format(im_path))
                continue
            if im_w < 0 or im_h < 0:
                logger.warning('Illegal width: {} or height: {} in annotation, '
                               'and im_id: {} will be ignored'.format(
                                   im_w, im_h, img_id))
                continue
            coco_rec = {
                'im_file': im_path,
                'im_id': np.array([img_id]),
                'h': im_h,
                'w': im_w,
            } if 'image' in self.data_fields else {}
            if not self.load_image_only:
                ins_anno_ids = coco.getAnnIds(
                    imgIds=[img_id], iscrowd=None if self.load_crowd else False)
                instances = coco.loadAnns(ins_anno_ids)
                bboxes = []
                is_rbox_anno = False
                for inst in instances:
                    # check gt bbox
                    if inst.get('ignore', False):
                        continue
                    if 'bbox' not in inst.keys():
                        continue
                    else:
                        if not any(np.array(inst['bbox'])):
                            continue
                    # read rbox anno or not
                    is_rbox_anno = True if len(inst['bbox']) == 5 else False
                    if is_rbox_anno:
                        xc, yc, box_w, box_h, angle = inst['bbox']
                        x1 = xc - box_w / 2.0
                        y1 = yc - box_h / 2.0
                        x2 = x1 + box_w
                        y2 = y1 + box_h
                    else:
                        x1, y1, box_w, box_h = inst['bbox']
                        x2 = x1 + box_w
                        y2 = y1 + box_h
                    eps = 1e-5
                    if inst['area'] > 0 and x2 - x1 > eps and y2 - y1 > eps:
                        inst['clean_bbox'] = [
                            round(float(x), 3) for x in [x1, y1, x2, y2]
                        ]
                        if is_rbox_anno:
                            inst['clean_rbox'] = [xc, yc, box_w, box_h, angle]
                        bboxes.append(inst)
                    else:
                        logger.warning(
                            'Found an invalid bbox in annotations: im_id: {}, '
                            'area: {} x1: {}, y1: {}, x2: {}, y2: {}.'.format(
                                img_id, float(inst['area']), x1, y1, x2, y2))
                num_bbox = len(bboxes)
                if num_bbox <= 0 and not self.allow_empty:
                    continue
                elif num_bbox <= 0:
                    is_empty = True
                gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
                if is_rbox_anno:
                    gt_rbox = np.zeros((num_bbox, 5), dtype=np.float32)
                gt_theta = np.zeros((num_bbox, 1), dtype=np.int32)
                gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
                is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
                difficult = np.zeros((num_bbox, 1), dtype=np.int32)
                gt_poly = [None] * num_bbox
                has_segmentation = False
                for i, box in enumerate(bboxes):
                    catid = box['category_id']
                    gt_class[i][0] = self.catid2clsid[catid]
                    gt_bbox[i, :] = box['clean_bbox']
                    # xc, yc, w, h, theta
                    if is_rbox_anno:
                        gt_rbox[i, :] = box['clean_rbox']
                    is_crowd[i][0] = box['iscrowd']
                    # check RLE format 
                    if 'segmentation' in box and box['iscrowd'] == 1:
                        gt_poly[i] = [[0.0, 0.0], ]
                    elif 'segmentation' in box and box['segmentation']:
                        gt_poly[i] = box['segmentation']
                        has_segmentation = True
                if has_segmentation and not any(
                        gt_poly) and not self.allow_empty:
                    continue
                if is_rbox_anno:
                    gt_rec = {
                        'is_crowd': is_crowd,
                        'gt_class': gt_class,
                        'gt_bbox': gt_bbox,
                        'gt_rbox': gt_rbox,
                        'gt_poly': gt_poly,
                    }
                else:
                    gt_rec = {
                        'is_crowd': is_crowd,
                        'gt_class': gt_class,
                        'gt_bbox': gt_bbox,
                        'gt_poly': gt_poly,
                    }
                for k, v in gt_rec.items():
                    if k in self.data_fields:
                        coco_rec[k] = v
                # TODO: remove load_semantic
                if self.load_semantic and 'semantic' in self.data_fields:
                    seg_path = os.path.join(self.dataset_dir, 'stuffthingmaps',
                                            'train2017', im_fname[:-3] + 'png')
                    coco_rec.update({'semantic': seg_path})
            logger.debug('Load file: {}, im_id: {}, h: {}, w: {}.'.format(
                im_path, img_id, im_h, im_w))
            if is_empty:
                empty_records.append(coco_rec)
            else:
                records.append(coco_rec)
            ct += 1
            if self.sample_num > 0 and ct >= self.sample_num:
                break
        assert ct > 0, 'not found any coco record in %s' % (anno_path)
        logger.debug('{} samples in file {}'.format(ct, anno_path))
        if len(empty_records) > 0:
            empty_records = self._sample_empty(empty_records, len(records))
            records += empty_records
        self.roidbs = records
--- a/build/lib/ppdet/data/source/dataset.py
+++ b/build/lib/ppdet/data/source/dataset.py
@ -0,0 +1,192 @@
 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 import os
 import numpy as np
 try:
    from collections.abc import Sequence
 except Exception:
    from collections import Sequence
 from paddle.io import Dataset
 from ppdet.core.workspace import register, serializable
 from ppdet.utils.download import get_dataset_path
 import copy
@serializable
 class DetDataset(Dataset):
    """
    Load detection dataset.
    Args:
        dataset_dir (str): root directory for dataset.
        image_dir (str): directory for images.
        anno_path (str): annotation file path.
        data_fields (list): key name of data dictionary, at least have 'image'.
        sample_num (int): number of samples to load, -1 means all.
        use_default_label (bool): whether to load default label list.
    """
    def __init__(self,
                 dataset_dir=None,
                 image_dir=None,
                 anno_path=None,
                 data_fields=['image'],
                 sample_num=-1,
                 use_default_label=None,
                 **kwargs):
        super(DetDataset, self).__init__()
        self.dataset_dir = dataset_dir if dataset_dir is not None else ''
        self.anno_path = anno_path
        self.image_dir = image_dir if image_dir is not None else ''
        self.data_fields = data_fields
        self.sample_num = sample_num
        self.use_default_label = use_default_label
        self._epoch = 0
        self._curr_iter = 0
    def __len__(self, ):
        return len(self.roidbs)
    def __getitem__(self, idx):
        # data batch
        roidb = copy.deepcopy(self.roidbs[idx])
        if self.mixup_epoch == 0 or self._epoch < self.mixup_epoch:
            n = len(self.roidbs)
            idx = np.random.randint(n)
            roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
        elif self.cutmix_epoch == 0 or self._epoch < self.cutmix_epoch:
            n = len(self.roidbs)
            idx = np.random.randint(n)
            roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
        elif self.mosaic_epoch == 0 or self._epoch < self.mosaic_epoch:
            n = len(self.roidbs)
            roidb = [roidb, ] + [
                copy.deepcopy(self.roidbs[np.random.randint(n)])
                for _ in range(3)
            ]
        if isinstance(roidb, Sequence):
            for r in roidb:
                r['curr_iter'] = self._curr_iter
        else:
            roidb['curr_iter'] = self._curr_iter
        self._curr_iter += 1
        return self.transform(roidb)
    def check_or_download_dataset(self):
        self.dataset_dir = get_dataset_path(self.dataset_dir, self.anno_path,
                                            self.image_dir)
    def set_kwargs(self, **kwargs):
        self.mixup_epoch = kwargs.get('mixup_epoch', -1)
        self.cutmix_epoch = kwargs.get('cutmix_epoch', -1)
        self.mosaic_epoch = kwargs.get('mosaic_epoch', -1)
    def set_transform(self, transform):
        self.transform = transform
    def set_epoch(self, epoch_id):
        self._epoch = epoch_id
    def parse_dataset(self, ):
        raise NotImplementedError(
            "Need to implement parse_dataset method of Dataset")
    def get_anno(self):
        if self.anno_path is None:
            return
        return os.path.join(self.dataset_dir, self.anno_path)
 def _is_valid_file(f, extensions=('.jpg', '.jpeg', '.png', '.bmp')):
    return f.lower().endswith(extensions)
 def _make_dataset(dir):
    dir = os.path.expanduser(dir)
    if not os.path.isdir(dir):
        raise ('{} should be a dir'.format(dir))
    images = []
    for root, _, fnames in sorted(os.walk(dir, followlinks=True)):
        for fname in sorted(fnames):
            path = os.path.join(root, fname)
            if _is_valid_file(path):
                images.append(path)
    return images
@register
@serializable
 class ImageFolder(DetDataset):
    def __init__(self,
                 dataset_dir=None,
                 image_dir=None,
                 anno_path=None,
                 sample_num=-1,
                 use_default_label=None,
                 **kwargs):
        super(ImageFolder, self).__init__(
            dataset_dir,
            image_dir,
            anno_path,
            sample_num=sample_num,
            use_default_label=use_default_label)
        self._imid2path = {}
        self.roidbs = None
        self.sample_num = sample_num
    def check_or_download_dataset(self):
        return
    def parse_dataset(self, ):
        if not self.roidbs:
            self.roidbs = self._load_images()
    def _parse(self):
        image_dir = self.image_dir
        if not isinstance(image_dir, Sequence):
            image_dir = [image_dir]
        images = []
        for im_dir in image_dir:
            if os.path.isdir(im_dir):
                im_dir = os.path.join(self.dataset_dir, im_dir)
                images.extend(_make_dataset(im_dir))
            elif os.path.isfile(im_dir) and _is_valid_file(im_dir):
                images.append(im_dir)
        return images
    def _load_images(self):
        images = self._parse()
        ct = 0
        records = []
        for image in images:
            assert image != '' and os.path.isfile(image), \
                    "Image {} not found".format(image)
            if self.sample_num > 0 and ct >= self.sample_num:
                break
            rec = {'im_id': np.array([ct]), 'im_file': image}
            self._imid2path[ct] = image
            ct += 1
            records.append(rec)
        assert len(records) > 0, "No image file found"
        return records
    def get_imid2path(self):
        return self._imid2path
    def set_images(self, images):
        self.image_dir = images
        self.roidbs = self._load_images()
--- a/build/lib/ppdet/data/source/keypoint_coco.py
+++ b/build/lib/ppdet/data/source/keypoint_coco.py
@ -0,0 +1,656 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 import os
 import cv2
 import numpy as np
 import json
 import copy
 import pycocotools
 from pycocotools.coco import COCO
 from .dataset import DetDataset
 from ppdet.core.workspace import register, serializable
@serializable
 class KeypointBottomUpBaseDataset(DetDataset):
    """Base class for bottom-up datasets.
    All datasets should subclass it.
    All subclasses should overwrite:
        Methods:`_get_imganno`
    Args:
        dataset_dir (str): Root path to the dataset.
        anno_path (str): Relative path to the annotation file.
        image_dir (str): Path to a directory where images are held.
            Default: None.
        num_joints (int): keypoint numbers
        transform (composed(operators)): A sequence of data transforms.
        shard (list): [rank, worldsize], the distributed env params
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """
    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 transform=[],
                 shard=[0, 1],
                 test_mode=False):
        super().__init__(dataset_dir, image_dir, anno_path)
        self.image_info = {}
        self.ann_info = {}
        self.img_prefix = os.path.join(dataset_dir, image_dir)
        self.transform = transform
        self.test_mode = test_mode
        self.ann_info['num_joints'] = num_joints
        self.img_ids = []
    def __len__(self):
        """Get dataset length."""
        return len(self.img_ids)
    def _get_imganno(self, idx):
        """Get anno for a single image."""
        raise NotImplementedError
    def __getitem__(self, idx):
        """Prepare image for training given the index."""
        records = copy.deepcopy(self._get_imganno(idx))
        records['image'] = cv2.imread(records['image_file'])
        records['image'] = cv2.cvtColor(records['image'], cv2.COLOR_BGR2RGB)
        records['mask'] = (records['mask'] + 0).astype('uint8')
        records = self.transform(records)
        return records
    def parse_dataset(self):
        return
@register
@serializable
 class KeypointBottomUpCocoDataset(KeypointBottomUpBaseDataset):
    """COCO dataset for bottom-up pose estimation.
    The dataset loads raw features and apply specified transforms
    to return a dict containing the image tensors and other information.
    COCO keypoint indexes::
        0: 'nose',
        1: 'left_eye',
        2: 'right_eye',
        3: 'left_ear',
        4: 'right_ear',
        5: 'left_shoulder',
        6: 'right_shoulder',
        7: 'left_elbow',
        8: 'right_elbow',
        9: 'left_wrist',
        10: 'right_wrist',
        11: 'left_hip',
        12: 'right_hip',
        13: 'left_knee',
        14: 'right_knee',
        15: 'left_ankle',
        16: 'right_ankle'
    Args:
        dataset_dir (str): Root path to the dataset.
        anno_path (str): Relative path to the annotation file.
        image_dir (str): Path to a directory where images are held.
            Default: None.
        num_joints (int): keypoint numbers
        transform (composed(operators)): A sequence of data transforms.
        shard (list): [rank, worldsize], the distributed env params
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """
    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 transform=[],
                 shard=[0, 1],
                 test_mode=False):
        super().__init__(dataset_dir, image_dir, anno_path, num_joints,
                         transform, shard, test_mode)
        ann_file = os.path.join(dataset_dir, anno_path)
        self.coco = COCO(ann_file)
        self.img_ids = self.coco.getImgIds()
        if not test_mode:
            self.img_ids = [
                img_id for img_id in self.img_ids
                if len(self.coco.getAnnIds(
                    imgIds=img_id, iscrowd=None)) > 0
            ]
        blocknum = int(len(self.img_ids) / shard[1])
        self.img_ids = self.img_ids[(blocknum * shard[0]):(blocknum * (shard[0]
                                                                       + 1))]
        self.num_images = len(self.img_ids)
        self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
        self.dataset_name = 'coco'
        cat_ids = self.coco.getCatIds()
        self.catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)})
        print(f'=> num_images: {self.num_images}')
    @staticmethod
    def _get_mapping_id_name(imgs):
        """
        Args:
            imgs (dict): dict of image info.
        Returns:
            tuple: Image name & id mapping dicts.
            - id2name (dict): Mapping image id to name.
            - name2id (dict): Mapping image name to id.
        """
        id2name = {}
        name2id = {}
        for image_id, image in imgs.items():
            file_name = image['file_name']
            id2name[image_id] = file_name
            name2id[file_name] = image_id
        return id2name, name2id
    def _get_imganno(self, idx):
        """Get anno for a single image.
        Args:
            idx (int): image idx
        Returns:
            dict: info for model training
        """
        coco = self.coco
        img_id = self.img_ids[idx]
        ann_ids = coco.getAnnIds(imgIds=img_id)
        anno = coco.loadAnns(ann_ids)
        mask = self._get_mask(anno, idx)
        anno = [
            obj for obj in anno
            if obj['iscrowd'] == 0 or obj['num_keypoints'] > 0
        ]
        joints, orgsize = self._get_joints(anno, idx)
        db_rec = {}
        db_rec['im_id'] = img_id
        db_rec['image_file'] = os.path.join(self.img_prefix,
                                            self.id2name[img_id])
        db_rec['mask'] = mask
        db_rec['joints'] = joints
        db_rec['im_shape'] = orgsize
        return db_rec
    def _get_joints(self, anno, idx):
        """Get joints for all people in an image."""
        num_people = len(anno)
        joints = np.zeros(
            (num_people, self.ann_info['num_joints'], 3), dtype=np.float32)
        for i, obj in enumerate(anno):
            joints[i, :self.ann_info['num_joints'], :3] = \
                np.array(obj['keypoints']).reshape([-1, 3])
        img_info = self.coco.loadImgs(self.img_ids[idx])[0]
        joints[..., 0] /= img_info['width']
        joints[..., 1] /= img_info['height']
        orgsize = np.array([img_info['height'], img_info['width']])
        return joints, orgsize
    def _get_mask(self, anno, idx):
        """Get ignore masks to mask out losses."""
        coco = self.coco
        img_info = coco.loadImgs(self.img_ids[idx])[0]
        m = np.zeros((img_info['height'], img_info['width']), dtype=np.float32)
        for obj in anno:
            if 'segmentation' in obj:
                if obj['iscrowd']:
                    rle = pycocotools.mask.frPyObjects(obj['segmentation'],
                                                       img_info['height'],
                                                       img_info['width'])
                    m += pycocotools.mask.decode(rle)
                elif obj['num_keypoints'] == 0:
                    rles = pycocotools.mask.frPyObjects(obj['segmentation'],
                                                        img_info['height'],
                                                        img_info['width'])
                    for rle in rles:
                        m += pycocotools.mask.decode(rle)
        return m < 0.5
@register
@serializable
 class KeypointBottomUpCrowdPoseDataset(KeypointBottomUpCocoDataset):
    """CrowdPose dataset for bottom-up pose estimation.
    The dataset loads raw features and apply specified transforms
    to return a dict containing the image tensors and other information.
    CrowdPose keypoint indexes::
        0: 'left_shoulder',
        1: 'right_shoulder',
        2: 'left_elbow',
        3: 'right_elbow',
        4: 'left_wrist',
        5: 'right_wrist',
        6: 'left_hip',
        7: 'right_hip',
        8: 'left_knee',
        9: 'right_knee',
        10: 'left_ankle',
        11: 'right_ankle',
        12: 'top_head',
        13: 'neck'
    Args:
        dataset_dir (str): Root path to the dataset.
        anno_path (str): Relative path to the annotation file.
        image_dir (str): Path to a directory where images are held.
            Default: None.
        num_joints (int): keypoint numbers
        transform (composed(operators)): A sequence of data transforms.
        shard (list): [rank, worldsize], the distributed env params
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """
    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 transform=[],
                 shard=[0, 1],
                 test_mode=False):
        super().__init__(dataset_dir, image_dir, anno_path, num_joints,
                         transform, shard, test_mode)
        ann_file = os.path.join(dataset_dir, anno_path)
        self.coco = COCO(ann_file)
        self.img_ids = self.coco.getImgIds()
        if not test_mode:
            self.img_ids = [
                img_id for img_id in self.img_ids
                if len(self.coco.getAnnIds(
                    imgIds=img_id, iscrowd=None)) > 0
            ]
        blocknum = int(len(self.img_ids) / shard[1])
        self.img_ids = self.img_ids[(blocknum * shard[0]):(blocknum * (shard[0]
                                                                       + 1))]
        self.num_images = len(self.img_ids)
        self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
        self.dataset_name = 'crowdpose'
        print('=> num_images: {}'.format(self.num_images))
@serializable
 class KeypointTopDownBaseDataset(DetDataset):
    """Base class for top_down datasets.
    All datasets should subclass it.
    All subclasses should overwrite:
        Methods:`_get_db`
    Args:
        dataset_dir (str): Root path to the dataset.
        image_dir (str): Path to a directory where images are held.
        anno_path (str): Relative path to the annotation file.
        num_joints (int): keypoint numbers
        transform (composed(operators)): A sequence of data transforms.
    """
    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 transform=[]):
        super().__init__(dataset_dir, image_dir, anno_path)
        self.image_info = {}
        self.ann_info = {}
        self.img_prefix = os.path.join(dataset_dir, image_dir)
        self.transform = transform
        self.ann_info['num_joints'] = num_joints
        self.db = []
    def __len__(self):
        """Get dataset length."""
        return len(self.db)
    def _get_db(self):
        """Get a sample"""
        raise NotImplementedError
    def __getitem__(self, idx):
        """Prepare sample for training given the index."""
        records = copy.deepcopy(self.db[idx])
        records['image'] = cv2.imread(records['image_file'], cv2.IMREAD_COLOR |
                                      cv2.IMREAD_IGNORE_ORIENTATION)
        records['image'] = cv2.cvtColor(records['image'], cv2.COLOR_BGR2RGB)
        records['score'] = records['score'] if 'score' in records else 1
        records = self.transform(records)
        # print('records', records)
        return records
@register
@serializable
 class KeypointTopDownCocoDataset(KeypointTopDownBaseDataset):
    """COCO dataset for top-down pose estimation.
    The dataset loads raw features and apply specified transforms
    to return a dict containing the image tensors and other information.
    COCO keypoint indexes:
        0: 'nose',
        1: 'left_eye',
        2: 'right_eye',
        3: 'left_ear',
        4: 'right_ear',
        5: 'left_shoulder',
        6: 'right_shoulder',
        7: 'left_elbow',
        8: 'right_elbow',
        9: 'left_wrist',
        10: 'right_wrist',
        11: 'left_hip',
        12: 'right_hip',
        13: 'left_knee',
        14: 'right_knee',
        15: 'left_ankle',
        16: 'right_ankle'
    Args:
        dataset_dir (str): Root path to the dataset.
        image_dir (str): Path to a directory where images are held.
        anno_path (str): Relative path to the annotation file.
        num_joints (int): Keypoint numbers
        trainsize (list):[w, h] Image target size
        transform (composed(operators)): A sequence of data transforms.
        bbox_file (str): Path to a detection bbox file
            Default: None.
        use_gt_bbox (bool): Whether to use ground truth bbox
            Default: True.
        pixel_std (int): The pixel std of the scale
            Default: 200.
        image_thre (float): The threshold to filter the detection box
            Default: 0.0.
    """
    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 trainsize,
                 transform=[],
                 bbox_file=None,
                 use_gt_bbox=True,
                 pixel_std=200,
                 image_thre=0.0):
        super().__init__(dataset_dir, image_dir, anno_path, num_joints,
                         transform)
        self.bbox_file = bbox_file
        self.use_gt_bbox = use_gt_bbox
        self.trainsize = trainsize
        self.pixel_std = pixel_std
        self.image_thre = image_thre
        self.dataset_name = 'coco'
    def parse_dataset(self):
        if self.use_gt_bbox:
            self.db = self._load_coco_keypoint_annotations()
        else:
            self.db = self._load_coco_person_detection_results()
    def _load_coco_keypoint_annotations(self):
        coco = COCO(self.get_anno())
        img_ids = coco.getImgIds()
        gt_db = []
        for index in img_ids:
            im_ann = coco.loadImgs(index)[0]
            width = im_ann['width']
            height = im_ann['height']
            file_name = im_ann['file_name']
            im_id = int(im_ann["id"])
            annIds = coco.getAnnIds(imgIds=index, iscrowd=False)
            objs = coco.loadAnns(annIds)
            valid_objs = []
            for obj in objs:
                x, y, w, h = obj['bbox']
                x1 = np.max((0, x))
                y1 = np.max((0, y))
                x2 = np.min((width - 1, x1 + np.max((0, w - 1))))
                y2 = np.min((height - 1, y1 + np.max((0, h - 1))))
                if obj['area'] > 0 and x2 >= x1 and y2 >= y1:
                    obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
                    valid_objs.append(obj)
            objs = valid_objs
            rec = []
            for obj in objs:
                if max(obj['keypoints']) == 0:
                    continue
                joints = np.zeros(
                    (self.ann_info['num_joints'], 3), dtype=np.float)
                joints_vis = np.zeros(
                    (self.ann_info['num_joints'], 3), dtype=np.float)
                for ipt in range(self.ann_info['num_joints']):
                    joints[ipt, 0] = obj['keypoints'][ipt * 3 + 0]
                    joints[ipt, 1] = obj['keypoints'][ipt * 3 + 1]
                    joints[ipt, 2] = 0
                    t_vis = obj['keypoints'][ipt * 3 + 2]
                    if t_vis > 1:
                        t_vis = 1
                    joints_vis[ipt, 0] = t_vis
                    joints_vis[ipt, 1] = t_vis
                    joints_vis[ipt, 2] = 0
                center, scale = self._box2cs(obj['clean_bbox'][:4])
                rec.append({
                    'image_file': os.path.join(self.img_prefix, file_name),
                    'center': center,
                    'scale': scale,
                    'joints': joints,
                    'joints_vis': joints_vis,
                    'im_id': im_id,
                })
            gt_db.extend(rec)
        return gt_db
    def _box2cs(self, box):
        x, y, w, h = box[:4]
        center = np.zeros((2), dtype=np.float32)
        center[0] = x + w * 0.5
        center[1] = y + h * 0.5
        aspect_ratio = self.trainsize[0] * 1.0 / self.trainsize[1]
        if w > aspect_ratio * h:
            h = w * 1.0 / aspect_ratio
        elif w < aspect_ratio * h:
            w = h * aspect_ratio
        scale = np.array(
            [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
            dtype=np.float32)
        if center[0] != -1:
            scale = scale * 1.25
        return center, scale
    def _load_coco_person_detection_results(self):
        all_boxes = None
        bbox_file_path = os.path.join(self.dataset_dir, self.bbox_file)
        with open(bbox_file_path, 'r') as f:
            all_boxes = json.load(f)
        if not all_boxes:
            print('=> Load %s fail!' % bbox_file_path)
            return None
        kpt_db = []
        for n_img in range(0, len(all_boxes)):
            det_res = all_boxes[n_img]
            if det_res['category_id'] != 1:
                continue
            file_name = det_res[
                'filename'] if 'filename' in det_res else '%012d.jpg' % det_res[
                    'image_id']
            img_name = os.path.join(self.img_prefix, file_name)
            box = det_res['bbox']
            score = det_res['score']
            im_id = int(det_res['image_id'])
            if score < self.image_thre:
                continue
            center, scale = self._box2cs(box)
            joints = np.zeros((self.ann_info['num_joints'], 3), dtype=np.float)
            joints_vis = np.ones(
                (self.ann_info['num_joints'], 3), dtype=np.float)
            kpt_db.append({
                'image_file': img_name,
                'im_id': im_id,
                'center': center,
                'scale': scale,
                'score': score,
                'joints': joints,
                'joints_vis': joints_vis,
            })
        return kpt_db
@register
@serializable
 class KeypointTopDownMPIIDataset(KeypointTopDownBaseDataset):
    """MPII dataset for topdown pose estimation.
    The dataset loads raw features and apply specified transforms
    to return a dict containing the image tensors and other information.
    MPII keypoint indexes::
        0: 'right_ankle',
        1: 'right_knee',
        2: 'right_hip',
        3: 'left_hip',
        4: 'left_knee',
        5: 'left_ankle',
        6: 'pelvis',
        7: 'thorax',
        8: 'upper_neck',
        9: 'head_top',
        10: 'right_wrist',
        11: 'right_elbow',
        12: 'right_shoulder',
        13: 'left_shoulder',
        14: 'left_elbow',
        15: 'left_wrist',
    Args:
        dataset_dir (str): Root path to the dataset.
        image_dir (str): Path to a directory where images are held.
        anno_path (str): Relative path to the annotation file.
        num_joints (int): Keypoint numbers
        trainsize (list):[w, h] Image target size
        transform (composed(operators)): A sequence of data transforms.
    """
    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 transform=[]):
        super().__init__(dataset_dir, image_dir, anno_path, num_joints,
                         transform)
        self.dataset_name = 'mpii'
    def parse_dataset(self):
        with open(self.get_anno()) as anno_file:
            anno = json.load(anno_file)
        gt_db = []
        for a in anno:
            image_name = a['image']
            im_id = a['image_id'] if 'image_id' in a else int(
                os.path.splitext(image_name)[0])
            c = np.array(a['center'], dtype=np.float)
            s = np.array([a['scale'], a['scale']], dtype=np.float)
            # Adjust center/scale slightly to avoid cropping limbs
            if c[0] != -1:
                c[1] = c[1] + 15 * s[1]
                s = s * 1.25
            c = c - 1
            joints = np.zeros((self.ann_info['num_joints'], 3), dtype=np.float)
            joints_vis = np.zeros(
                (self.ann_info['num_joints'], 3), dtype=np.float)
            if 'joints' in a:
                joints_ = np.array(a['joints'])
                joints_[:, 0:2] = joints_[:, 0:2] - 1
                joints_vis_ = np.array(a['joints_vis'])
                assert len(joints_) == self.ann_info[
                    'num_joints'], 'joint num diff: {} vs {}'.format(
                        len(joints_), self.ann_info['num_joints'])
                joints[:, 0:2] = joints_[:, 0:2]
                joints_vis[:, 0] = joints_vis_[:]
                joints_vis[:, 1] = joints_vis_[:]
            gt_db.append({
                'image_file': os.path.join(self.img_prefix, image_name),
                'im_id': im_id,
                'center': c,
                'scale': s,
                'joints': joints,
                'joints_vis': joints_vis
            })
        self.db = gt_db
--- a/build/lib/ppdet/data/source/mot.py
+++ b/build/lib/ppdet/data/source/mot.py
@ -0,0 +1,360 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
 import cv2
 import numpy as np
 from collections import OrderedDict
 try:
    from collections.abc import Sequence
 except Exception:
    from collections import Sequence
 from .dataset import DetDataset, _make_dataset, _is_valid_file
 from ppdet.core.workspace import register, serializable
 from ppdet.utils.logger import setup_logger
 logger = setup_logger(__name__)
@register
@serializable
 class MOTDataSet(DetDataset):
    """
    Load dataset with MOT format.
    Args:
        dataset_dir (str): root directory for dataset.
        image_lists (str|list): mot data image lists, muiti-source mot dataset.
        data_fields (list): key name of data dictionary, at least have 'image'.
        sample_num (int): number of samples to load, -1 means all.
    Notes:
        MOT datasets root directory following this:
            dataset/mot
            |——————image_lists
            |        |——————caltech.train  
            |        |——————caltech.val   
            |        |——————mot16.train  
            |        |——————mot17.train  
            |        ......
            |——————Caltech
            |——————MOT17
            |——————......
        All the MOT datasets have the following structure:
            Caltech
            |——————images
            |        └——————00001.jpg
            |        |—————— ...
            |        └——————0000N.jpg
            └——————labels_with_ids
                        └——————00001.txt
                        |—————— ...
                        └——————0000N.txt
            or
            MOT17
            |——————images
            |        └——————train
            |        └——————test
            └——————labels_with_ids
                        └——————train
    """
    def __init__(self,
                 dataset_dir=None,
                 image_lists=[],
                 data_fields=['image'],
                 sample_num=-1):
        super(MOTDataSet, self).__init__(
            dataset_dir=dataset_dir,
            data_fields=data_fields,
            sample_num=sample_num)
        self.dataset_dir = dataset_dir
        self.image_lists = image_lists
        if isinstance(self.image_lists, str):
            self.image_lists = [self.image_lists]
        self.roidbs = None
        self.cname2cid = None
    def get_anno(self):
        if self.image_lists == []:
            return
        # only used to get categories and metric
        return os.path.join(self.dataset_dir, 'image_lists',
                            self.image_lists[0])
    def parse_dataset(self):
        self.img_files = OrderedDict()
        self.img_start_index = OrderedDict()
        self.label_files = OrderedDict()
        self.tid_num = OrderedDict()
        self.tid_start_index = OrderedDict()
        img_index = 0
        for data_name in self.image_lists:
            # check every data image list
            image_lists_dir = os.path.join(self.dataset_dir, 'image_lists')
            assert os.path.isdir(image_lists_dir), \
                "The {} is not a directory.".format(image_lists_dir)
            list_path = os.path.join(image_lists_dir, data_name)
            assert os.path.exists(list_path), \
                "The list path {} does not exist.".format(list_path)
            # record img_files, filter out empty ones
            with open(list_path, 'r') as file:
                self.img_files[data_name] = file.readlines()
                self.img_files[data_name] = [
                    os.path.join(self.dataset_dir, x.strip())
                    for x in self.img_files[data_name]
                ]
                self.img_files[data_name] = list(
                    filter(lambda x: len(x) > 0, self.img_files[data_name]))
                self.img_start_index[data_name] = img_index
                img_index += len(self.img_files[data_name])
            # record label_files
            self.label_files[data_name] = [
                x.replace('images', 'labels_with_ids').replace(
                    '.png', '.txt').replace('.jpg', '.txt')
                for x in self.img_files[data_name]
            ]
        for data_name, label_paths in self.label_files.items():
            max_index = -1
            for lp in label_paths:
                lb = np.loadtxt(lp)
                if len(lb) < 1:
                    continue
                if len(lb.shape) < 2:
                    img_max = lb[1]
                else:
                    img_max = np.max(lb[:, 1])
                if img_max > max_index:
                    max_index = img_max
            self.tid_num[data_name] = int(max_index + 1)
        last_index = 0
        for i, (k, v) in enumerate(self.tid_num.items()):
            self.tid_start_index[k] = last_index
            last_index += v
        self.total_identities = int(last_index + 1)
        self.num_imgs_each_data = [len(x) for x in self.img_files.values()]
        self.total_imgs = sum(self.num_imgs_each_data)
        logger.info('=' * 80)
        logger.info('MOT dataset summary: ')
        logger.info(self.tid_num)
        logger.info('total images: {}'.format(self.total_imgs))
        logger.info('image start index: {}'.format(self.img_start_index))
        logger.info('total identities: {}'.format(self.total_identities))
        logger.info('identity start index: {}'.format(self.tid_start_index))
        logger.info('=' * 80)
        records = []
        cname2cid = mot_label()
        for img_index in range(self.total_imgs):
            for i, (k, v) in enumerate(self.img_start_index.items()):
                if img_index >= v:
                    data_name = list(self.label_files.keys())[i]
                    start_index = v
            img_file = self.img_files[data_name][img_index - start_index]
            lbl_file = self.label_files[data_name][img_index - start_index]
            if not os.path.exists(img_file):
                logger.warning('Illegal image file: {}, and it will be ignored'.
                               format(img_file))
                continue
            if not os.path.isfile(lbl_file):
                logger.warning('Illegal label file: {}, and it will be ignored'.
                               format(lbl_file))
                continue
            labels = np.loadtxt(lbl_file, dtype=np.float32).reshape(-1, 6)
            # each row in labels (N, 6) is [gt_class, gt_identity, cx, cy, w, h]
            cx, cy = labels[:, 2], labels[:, 3]
            w, h = labels[:, 4], labels[:, 5]
            gt_bbox = np.stack((cx, cy, w, h)).T.astype('float32')
            gt_class = labels[:, 0:1].astype('int32')
            gt_score = np.ones((len(labels), 1)).astype('float32')
            gt_ide = labels[:, 1:2].astype('int32')
            for i, _ in enumerate(gt_ide):
                if gt_ide[i] > -1:
                    gt_ide[i] += self.tid_start_index[data_name]
            mot_rec = {
                'im_file': img_file,
                'im_id': img_index,
            } if 'image' in self.data_fields else {}
            gt_rec = {
                'gt_class': gt_class,
                'gt_score': gt_score,
                'gt_bbox': gt_bbox,
                'gt_ide': gt_ide,
            }
            for k, v in gt_rec.items():
                if k in self.data_fields:
                    mot_rec[k] = v
            records.append(mot_rec)
            if self.sample_num > 0 and img_index >= self.sample_num:
                break
        assert len(records) > 0, 'not found any mot record in %s' % (
            self.image_lists)
        self.roidbs, self.cname2cid = records, cname2cid
 def mot_label():
    labels_map = {'person': 0}
    return labels_map
@register
@serializable
 class MOTImageFolder(DetDataset):
    def __init__(self,
                 task,
                 dataset_dir=None,
                 data_root=None,
                 image_dir=None,
                 sample_num=-1,
                 keep_ori_im=False,
                 **kwargs):
        super(MOTImageFolder, self).__init__(
            dataset_dir, image_dir, sample_num=sample_num)
        self.task = task
        self.data_root = data_root
        self.keep_ori_im = keep_ori_im
        self._imid2path = {}
        self.roidbs = None
    def check_or_download_dataset(self):
        return
    def parse_dataset(self, ):
        if not self.roidbs:
            self.roidbs = self._load_images()
    def _parse(self):
        image_dir = self.image_dir
        if not isinstance(image_dir, Sequence):
            image_dir = [image_dir]
        images = []
        for im_dir in image_dir:
            if os.path.isdir(im_dir):
                im_dir = os.path.join(self.dataset_dir, im_dir)
                images.extend(_make_dataset(im_dir))
            elif os.path.isfile(im_dir) and _is_valid_file(im_dir):
                images.append(im_dir)
        return images
    def _load_images(self):
        images = self._parse()
        ct = 0
        records = []
        for image in images:
            assert image != '' and os.path.isfile(image), \
                    "Image {} not found".format(image)
            if self.sample_num > 0 and ct >= self.sample_num:
                break
            rec = {'im_id': np.array([ct]), 'im_file': image}
            if self.keep_ori_im:
                rec.update({'keep_ori_im': 1})
            self._imid2path[ct] = image
            ct += 1
            records.append(rec)
        assert len(records) > 0, "No image file found"
        return records
    def get_imid2path(self):
        return self._imid2path
    def set_images(self, images):
        self.image_dir = images
        self.roidbs = self._load_images()
 def _is_valid_video(f, extensions=('.mp4', '.avi', '.mov', '.rmvb', 'flv')):
    return f.lower().endswith(extensions)
@register
@serializable
 class MOTVideoDataset(DetDataset):
    """
    Load MOT dataset with MOT format from video for inference.
    Args:
        video_file (str): path of the video file
        dataset_dir (str): root directory for dataset.
        keep_ori_im (bool): whether to keep original image, default False. 
            Set True when used during MOT model inference while saving
            images or video, or used in DeepSORT.
    """
    def __init__(self,
                 video_file='',
                 dataset_dir=None,
                 keep_ori_im=False,
                 **kwargs):
        super(MOTVideoDataset, self).__init__(dataset_dir=dataset_dir)
        self.video_file = video_file
        self.dataset_dir = dataset_dir
        self.keep_ori_im = keep_ori_im
        self.roidbs = None
    def parse_dataset(self, ):
        if not self.roidbs:
            self.roidbs = self._load_video_images()
    def _load_video_images(self):
        self.cap = cv2.VideoCapture(self.video_file)
        self.vn = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
        self.frame_rate = int(round(self.cap.get(cv2.CAP_PROP_FPS)))
        logger.info('Length of the video: {:d} frames'.format(self.vn))
        res = True
        ct = 0
        records = []
        while res:
            res, img = self.cap.read()
            image = np.ascontiguousarray(img, dtype=np.float32)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            im_shape = image.shape
            rec = {
                'im_id': np.array([ct]),
                'image': image,
                'h': im_shape[0],
                'w': im_shape[1],
                'im_shape': np.array(
                    im_shape[:2], dtype=np.float32),
                'scale_factor': np.array(
                    [1., 1.], dtype=np.float32),
            }
            if self.keep_ori_im:
                rec.update({'ori_image': image})
            ct += 1
            records.append(rec)
        records = records[:-1]
        assert len(records) > 0, "No image file found"
        return records
    def set_video(self, video_file):
        self.video_file = video_file
        assert os.path.isfile(self.video_file) and _is_valid_video(self.video_file), \
                "wrong or unsupported file format: {}".format(self.video_file)
        self.roidbs = self._load_video_images()
--- a/build/lib/ppdet/data/source/voc.py
+++ b/build/lib/ppdet/data/source/voc.py
@ -0,0 +1,205 @@
 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
 import numpy as np
 import xml.etree.ElementTree as ET
 from ppdet.core.workspace import register, serializable
 from .dataset import DetDataset
 from ppdet.utils.logger import setup_logger
 logger = setup_logger(__name__)
@register
@serializable
 class VOCDataSet(DetDataset):
    """
    Load dataset with PascalVOC format.
    Notes:
    `anno_path` must contains xml file and image file path for annotations.
    Args:
        dataset_dir (str): root directory for dataset.
        image_dir (str): directory for images.
        anno_path (str): voc annotation file path.
        data_fields (list): key name of data dictionary, at least have 'image'.
        sample_num (int): number of samples to load, -1 means all.
        label_list (str): if use_default_label is False, will load
            mapping between category and class index.
    """
    def __init__(self,
                 dataset_dir=None,
                 image_dir=None,
                 anno_path=None,
                 data_fields=['image'],
                 sample_num=-1,
                 label_list=None):
        super(VOCDataSet, self).__init__(
            dataset_dir=dataset_dir,
            image_dir=image_dir,
            anno_path=anno_path,
            data_fields=data_fields,
            sample_num=sample_num)
        self.label_list = label_list
    def parse_dataset(self, ):
        anno_path = os.path.join(self.dataset_dir, self.anno_path)
        image_dir = os.path.join(self.dataset_dir, self.image_dir)
        # mapping category name to class id
        # first_class:0, second_class:1, ...
        records = []
        ct = 0
        cname2cid = {}
        if self.label_list:
            label_path = os.path.join(self.dataset_dir, self.label_list)
            if not os.path.exists(label_path):
                raise ValueError("label_list {} does not exists".format(
                    label_path))
            with open(label_path, 'r') as fr:
                label_id = 0
                for line in fr.readlines():
                    cname2cid[line.strip()] = label_id
                    label_id += 1
        else:
            cname2cid = pascalvoc_label()
        with open(anno_path, 'r') as fr:
            while True:
                line = fr.readline()
                if not line:
                    break
                img_file, xml_file = [os.path.join(image_dir, x) \
                        for x in line.strip().split()[:2]]
                if not os.path.exists(img_file):
                    logger.warning(
                        'Illegal image file: {}, and it will be ignored'.format(
                            img_file))
                    continue
                if not os.path.isfile(xml_file):
                    logger.warning(
                        'Illegal xml file: {}, and it will be ignored'.format(
                            xml_file))
                    continue
                tree = ET.parse(xml_file)
                if tree.find('id') is None:
                    im_id = np.array([ct])
                else:
                    im_id = np.array([int(tree.find('id').text)])
                objs = tree.findall('object')
                im_w = float(tree.find('size').find('width').text)
                im_h = float(tree.find('size').find('height').text)
                if im_w < 0 or im_h < 0:
                    logger.warning(
                        'Illegal width: {} or height: {} in annotation, '
                        'and {} will be ignored'.format(im_w, im_h, xml_file))
                    continue
                gt_bbox = []
                gt_class = []
                gt_score = []
                difficult = []
                for i, obj in enumerate(objs):
                    cname = obj.find('name').text
                    # user dataset may not contain difficult field
                    _difficult = obj.find('difficult')
                    _difficult = int(
                        _difficult.text) if _difficult is not None else 0
                    x1 = float(obj.find('bndbox').find('xmin').text)
                    y1 = float(obj.find('bndbox').find('ymin').text)
                    x2 = float(obj.find('bndbox').find('xmax').text)
                    y2 = float(obj.find('bndbox').find('ymax').text)
                    x1 = max(0, x1)
                    y1 = max(0, y1)
                    x2 = min(im_w - 1, x2)
                    y2 = min(im_h - 1, y2)
                    if x2 > x1 and y2 > y1:
                        gt_bbox.append([x1, y1, x2, y2])
                        gt_class.append([cname2cid[cname]])
                        gt_score.append([1.])
                        difficult.append([_difficult])
                    else:
                        logger.warning(
                            'Found an invalid bbox in annotations: xml_file: {}'
                            ', x1: {}, y1: {}, x2: {}, y2: {}.'.format(
                                xml_file, x1, y1, x2, y2))
                gt_bbox = np.array(gt_bbox).astype('float32')
                gt_class = np.array(gt_class).astype('int32')
                gt_score = np.array(gt_score).astype('float32')
                difficult = np.array(difficult).astype('int32')
                voc_rec = {
                    'im_file': img_file,
                    'im_id': im_id,
                    'h': im_h,
                    'w': im_w
                } if 'image' in self.data_fields else {}
                gt_rec = {
                    'gt_class': gt_class,
                    'gt_score': gt_score,
                    'gt_bbox': gt_bbox,
                    'difficult': difficult
                }
                for k, v in gt_rec.items():
                    if k in self.data_fields:
                        voc_rec[k] = v
                if len(objs) != 0:
                    records.append(voc_rec)
                ct += 1
                if self.sample_num > 0 and ct >= self.sample_num:
                    break
        assert len(records) > 0, 'not found any voc record in %s' % (
            self.anno_path)
        logger.debug('{} samples in file {}'.format(ct, anno_path))
        self.roidbs, self.cname2cid = records, cname2cid
    def get_label_list(self):
        return os.path.join(self.dataset_dir, self.label_list)
 def pascalvoc_label():
    labels_map = {
        'aeroplane': 0,
        'bicycle': 1,
        'bird': 2,
        'boat': 3,
        'bottle': 4,
        'bus': 5,
        'car': 6,
        'cat': 7,
        'chair': 8,
        'cow': 9,
        'diningtable': 10,
        'dog': 11,
        'horse': 12,
        'motorbike': 13,
        'person': 14,
        'pottedplant': 15,
        'sheep': 16,
        'sofa': 17,
        'train': 18,
        'tvmonitor': 19
    }
    return labels_map
--- a/build/lib/ppdet/data/source/widerface.py
+++ b/build/lib/ppdet/data/source/widerface.py
@ -0,0 +1,180 @@
 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
 import numpy as np
 from ppdet.core.workspace import register, serializable
 from .dataset import DetDataset
 from ppdet.utils.logger import setup_logger
 logger = setup_logger(__name__)
@register
@serializable
 class WIDERFaceDataSet(DetDataset):
    """
    Load WiderFace records with 'anno_path'
    Args:
        dataset_dir (str): root directory for dataset.
        image_dir (str): directory for images.
        anno_path (str): WiderFace annotation data.
        data_fields (list): key name of data dictionary, at least have 'image'.
        sample_num (int): number of samples to load, -1 means all.
        with_lmk (bool): whether to load face landmark keypoint labels.
    """
    def __init__(self,
                 dataset_dir=None,
                 image_dir=None,
                 anno_path=None,
                 data_fields=['image'],
                 sample_num=-1,
                 with_lmk=False):
        super(WIDERFaceDataSet, self).__init__(
            dataset_dir=dataset_dir,
            image_dir=image_dir,
            anno_path=anno_path,
            data_fields=data_fields,
            sample_num=sample_num,
            with_lmk=with_lmk)
        self.anno_path = anno_path
        self.sample_num = sample_num
        self.roidbs = None
        self.cname2cid = None
        self.with_lmk = with_lmk
    def parse_dataset(self):
        anno_path = os.path.join(self.dataset_dir, self.anno_path)
        image_dir = os.path.join(self.dataset_dir, self.image_dir)
        txt_file = anno_path
        records = []
        ct = 0
        file_lists = self._load_file_list(txt_file)
        cname2cid = widerface_label()
        for item in file_lists:
            im_fname = item[0]
            im_id = np.array([ct])
            gt_bbox = np.zeros((len(item) - 1, 4), dtype=np.float32)
            gt_class = np.zeros((len(item) - 1, 1), dtype=np.int32)
            gt_lmk_labels = np.zeros((len(item) - 1, 10), dtype=np.float32)
            lmk_ignore_flag = np.zeros((len(item) - 1, 1), dtype=np.int32)
            for index_box in range(len(item)):
                if index_box < 1:
                    continue
                gt_bbox[index_box - 1] = item[index_box][0]
                if self.with_lmk:
                    gt_lmk_labels[index_box - 1] = item[index_box][1]
                    lmk_ignore_flag[index_box - 1] = item[index_box][2]
            im_fname = os.path.join(image_dir,
                                    im_fname) if image_dir else im_fname
            widerface_rec = {
                'im_file': im_fname,
                'im_id': im_id,
            } if 'image' in self.data_fields else {}
            gt_rec = {
                'gt_bbox': gt_bbox,
                'gt_class': gt_class,
            }
            for k, v in gt_rec.items():
                if k in self.data_fields:
                    widerface_rec[k] = v
            if self.with_lmk:
                widerface_rec['gt_keypoint'] = gt_lmk_labels
                widerface_rec['keypoint_ignore'] = lmk_ignore_flag
            if len(item) != 0:
                records.append(widerface_rec)
            ct += 1
            if self.sample_num > 0 and ct >= self.sample_num:
                break
        assert len(records) > 0, 'not found any widerface in %s' % (anno_path)
        logger.debug('{} samples in file {}'.format(ct, anno_path))
        self.roidbs, self.cname2cid = records, cname2cid
    def _load_file_list(self, input_txt):
        with open(input_txt, 'r') as f_dir:
            lines_input_txt = f_dir.readlines()
        file_dict = {}
        num_class = 0
        exts = ['jpg', 'jpeg', 'png', 'bmp']
        exts += [ext.upper() for ext in exts]
        for i in range(len(lines_input_txt)):
            line_txt = lines_input_txt[i].strip('\n\t\r')
            split_str = line_txt.split(' ')
            if len(split_str) == 1:
                img_file_name = os.path.split(split_str[0])[1]
                split_txt = img_file_name.split('.')
                if len(split_txt) < 2:
                    continue
                elif split_txt[-1] in exts:
                    if i != 0:
                        num_class += 1
                    file_dict[num_class] = [line_txt]
            else:
                if len(line_txt) <= 6:
                    continue
                result_boxs = []
                xmin = float(split_str[0])
                ymin = float(split_str[1])
                w = float(split_str[2])
                h = float(split_str[3])
                # Filter out wrong labels
                if w < 0 or h < 0:
                    logger.warning('Illegal box with w: {}, h: {} in '
                                   'img: {}, and it will be ignored'.format(
                                       w, h, file_dict[num_class][0]))
                    continue
                xmin = max(0, xmin)
                ymin = max(0, ymin)
                xmax = xmin + w
                ymax = ymin + h
                gt_bbox = [xmin, ymin, xmax, ymax]
                result_boxs.append(gt_bbox)
                if self.with_lmk:
                    assert len(split_str) > 18, 'When `with_lmk=True`, the number' \
                            'of characters per line in the annotation file should' \
                            'exceed 18.'
                    lmk0_x = float(split_str[5])
                    lmk0_y = float(split_str[6])
                    lmk1_x = float(split_str[8])
                    lmk1_y = float(split_str[9])
                    lmk2_x = float(split_str[11])
                    lmk2_y = float(split_str[12])
                    lmk3_x = float(split_str[14])
                    lmk3_y = float(split_str[15])
                    lmk4_x = float(split_str[17])
                    lmk4_y = float(split_str[18])
                    lmk_ignore_flag = 0 if lmk0_x == -1 else 1
                    gt_lmk_label = [
                        lmk0_x, lmk0_y, lmk1_x, lmk1_y, lmk2_x, lmk2_y, lmk3_x,
                        lmk3_y, lmk4_x, lmk4_y
                    ]
                    result_boxs.append(gt_lmk_label)
                    result_boxs.append(lmk_ignore_flag)
                file_dict[num_class].append(result_boxs)
        return list(file_dict.values())
 def widerface_label():
    labels_map = {'face': 0}
    return labels_map
--- a/build/lib/ppdet/data/transform/init.py
+++ b/build/lib/ppdet/data/transform/init.py
@ -0,0 +1,28 @@
 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from . import operators
 from . import batch_operators
 from . import keypoint_operators
 from . import mot_operators
 from .operators import *
 from .batch_operators import *
 from .keypoint_operators import *
 from .mot_operators import *
 __all__ = []
 __all__ += registered_ops
 __all__ += keypoint_operators.__all__
 __all__ += mot_operators.__all__
--- a/build/lib/ppdet/data/transform/autoaugment_utils.py
+++ b/build/lib/ppdet/data/transform/autoaugment_utils.py
--- a/build/lib/ppdet/data/transform/batch_operators.py
+++ b/build/lib/ppdet/data/transform/batch_operators.py
@ -0,0 +1,748 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 try:
    from collections.abc import Sequence
 except Exception:
    from collections import Sequence
 import cv2
 import numpy as np
 from .operators import register_op, BaseOperator, Resize
 from .op_helper import jaccard_overlap, gaussian2D
 from scipy import ndimage
 from ppdet.modeling import bbox_utils
 from ppdet.utils.logger import setup_logger
 logger = setup_logger(__name__)
 __all__ = [
    'PadBatch', 'BatchRandomResize', 'Gt2YoloTarget', 'Gt2FCOSTarget',
    'Gt2TTFTarget', 'Gt2Solov2Target'
 ]
@register_op
 class PadBatch(BaseOperator):
    """
    Pad a batch of samples so they can be divisible by a stride.
    The layout of each image should be 'CHW'.
    Args:
        pad_to_stride (int): If `pad_to_stride > 0`, pad zeros to ensure
            height and width is divisible by `pad_to_stride`.
    """
    def __init__(self, pad_to_stride=0):
        super(PadBatch, self).__init__()
        self.pad_to_stride = pad_to_stride
    def __call__(self, samples, context=None):
        """
        Args:
            samples (list): a batch of sample, each is dict.
        """
        coarsest_stride = self.pad_to_stride
        max_shape = np.array([data['image'].shape for data in samples]).max(
            axis=0)
        if coarsest_stride > 0:
            max_shape[1] = int(
                np.ceil(max_shape[1] / coarsest_stride) * coarsest_stride)
            max_shape[2] = int(
                np.ceil(max_shape[2] / coarsest_stride) * coarsest_stride)
        for data in samples:
            im = data['image']
            im_c, im_h, im_w = im.shape[:]
            padding_im = np.zeros(
                (im_c, max_shape[1], max_shape[2]), dtype=np.float32)
            padding_im[:, :im_h, :im_w] = im
            data['image'] = padding_im
            if 'semantic' in data and data['semantic'] is not None:
                semantic = data['semantic']
                padding_sem = np.zeros(
                    (1, max_shape[1], max_shape[2]), dtype=np.float32)
                padding_sem[:, :im_h, :im_w] = semantic
                data['semantic'] = padding_sem
            if 'gt_segm' in data and data['gt_segm'] is not None:
                gt_segm = data['gt_segm']
                padding_segm = np.zeros(
                    (gt_segm.shape[0], max_shape[1], max_shape[2]),
                    dtype=np.uint8)
                padding_segm[:, :im_h, :im_w] = gt_segm
                data['gt_segm'] = padding_segm
            if 'gt_rbox2poly' in data and data['gt_rbox2poly'] is not None:
                # ploy to rbox
                polys = data['gt_rbox2poly']
                rbox = bbox_utils.poly2rbox(polys)
                data['gt_rbox'] = rbox
        return samples
@register_op
 class BatchRandomResize(BaseOperator):
    """
    Resize image to target size randomly. random target_size and interpolation method
    Args:
        target_size (int, list, tuple): image target size, if random size is True, must be list or tuple
        keep_ratio (bool): whether keep_raio or not, default true
        interp (int): the interpolation method
        random_size (bool): whether random select target size of image
        random_interp (bool): whether random select interpolation method
    """
    def __init__(self,
                 target_size,
                 keep_ratio,
                 interp=cv2.INTER_NEAREST,
                 random_size=True,
                 random_interp=False):
        super(BatchRandomResize, self).__init__()
        self.keep_ratio = keep_ratio
        self.interps = [
            cv2.INTER_NEAREST,
            cv2.INTER_LINEAR,
            cv2.INTER_AREA,
            cv2.INTER_CUBIC,
            cv2.INTER_LANCZOS4,
        ]
        self.interp = interp
        assert isinstance(target_size, (
            int, Sequence)), "target_size must be int, list or tuple"
        if random_size and not isinstance(target_size, list):
            raise TypeError(
                "Type of target_size is invalid when random_size is True. Must be List, now is {}".
                format(type(target_size)))
        self.target_size = target_size
        self.random_size = random_size
        self.random_interp = random_interp
    def __call__(self, samples, context=None):
        if self.random_size:
            index = np.random.choice(len(self.target_size))
            target_size = self.target_size[index]
        else:
            target_size = self.target_size
        if self.random_interp:
            interp = np.random.choice(self.interps)
        else:
            interp = self.interp
        resizer = Resize(target_size, keep_ratio=self.keep_ratio, interp=interp)
        return resizer(samples, context=context)
@register_op
 class Gt2YoloTarget(BaseOperator):
    """
    Generate YOLOv3 targets by groud truth data, this operator is only used in
    fine grained YOLOv3 loss mode
    """
    def __init__(self,
                 anchors,
                 anchor_masks,
                 downsample_ratios,
                 num_classes=80,
                 iou_thresh=1.):
        super(Gt2YoloTarget, self).__init__()
        self.anchors = anchors
        self.anchor_masks = anchor_masks
        self.downsample_ratios = downsample_ratios
        self.num_classes = num_classes
        self.iou_thresh = iou_thresh
    def __call__(self, samples, context=None):
        assert len(self.anchor_masks) == len(self.downsample_ratios), \
            "anchor_masks', and 'downsample_ratios' should have same length."
        h, w = samples[0]['image'].shape[1:3]
        an_hw = np.array(self.anchors) / np.array([[w, h]])
        for sample in samples:
            # im, gt_bbox, gt_class, gt_score = sample
            im = sample['image']
            gt_bbox = sample['gt_bbox']
            gt_class = sample['gt_class']
            if 'gt_score' not in sample:
                sample['gt_score'] = np.ones(
                    (gt_bbox.shape[0], 1), dtype=np.float32)
            gt_score = sample['gt_score']
            for i, (
                    mask, downsample_ratio
            ) in enumerate(zip(self.anchor_masks, self.downsample_ratios)):
                grid_h = int(h / downsample_ratio)
                grid_w = int(w / downsample_ratio)
                target = np.zeros(
                    (len(mask), 6 + self.num_classes, grid_h, grid_w),
                    dtype=np.float32)
                for b in range(gt_bbox.shape[0]):
                    gx, gy, gw, gh = gt_bbox[b, :]
                    cls = gt_class[b]
                    score = gt_score[b]
                    if gw <= 0. or gh <= 0. or score <= 0.:
                        continue
                    # find best match anchor index
                    best_iou = 0.
                    best_idx = -1
                    for an_idx in range(an_hw.shape[0]):
                        iou = jaccard_overlap(
                            [0., 0., gw, gh],
                            [0., 0., an_hw[an_idx, 0], an_hw[an_idx, 1]])
                        if iou > best_iou:
                            best_iou = iou
                            best_idx = an_idx
                    gi = int(gx * grid_w)
                    gj = int(gy * grid_h)
                    # gtbox should be regresed in this layes if best match 
                    # anchor index in anchor mask of this layer
                    if best_idx in mask:
                        best_n = mask.index(best_idx)
                        # x, y, w, h, scale
                        target[best_n, 0, gj, gi] = gx * grid_w - gi
                        target[best_n, 1, gj, gi] = gy * grid_h - gj
                        target[best_n, 2, gj, gi] = np.log(
                            gw * w / self.anchors[best_idx][0])
                        target[best_n, 3, gj, gi] = np.log(
                            gh * h / self.anchors[best_idx][1])
                        target[best_n, 4, gj, gi] = 2.0 - gw * gh
                        # objectness record gt_score
                        target[best_n, 5, gj, gi] = score
                        # classification
                        target[best_n, 6 + cls, gj, gi] = 1.
                    # For non-matched anchors, calculate the target if the iou 
                    # between anchor and gt is larger than iou_thresh
                    if self.iou_thresh < 1:
                        for idx, mask_i in enumerate(mask):
                            if mask_i == best_idx: continue
                            iou = jaccard_overlap(
                                [0., 0., gw, gh],
                                [0., 0., an_hw[mask_i, 0], an_hw[mask_i, 1]])
                            if iou > self.iou_thresh and target[idx, 5, gj,
                                                                gi] == 0.:
                                # x, y, w, h, scale
                                target[idx, 0, gj, gi] = gx * grid_w - gi
                                target[idx, 1, gj, gi] = gy * grid_h - gj
                                target[idx, 2, gj, gi] = np.log(
                                    gw * w / self.anchors[mask_i][0])
                                target[idx, 3, gj, gi] = np.log(
                                    gh * h / self.anchors[mask_i][1])
                                target[idx, 4, gj, gi] = 2.0 - gw * gh
                                # objectness record gt_score
                                target[idx, 5, gj, gi] = score
                                # classification
                                target[idx, 6 + cls, gj, gi] = 1.
                sample['target{}'.format(i)] = target
            # remove useless gt_class and gt_score after target calculated
            sample.pop('gt_class')
            sample.pop('gt_score')
        return samples
@register_op
 class Gt2FCOSTarget(BaseOperator):
    """
    Generate FCOS targets by groud truth data
    """
    def __init__(self,
                 object_sizes_boundary,
                 center_sampling_radius,
                 downsample_ratios,
                 norm_reg_targets=False):
        super(Gt2FCOSTarget, self).__init__()
        self.center_sampling_radius = center_sampling_radius
        self.downsample_ratios = downsample_ratios
        self.INF = np.inf
        self.object_sizes_boundary = [-1] + object_sizes_boundary + [self.INF]
        object_sizes_of_interest = []
        for i in range(len(self.object_sizes_boundary) - 1):
            object_sizes_of_interest.append([
                self.object_sizes_boundary[i], self.object_sizes_boundary[i + 1]
            ])
        self.object_sizes_of_interest = object_sizes_of_interest
        self.norm_reg_targets = norm_reg_targets
    def _compute_points(self, w, h):
        """
        compute the corresponding points in each feature map
        :param h: image height
        :param w: image width
        :return: points from all feature map
        """
        locations = []
        for stride in self.downsample_ratios:
            shift_x = np.arange(0, w, stride).astype(np.float32)
            shift_y = np.arange(0, h, stride).astype(np.float32)
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shift_x = shift_x.flatten()
            shift_y = shift_y.flatten()
            location = np.stack([shift_x, shift_y], axis=1) + stride // 2
            locations.append(location)
        num_points_each_level = [len(location) for location in locations]
        locations = np.concatenate(locations, axis=0)
        return locations, num_points_each_level
    def _convert_xywh2xyxy(self, gt_bbox, w, h):
        """
        convert the bounding box from style xywh to xyxy
        :param gt_bbox: bounding boxes normalized into [0, 1]
        :param w: image width
        :param h: image height
        :return: bounding boxes in xyxy style
        """
        bboxes = gt_bbox.copy()
        bboxes[:, [0, 2]] = bboxes[:, [0, 2]] * w
        bboxes[:, [1, 3]] = bboxes[:, [1, 3]] * h
        bboxes[:, 2] = bboxes[:, 0] + bboxes[:, 2]
        bboxes[:, 3] = bboxes[:, 1] + bboxes[:, 3]
        return bboxes
    def _check_inside_boxes_limited(self, gt_bbox, xs, ys,
                                    num_points_each_level):
        """
        check if points is within the clipped boxes
        :param gt_bbox: bounding boxes
        :param xs: horizontal coordinate of points
        :param ys: vertical coordinate of points
        :return: the mask of points is within gt_box or not
        """
        bboxes = np.reshape(
            gt_bbox, newshape=[1, gt_bbox.shape[0], gt_bbox.shape[1]])
        bboxes = np.tile(bboxes, reps=[xs.shape[0], 1, 1])
        ct_x = (bboxes[:, :, 0] + bboxes[:, :, 2]) / 2
        ct_y = (bboxes[:, :, 1] + bboxes[:, :, 3]) / 2
        beg = 0
        clipped_box = bboxes.copy()
        for lvl, stride in enumerate(self.downsample_ratios):
            end = beg + num_points_each_level[lvl]
            stride_exp = self.center_sampling_radius * stride
            clipped_box[beg:end, :, 0] = np.maximum(
                bboxes[beg:end, :, 0], ct_x[beg:end, :] - stride_exp)
            clipped_box[beg:end, :, 1] = np.maximum(
                bboxes[beg:end, :, 1], ct_y[beg:end, :] - stride_exp)
            clipped_box[beg:end, :, 2] = np.minimum(
                bboxes[beg:end, :, 2], ct_x[beg:end, :] + stride_exp)
            clipped_box[beg:end, :, 3] = np.minimum(
                bboxes[beg:end, :, 3], ct_y[beg:end, :] + stride_exp)
            beg = end
        l_res = xs - clipped_box[:, :, 0]
        r_res = clipped_box[:, :, 2] - xs
        t_res = ys - clipped_box[:, :, 1]
        b_res = clipped_box[:, :, 3] - ys
        clipped_box_reg_targets = np.stack([l_res, t_res, r_res, b_res], axis=2)
        inside_gt_box = np.min(clipped_box_reg_targets, axis=2) > 0
        return inside_gt_box
    def __call__(self, samples, context=None):
        assert len(self.object_sizes_of_interest) == len(self.downsample_ratios), \
            "object_sizes_of_interest', and 'downsample_ratios' should have same length."
        for sample in samples:
            # im, gt_bbox, gt_class, gt_score = sample
            im = sample['image']
            bboxes = sample['gt_bbox']
            gt_class = sample['gt_class']
            # calculate the locations
            h, w = im.shape[1:3]
            points, num_points_each_level = self._compute_points(w, h)
            object_scale_exp = []
            for i, num_pts in enumerate(num_points_each_level):
                object_scale_exp.append(
                    np.tile(
                        np.array([self.object_sizes_of_interest[i]]),
                        reps=[num_pts, 1]))
            object_scale_exp = np.concatenate(object_scale_exp, axis=0)
            gt_area = (bboxes[:, 2] - bboxes[:, 0]) * (
                bboxes[:, 3] - bboxes[:, 1])
            xs, ys = points[:, 0], points[:, 1]
            xs = np.reshape(xs, newshape=[xs.shape[0], 1])
            xs = np.tile(xs, reps=[1, bboxes.shape[0]])
            ys = np.reshape(ys, newshape=[ys.shape[0], 1])
            ys = np.tile(ys, reps=[1, bboxes.shape[0]])
            l_res = xs - bboxes[:, 0]
            r_res = bboxes[:, 2] - xs
            t_res = ys - bboxes[:, 1]
            b_res = bboxes[:, 3] - ys
            reg_targets = np.stack([l_res, t_res, r_res, b_res], axis=2)
            if self.center_sampling_radius > 0:
                is_inside_box = self._check_inside_boxes_limited(
                    bboxes, xs, ys, num_points_each_level)
            else:
                is_inside_box = np.min(reg_targets, axis=2) > 0
            # check if the targets is inside the corresponding level
            max_reg_targets = np.max(reg_targets, axis=2)
            lower_bound = np.tile(
                np.expand_dims(
                    object_scale_exp[:, 0], axis=1),
                reps=[1, max_reg_targets.shape[1]])
            high_bound = np.tile(
                np.expand_dims(
                    object_scale_exp[:, 1], axis=1),
                reps=[1, max_reg_targets.shape[1]])
            is_match_current_level = \
                (max_reg_targets > lower_bound) & \
                (max_reg_targets < high_bound)
            points2gtarea = np.tile(
                np.expand_dims(
                    gt_area, axis=0), reps=[xs.shape[0], 1])
            points2gtarea[is_inside_box == 0] = self.INF
            points2gtarea[is_match_current_level == 0] = self.INF
            points2min_area = points2gtarea.min(axis=1)
            points2min_area_ind = points2gtarea.argmin(axis=1)
            labels = gt_class[points2min_area_ind] + 1
            labels[points2min_area == self.INF] = 0
            reg_targets = reg_targets[range(xs.shape[0]), points2min_area_ind]
            ctn_targets = np.sqrt((reg_targets[:, [0, 2]].min(axis=1) / \
                                  reg_targets[:, [0, 2]].max(axis=1)) * \
                                  (reg_targets[:, [1, 3]].min(axis=1) / \
                                   reg_targets[:, [1, 3]].max(axis=1))).astype(np.float32)
            ctn_targets = np.reshape(
                ctn_targets, newshape=[ctn_targets.shape[0], 1])
            ctn_targets[labels <= 0] = 0
            pos_ind = np.nonzero(labels != 0)
            reg_targets_pos = reg_targets[pos_ind[0], :]
            split_sections = []
            beg = 0
            for lvl in range(len(num_points_each_level)):
                end = beg + num_points_each_level[lvl]
                split_sections.append(end)
                beg = end
            labels_by_level = np.split(labels, split_sections, axis=0)
            reg_targets_by_level = np.split(reg_targets, split_sections, axis=0)
            ctn_targets_by_level = np.split(ctn_targets, split_sections, axis=0)
            for lvl in range(len(self.downsample_ratios)):
                grid_w = int(np.ceil(w / self.downsample_ratios[lvl]))
                grid_h = int(np.ceil(h / self.downsample_ratios[lvl]))
                if self.norm_reg_targets:
                    sample['reg_target{}'.format(lvl)] = \
                        np.reshape(
                            reg_targets_by_level[lvl] / \
                            self.downsample_ratios[lvl],
                            newshape=[grid_h, grid_w, 4])
                else:
                    sample['reg_target{}'.format(lvl)] = np.reshape(
                        reg_targets_by_level[lvl],
                        newshape=[grid_h, grid_w, 4])
                sample['labels{}'.format(lvl)] = np.reshape(
                    labels_by_level[lvl], newshape=[grid_h, grid_w, 1])
                sample['centerness{}'.format(lvl)] = np.reshape(
                    ctn_targets_by_level[lvl], newshape=[grid_h, grid_w, 1])
            sample.pop('is_crowd', None)
            sample.pop('difficult', None)
            sample.pop('gt_class', None)
            sample.pop('gt_bbox', None)
        return samples
@register_op
 class Gt2TTFTarget(BaseOperator):
    __shared__ = ['num_classes']
    """
    Gt2TTFTarget
    Generate TTFNet targets by ground truth data
    Args:
        num_classes(int): the number of classes.
        down_ratio(int): the down ratio from images to heatmap, 4 by default.
        alpha(float): the alpha parameter to generate gaussian target.
            0.54 by default.
    """
    def __init__(self, num_classes=80, down_ratio=4, alpha=0.54):
        super(Gt2TTFTarget, self).__init__()
        self.down_ratio = down_ratio
        self.num_classes = num_classes
        self.alpha = alpha
    def __call__(self, samples, context=None):
        output_size = samples[0]['image'].shape[1]
        feat_size = output_size // self.down_ratio
        for sample in samples:
            heatmap = np.zeros(
                (self.num_classes, feat_size, feat_size), dtype='float32')
            box_target = np.ones(
                (4, feat_size, feat_size), dtype='float32') * -1
            reg_weight = np.zeros((1, feat_size, feat_size), dtype='float32')
            gt_bbox = sample['gt_bbox']
            gt_class = sample['gt_class']
            bbox_w = gt_bbox[:, 2] - gt_bbox[:, 0] + 1
            bbox_h = gt_bbox[:, 3] - gt_bbox[:, 1] + 1
            area = bbox_w * bbox_h
            boxes_areas_log = np.log(area)
            boxes_ind = np.argsort(boxes_areas_log, axis=0)[::-1]
            boxes_area_topk_log = boxes_areas_log[boxes_ind]
            gt_bbox = gt_bbox[boxes_ind]
            gt_class = gt_class[boxes_ind]
            feat_gt_bbox = gt_bbox / self.down_ratio
            feat_gt_bbox = np.clip(feat_gt_bbox, 0, feat_size - 1)
            feat_hs, feat_ws = (feat_gt_bbox[:, 3] - feat_gt_bbox[:, 1],
                                feat_gt_bbox[:, 2] - feat_gt_bbox[:, 0])
            ct_inds = np.stack(
                [(gt_bbox[:, 0] + gt_bbox[:, 2]) / 2,
                 (gt_bbox[:, 1] + gt_bbox[:, 3]) / 2],
                axis=1) / self.down_ratio
            h_radiuses_alpha = (feat_hs / 2. * self.alpha).astype('int32')
            w_radiuses_alpha = (feat_ws / 2. * self.alpha).astype('int32')
            for k in range(len(gt_bbox)):
                cls_id = gt_class[k]
                fake_heatmap = np.zeros((feat_size, feat_size), dtype='float32')
                self.draw_truncate_gaussian(fake_heatmap, ct_inds[k],
                                            h_radiuses_alpha[k],
                                            w_radiuses_alpha[k])
                heatmap[cls_id] = np.maximum(heatmap[cls_id], fake_heatmap)
                box_target_inds = fake_heatmap > 0
                box_target[:, box_target_inds] = gt_bbox[k][:, None]
                local_heatmap = fake_heatmap[box_target_inds]
                ct_div = np.sum(local_heatmap)
                local_heatmap *= boxes_area_topk_log[k]
                reg_weight[0, box_target_inds] = local_heatmap / ct_div
            sample['ttf_heatmap'] = heatmap
            sample['ttf_box_target'] = box_target
            sample['ttf_reg_weight'] = reg_weight
            sample.pop('is_crowd', None)
            sample.pop('difficult', None)
            sample.pop('gt_class', None)
            sample.pop('gt_bbox', None)
            sample.pop('gt_score', None)
        return samples
    def draw_truncate_gaussian(self, heatmap, center, h_radius, w_radius):
        h, w = 2 * h_radius + 1, 2 * w_radius + 1
        sigma_x = w / 6
        sigma_y = h / 6
        gaussian = gaussian2D((h, w), sigma_x, sigma_y)
        x, y = int(center[0]), int(center[1])
        height, width = heatmap.shape[0:2]
        left, right = min(x, w_radius), min(width - x, w_radius + 1)
        top, bottom = min(y, h_radius), min(height - y, h_radius + 1)
        masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
        masked_gaussian = gaussian[h_radius - top:h_radius + bottom, w_radius -
                                   left:w_radius + right]
        if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:
            heatmap[y - top:y + bottom, x - left:x + right] = np.maximum(
                masked_heatmap, masked_gaussian)
        return heatmap
@register_op
 class Gt2Solov2Target(BaseOperator):
    """Assign mask target and labels in SOLOv2 network.
    Args:
        num_grids (list): The list of feature map grids size.
        scale_ranges (list): The list of mask boundary range.
        coord_sigma (float): The coefficient of coordinate area length.
        sampling_ratio (float): The ratio of down sampling.
    """
    def __init__(self,
                 num_grids=[40, 36, 24, 16, 12],
                 scale_ranges=[[1, 96], [48, 192], [96, 384], [192, 768],
                               [384, 2048]],
                 coord_sigma=0.2,
                 sampling_ratio=4.0):
        super(Gt2Solov2Target, self).__init__()
        self.num_grids = num_grids
        self.scale_ranges = scale_ranges
        self.coord_sigma = coord_sigma
        self.sampling_ratio = sampling_ratio
    def _scale_size(self, im, scale):
        h, w = im.shape[:2]
        new_size = (int(w * float(scale) + 0.5), int(h * float(scale) + 0.5))
        resized_img = cv2.resize(
            im, None, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
        return resized_img
    def __call__(self, samples, context=None):
        sample_id = 0
        max_ins_num = [0] * len(self.num_grids)
        for sample in samples:
            gt_bboxes_raw = sample['gt_bbox']
            gt_labels_raw = sample['gt_class'] + 1
            im_c, im_h, im_w = sample['image'].shape[:]
            gt_masks_raw = sample['gt_segm'].astype(np.uint8)
            mask_feat_size = [
                int(im_h / self.sampling_ratio), int(im_w / self.sampling_ratio)
            ]
            gt_areas = np.sqrt((gt_bboxes_raw[:, 2] - gt_bboxes_raw[:, 0]) *
                               (gt_bboxes_raw[:, 3] - gt_bboxes_raw[:, 1]))
            ins_ind_label_list = []
            idx = 0
            for (lower_bound, upper_bound), num_grid \
                    in zip(self.scale_ranges, self.num_grids):
                hit_indices = ((gt_areas >= lower_bound) &
                               (gt_areas <= upper_bound)).nonzero()[0]
                num_ins = len(hit_indices)
                ins_label = []
                grid_order = []
                cate_label = np.zeros([num_grid, num_grid], dtype=np.int64)
                ins_ind_label = np.zeros([num_grid**2], dtype=np.bool)
                if num_ins == 0:
                    ins_label = np.zeros(
                        [1, mask_feat_size[0], mask_feat_size[1]],
                        dtype=np.uint8)
                    ins_ind_label_list.append(ins_ind_label)
                    sample['cate_label{}'.format(idx)] = cate_label.flatten()
                    sample['ins_label{}'.format(idx)] = ins_label
                    sample['grid_order{}'.format(idx)] = np.asarray(
                        [sample_id * num_grid * num_grid + 0], dtype=np.int32)
                    idx += 1
                    continue
                gt_bboxes = gt_bboxes_raw[hit_indices]
                gt_labels = gt_labels_raw[hit_indices]
                gt_masks = gt_masks_raw[hit_indices, ...]
                half_ws = 0.5 * (
                    gt_bboxes[:, 2] - gt_bboxes[:, 0]) * self.coord_sigma
                half_hs = 0.5 * (
                    gt_bboxes[:, 3] - gt_bboxes[:, 1]) * self.coord_sigma
                for seg_mask, gt_label, half_h, half_w in zip(
                        gt_masks, gt_labels, half_hs, half_ws):
                    if seg_mask.sum() == 0:
                        continue
                    # mass center
                    upsampled_size = (mask_feat_size[0] * 4,
                                      mask_feat_size[1] * 4)
                    center_h, center_w = ndimage.measurements.center_of_mass(
                        seg_mask)
                    coord_w = int(
                        (center_w / upsampled_size[1]) // (1. / num_grid))
                    coord_h = int(
                        (center_h / upsampled_size[0]) // (1. / num_grid))
                    # left, top, right, down
                    top_box = max(0,
                                  int(((center_h - half_h) / upsampled_size[0])
                                      // (1. / num_grid)))
                    down_box = min(num_grid - 1,
                                   int(((center_h + half_h) / upsampled_size[0])
                                       // (1. / num_grid)))
                    left_box = max(0,
                                   int(((center_w - half_w) / upsampled_size[1])
                                       // (1. / num_grid)))
                    right_box = min(num_grid - 1,
                                    int(((center_w + half_w) /
                                         upsampled_size[1]) // (1. / num_grid)))
                    top = max(top_box, coord_h - 1)
                    down = min(down_box, coord_h + 1)
                    left = max(coord_w - 1, left_box)
                    right = min(right_box, coord_w + 1)
                    cate_label[top:(down + 1), left:(right + 1)] = gt_label
                    seg_mask = self._scale_size(
                        seg_mask, scale=1. / self.sampling_ratio)
                    for i in range(top, down + 1):
                        for j in range(left, right + 1):
                            label = int(i * num_grid + j)
                            cur_ins_label = np.zeros(
                                [mask_feat_size[0], mask_feat_size[1]],
                                dtype=np.uint8)
                            cur_ins_label[:seg_mask.shape[0], :seg_mask.shape[
                                1]] = seg_mask
                            ins_label.append(cur_ins_label)
                            ins_ind_label[label] = True
                            grid_order.append(sample_id * num_grid * num_grid +
                                              label)
                if ins_label == []:
                    ins_label = np.zeros(
                        [1, mask_feat_size[0], mask_feat_size[1]],
                        dtype=np.uint8)
                    ins_ind_label_list.append(ins_ind_label)
                    sample['cate_label{}'.format(idx)] = cate_label.flatten()
                    sample['ins_label{}'.format(idx)] = ins_label
                    sample['grid_order{}'.format(idx)] = np.asarray(
                        [sample_id * num_grid * num_grid + 0], dtype=np.int32)
                else:
                    ins_label = np.stack(ins_label, axis=0)
                    ins_ind_label_list.append(ins_ind_label)
                    sample['cate_label{}'.format(idx)] = cate_label.flatten()
                    sample['ins_label{}'.format(idx)] = ins_label
                    sample['grid_order{}'.format(idx)] = np.asarray(
                        grid_order, dtype=np.int32)
                    assert len(grid_order) > 0
                max_ins_num[idx] = max(
                    max_ins_num[idx],
                    sample['ins_label{}'.format(idx)].shape[0])
                idx += 1
            ins_ind_labels = np.concatenate([
                ins_ind_labels_level_img
                for ins_ind_labels_level_img in ins_ind_label_list
            ])
            fg_num = np.sum(ins_ind_labels)
            sample['fg_num'] = fg_num
            sample_id += 1
            sample.pop('is_crowd')
            sample.pop('gt_class')
            sample.pop('gt_bbox')
            sample.pop('gt_poly')
            sample.pop('gt_segm')
        # padding batch
        for data in samples:
            for idx in range(len(self.num_grids)):
                gt_ins_data = np.zeros(
                    [
                        max_ins_num[idx],
                        data['ins_label{}'.format(idx)].shape[1],
                        data['ins_label{}'.format(idx)].shape[2]
                    ],
                    dtype=np.uint8)
                gt_ins_data[0:data['ins_label{}'.format(idx)].shape[
                    0], :, :] = data['ins_label{}'.format(idx)]
                gt_grid_order = np.zeros([max_ins_num[idx]], dtype=np.int32)
                gt_grid_order[0:data['grid_order{}'.format(idx)].shape[
                    0]] = data['grid_order{}'.format(idx)]
                data['ins_label{}'.format(idx)] = gt_ins_data
                data['grid_order{}'.format(idx)] = gt_grid_order
        return samples
--- a/build/lib/ppdet/data/transform/gridmask_utils.py
+++ b/build/lib/ppdet/data/transform/gridmask_utils.py
@ -0,0 +1,83 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import print_function
 from __future__ import division
 import numpy as np
 from PIL import Image
 class Gridmask(object):
    def __init__(self,
                 use_h=True,
                 use_w=True,
                 rotate=1,
                 offset=False,
                 ratio=0.5,
                 mode=1,
                 prob=0.7,
                 upper_iter=360000):
        super(Gridmask, self).__init__()
        self.use_h = use_h
        self.use_w = use_w
        self.rotate = rotate
        self.offset = offset
        self.ratio = ratio
        self.mode = mode
        self.prob = prob
        self.st_prob = prob
        self.upper_iter = upper_iter
    def __call__(self, x, curr_iter):
        self.prob = self.st_prob * min(1, 1.0 * curr_iter / self.upper_iter)
        if np.random.rand() > self.prob:
            return x
        h, w, _ = x.shape
        hh = int(1.5 * h)
        ww = int(1.5 * w)
        d = np.random.randint(2, h)
        self.l = min(max(int(d * self.ratio + 0.5), 1), d - 1)
        mask = np.ones((hh, ww), np.float32)
        st_h = np.random.randint(d)
        st_w = np.random.randint(d)
        if self.use_h:
            for i in range(hh // d):
                s = d * i + st_h
                t = min(s + self.l, hh)
                mask[s:t, :] *= 0
        if self.use_w:
            for i in range(ww // d):
                s = d * i + st_w
                t = min(s + self.l, ww)
                mask[:, s:t] *= 0
        r = np.random.randint(self.rotate)
        mask = Image.fromarray(np.uint8(mask))
        mask = mask.rotate(r)
        mask = np.asarray(mask)
        mask = mask[(hh - h) // 2:(hh - h) // 2 + h, (ww - w) // 2:(ww - w) // 2
                    + w].astype(np.float32)
        if self.mode == 1:
            mask = 1 - mask
        mask = np.expand_dims(mask, axis=-1)
        if self.offset:
            offset = (2 * (np.random.rand(h, w) - 0.5)).astype(np.float32)
            x = (x * mask + offset * (1 - mask)).astype(x.dtype)
        else:
            x = (x * mask).astype(x.dtype)
        return x
--- a/build/lib/ppdet/data/transform/keypoint_operators.py
+++ b/build/lib/ppdet/data/transform/keypoint_operators.py
@ -0,0 +1,663 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # function:
 #    operators to process sample,
 #    eg: decode/resize/crop image
 from __future__ import absolute_import
 try:
    from collections.abc import Sequence
 except Exception:
    from collections import Sequence
 import cv2
 import numpy as np
 import math
 import copy
 from ...modeling.keypoint_utils import get_affine_mat_kernel, warp_affine_joints, get_affine_transform, affine_transform
 from ppdet.core.workspace import serializable
 from ppdet.utils.logger import setup_logger
 logger = setup_logger(__name__)
 registered_ops = []
 __all__ = [
    'RandomAffine', 'KeyPointFlip', 'TagGenerate', 'ToHeatmaps',
    'NormalizePermute', 'EvalAffine', 'RandomFlipHalfBodyTransform',
    'TopDownAffine', 'ToHeatmapsTopDown', 'TopDownEvalAffine'
 ]
 def register_keypointop(cls):
    return serializable(cls)
@register_keypointop
 class KeyPointFlip(object):
    """Get the fliped image by flip_prob. flip the coords also
    the left coords and right coords should exchange while flip, for the right keypoint will be left keypoint after image fliped
    Args:
        flip_permutation (list[17]): the left-right exchange order list corresponding to [0,1,2,...,16]
        hmsize (list[2]): output heatmap's shape list of different scale outputs of higherhrnet
        flip_prob (float): the ratio whether to flip the image
        records(dict): the dict contained the image, mask and coords
    Returns:
        records(dict): contain the image, mask and coords after tranformed
    """
    def __init__(self, flip_permutation, hmsize, flip_prob=0.5):
        super(KeyPointFlip, self).__init__()
        assert isinstance(flip_permutation, Sequence)
        self.flip_permutation = flip_permutation
        self.flip_prob = flip_prob
        self.hmsize = hmsize
    def __call__(self, records):
        image = records['image']
        kpts_lst = records['joints']
        mask_lst = records['mask']
        flip = np.random.random() < self.flip_prob
        if flip:
            image = image[:, ::-1]
            for idx, hmsize in enumerate(self.hmsize):
                if len(mask_lst) > idx:
                    mask_lst[idx] = mask_lst[idx][:, ::-1]
                if kpts_lst[idx].ndim == 3:
                    kpts_lst[idx] = kpts_lst[idx][:, self.flip_permutation]
                else:
                    kpts_lst[idx] = kpts_lst[idx][self.flip_permutation]
                kpts_lst[idx][..., 0] = hmsize - kpts_lst[idx][..., 0]
                kpts_lst[idx] = kpts_lst[idx].astype(np.int64)
                kpts_lst[idx][kpts_lst[idx][..., 0] >= hmsize, 2] = 0
                kpts_lst[idx][kpts_lst[idx][..., 1] >= hmsize, 2] = 0
                kpts_lst[idx][kpts_lst[idx][..., 0] < 0, 2] = 0
                kpts_lst[idx][kpts_lst[idx][..., 1] < 0, 2] = 0
        records['image'] = image
        records['joints'] = kpts_lst
        records['mask'] = mask_lst
        return records
 def get_warp_matrix(theta, size_input, size_dst, size_target):
    """Calculate the transformation matrix under the constraint of unbiased.
    Paper ref: Huang et al. The Devil is in the Details: Delving into Unbiased
    Data Processing for Human Pose Estimation (CVPR 2020).
    Args:
        theta (float): Rotation angle in degrees.
        size_input (np.ndarray): Size of input image [w, h].
        size_dst (np.ndarray): Size of output image [w, h].
        size_target (np.ndarray): Size of ROI in input plane [w, h].
    Returns:
        matrix (np.ndarray): A matrix for transformation.
    """
    theta = np.deg2rad(theta)
    matrix = np.zeros((2, 3), dtype=np.float32)
    scale_x = size_dst[0] / size_target[0]
    scale_y = size_dst[1] / size_target[1]
    matrix[0, 0] = math.cos(theta) * scale_x
    matrix[0, 1] = -math.sin(theta) * scale_x
    matrix[0, 2] = scale_x * (
        -0.5 * size_input[0] * math.cos(theta) + 0.5 * size_input[1] *
        math.sin(theta) + 0.5 * size_target[0])
    matrix[1, 0] = math.sin(theta) * scale_y
    matrix[1, 1] = math.cos(theta) * scale_y
    matrix[1, 2] = scale_y * (
        -0.5 * size_input[0] * math.sin(theta) - 0.5 * size_input[1] *
        math.cos(theta) + 0.5 * size_target[1])
    return matrix
@register_keypointop
 class RandomAffine(object):
    """apply affine transform to image, mask and coords
    to achieve the rotate, scale and shift effect for training image
    Args:
        max_degree (float): the max abslute rotate degree to apply, transform range is [-max_degree, max_degree]
        max_scale (list[2]): the scale range to apply, transform range is [min, max]
        max_shift (float): the max abslute shift ratio to apply, transform range is [-max_shift*imagesize, max_shift*imagesize]
        hmsize (list[2]): output heatmap's shape list of different scale outputs of higherhrnet
        trainsize (int): the standard length used to train, the 'scale_type' of [h,w] will be resize to trainsize for standard
        scale_type (str): the length of [h,w] to used for trainsize, chosed between 'short' and 'long'
        records(dict): the dict contained the image, mask and coords
    Returns:
        records(dict): contain the image, mask and coords after tranformed
    """
    def __init__(self,
                 max_degree=30,
                 scale=[0.75, 1.5],
                 max_shift=0.2,
                 hmsize=[128, 256],
                 trainsize=512,
                 scale_type='short'):
        super(RandomAffine, self).__init__()
        self.max_degree = max_degree
        self.min_scale = scale[0]
        self.max_scale = scale[1]
        self.max_shift = max_shift
        self.hmsize = hmsize
        self.trainsize = trainsize
        self.scale_type = scale_type
    def _get_affine_matrix(self, center, scale, res, rot=0):
        """Generate transformation matrix."""
        h = scale
        t = np.zeros((3, 3), dtype=np.float32)
        t[0, 0] = float(res[1]) / h
        t[1, 1] = float(res[0]) / h
        t[0, 2] = res[1] * (-float(center[0]) / h + .5)
        t[1, 2] = res[0] * (-float(center[1]) / h + .5)
        t[2, 2] = 1
        if rot != 0:
            rot = -rot  # To match direction of rotation from cropping
            rot_mat = np.zeros((3, 3), dtype=np.float32)
            rot_rad = rot * np.pi / 180
            sn, cs = np.sin(rot_rad), np.cos(rot_rad)
            rot_mat[0, :2] = [cs, -sn]
            rot_mat[1, :2] = [sn, cs]
            rot_mat[2, 2] = 1
            # Need to rotate around center
            t_mat = np.eye(3)
            t_mat[0, 2] = -res[1] / 2
            t_mat[1, 2] = -res[0] / 2
            t_inv = t_mat.copy()
            t_inv[:2, 2] *= -1
            t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t)))
        return t
    def __call__(self, records):
        image = records['image']
        keypoints = records['joints']
        heatmap_mask = records['mask']
        degree = (np.random.random() * 2 - 1) * self.max_degree
        shape = np.array(image.shape[:2][::-1])
        center = center = np.array((np.array(shape) / 2))
        aug_scale = np.random.random() * (self.max_scale - self.min_scale
                                          ) + self.min_scale
        if self.scale_type == 'long':
            scale = max(shape[0], shape[1]) / 1.0
        elif self.scale_type == 'short':
            scale = min(shape[0], shape[1]) / 1.0
        else:
            raise ValueError('Unknown scale type: {}'.format(self.scale_type))
        roi_size = aug_scale * scale
        dx = int(0)
        dy = int(0)
        if self.max_shift > 0:
            dx = np.random.randint(-self.max_shift * roi_size,
                                   self.max_shift * roi_size)
            dy = np.random.randint(-self.max_shift * roi_size,
                                   self.max_shift * roi_size)
        center += np.array([dx, dy])
        input_size = 2 * center
        keypoints[..., :2] *= shape
        heatmap_mask *= 255
        kpts_lst = []
        mask_lst = []
        image_affine_mat = self._get_affine_matrix(
            center, roi_size, (self.trainsize, self.trainsize), degree)[:2]
        image = cv2.warpAffine(
            image,
            image_affine_mat, (self.trainsize, self.trainsize),
            flags=cv2.INTER_LINEAR)
        for hmsize in self.hmsize:
            kpts = copy.deepcopy(keypoints)
            mask_affine_mat = self._get_affine_matrix(
                center, roi_size, (hmsize, hmsize), degree)[:2]
            if heatmap_mask is not None:
                mask = cv2.warpAffine(heatmap_mask, mask_affine_mat,
                                      (hmsize, hmsize))
                mask = ((mask / 255) > 0.5).astype(np.float32)
            kpts[..., 0:2] = warp_affine_joints(kpts[..., 0:2].copy(),
                                                mask_affine_mat)
            kpts[np.trunc(kpts[..., 0]) >= hmsize, 2] = 0
            kpts[np.trunc(kpts[..., 1]) >= hmsize, 2] = 0
            kpts[np.trunc(kpts[..., 0]) < 0, 2] = 0
            kpts[np.trunc(kpts[..., 1]) < 0, 2] = 0
            kpts_lst.append(kpts)
            mask_lst.append(mask)
        records['image'] = image
        records['joints'] = kpts_lst
        records['mask'] = mask_lst
        return records
@register_keypointop
 class EvalAffine(object):
    """apply affine transform to image
    resize the short of [h,w] to standard size for eval
    Args:
        size (int): the standard length used to train, the 'short' of [h,w] will be resize to trainsize for standard
        records(dict): the dict contained the image, mask and coords
    Returns:
        records(dict): contain the image, mask and coords after tranformed
    """
    def __init__(self, size, stride=64):
        super(EvalAffine, self).__init__()
        self.size = size
        self.stride = stride
    def __call__(self, records):
        image = records['image']
        mask = records['mask'] if 'mask' in records else None
        s = self.size
        h, w, _ = image.shape
        trans, size_resized = get_affine_mat_kernel(h, w, s, inv=False)
        image_resized = cv2.warpAffine(image, trans, size_resized)
        if mask is not None:
            mask = cv2.warpAffine(mask, trans, size_resized)
            records['mask'] = mask
        if 'joints' in records:
            del records['joints']
        records['image'] = image_resized
        return records
@register_keypointop
 class NormalizePermute(object):
    def __init__(self,
                 mean=[123.675, 116.28, 103.53],
                 std=[58.395, 57.120, 57.375],
                 is_scale=True):
        super(NormalizePermute, self).__init__()
        self.mean = mean
        self.std = std
        self.is_scale = is_scale
    def __call__(self, records):
        image = records['image']
        image = image.astype(np.float32)
        if self.is_scale:
            image /= 255.
        image = image.transpose((2, 0, 1))
        mean = np.array(self.mean, dtype=np.float32)
        std = np.array(self.std, dtype=np.float32)
        invstd = 1. / std
        for v, m, s in zip(image, mean, invstd):
            v.__isub__(m).__imul__(s)
        records['image'] = image
        return records
@register_keypointop
 class TagGenerate(object):
    """record gt coords for aeloss to sample coords value in tagmaps
    Args:
        num_joints (int): the keypoint numbers of dataset to train
        num_people (int): maxmum people to support for sample aeloss
        records(dict): the dict contained the image, mask and coords
    Returns:
        records(dict): contain the gt coords used in tagmap
    """
    def __init__(self, num_joints, max_people=30):
        super(TagGenerate, self).__init__()
        self.max_people = max_people
        self.num_joints = num_joints
    def __call__(self, records):
        kpts_lst = records['joints']
        kpts = kpts_lst[0]
        tagmap = np.zeros((self.max_people, self.num_joints, 4), dtype=np.int64)
        inds = np.where(kpts[..., 2] > 0)
        p, j = inds[0], inds[1]
        visible = kpts[inds]
        # tagmap is [p, j, 3], where last dim is j, y, x
        tagmap[p, j, 0] = j
        tagmap[p, j, 1] = visible[..., 1]  # y
        tagmap[p, j, 2] = visible[..., 0]  # x
        tagmap[p, j, 3] = 1
        records['tagmap'] = tagmap
        del records['joints']
        return records
@register_keypointop
 class ToHeatmaps(object):
    """to generate the gaussin heatmaps of keypoint for heatmap loss
    Args:
        num_joints (int): the keypoint numbers of dataset to train
        hmsize (list[2]): output heatmap's shape list of different scale outputs of higherhrnet
        sigma (float): the std of gaussin kernel genereted
        records(dict): the dict contained the image, mask and coords
    Returns:
        records(dict): contain the heatmaps used to heatmaploss
    """
    def __init__(self, num_joints, hmsize, sigma=None):
        super(ToHeatmaps, self).__init__()
        self.num_joints = num_joints
        self.hmsize = np.array(hmsize)
        if sigma is None:
            sigma = hmsize[0] // 64
        self.sigma = sigma
        r = 6 * sigma + 3
        x = np.arange(0, r, 1, np.float32)
        y = x[:, None]
        x0, y0 = 3 * sigma + 1, 3 * sigma + 1
        self.gaussian = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * sigma**2))
    def __call__(self, records):
        kpts_lst = records['joints']
        mask_lst = records['mask']
        for idx, hmsize in enumerate(self.hmsize):
            mask = mask_lst[idx]
            kpts = kpts_lst[idx]
            heatmaps = np.zeros((self.num_joints, hmsize, hmsize))
            inds = np.where(kpts[..., 2] > 0)
            visible = kpts[inds].astype(np.int64)[..., :2]
            ul = np.round(visible - 3 * self.sigma - 1)
            br = np.round(visible + 3 * self.sigma + 2)
            sul = np.maximum(0, -ul)
            sbr = np.minimum(hmsize, br) - ul
            dul = np.clip(ul, 0, hmsize - 1)
            dbr = np.clip(br, 0, hmsize)
            for i in range(len(visible)):
                dx1, dy1 = dul[i]
                dx2, dy2 = dbr[i]
                sx1, sy1 = sul[i]
                sx2, sy2 = sbr[i]
                heatmaps[inds[1][i], dy1:dy2, dx1:dx2] = np.maximum(
                    self.gaussian[sy1:sy2, sx1:sx2],
                    heatmaps[inds[1][i], dy1:dy2, dx1:dx2])
            records['heatmap_gt{}x'.format(idx + 1)] = heatmaps
            records['mask_{}x'.format(idx + 1)] = mask
        del records['mask']
        return records
@register_keypointop
 class RandomFlipHalfBodyTransform(object):
    """apply data augment to image and coords
    to achieve the flip, scale, rotate and half body transform effect for training image
    Args:
        trainsize (list):[w, h], Image target size
        upper_body_ids (list): The upper body joint ids
        flip_pairs (list): The left-right joints exchange order list
        pixel_std (int): The pixel std of the scale
        scale (float): The scale factor to transform the image
        rot (int): The rotate factor to transform the image
        num_joints_half_body (int): The joints threshold of the half body transform
        prob_half_body (float): The threshold of the half body transform
        flip (bool): Whether to flip the image
    Returns:
        records(dict): contain the image and coords after tranformed
    """
    def __init__(self,
                 trainsize,
                 upper_body_ids,
                 flip_pairs,
                 pixel_std,
                 scale=0.35,
                 rot=40,
                 num_joints_half_body=8,
                 prob_half_body=0.3,
                 flip=True,
                 rot_prob=0.6):
        super(RandomFlipHalfBodyTransform, self).__init__()
        self.trainsize = trainsize
        self.upper_body_ids = upper_body_ids
        self.flip_pairs = flip_pairs
        self.pixel_std = pixel_std
        self.scale = scale
        self.rot = rot
        self.num_joints_half_body = num_joints_half_body
        self.prob_half_body = prob_half_body
        self.flip = flip
        self.aspect_ratio = trainsize[0] * 1.0 / trainsize[1]
        self.rot_prob = rot_prob
    def halfbody_transform(self, joints, joints_vis):
        upper_joints = []
        lower_joints = []
        for joint_id in range(joints.shape[0]):
            if joints_vis[joint_id][0] > 0:
                if joint_id in self.upper_body_ids:
                    upper_joints.append(joints[joint_id])
                else:
                    lower_joints.append(joints[joint_id])
        if np.random.randn() < 0.5 and len(upper_joints) > 2:
            selected_joints = upper_joints
        else:
            selected_joints = lower_joints if len(
                lower_joints) > 2 else upper_joints
        if len(selected_joints) < 2:
            return None, None
        selected_joints = np.array(selected_joints, dtype=np.float32)
        center = selected_joints.mean(axis=0)[:2]
        left_top = np.amin(selected_joints, axis=0)
        right_bottom = np.amax(selected_joints, axis=0)
        w = right_bottom[0] - left_top[0]
        h = right_bottom[1] - left_top[1]
        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio
        scale = np.array(
            [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
            dtype=np.float32)
        scale = scale * 1.5
        return center, scale
    def flip_joints(self, joints, joints_vis, width, matched_parts):
        joints[:, 0] = width - joints[:, 0] - 1
        for pair in matched_parts:
            joints[pair[0], :], joints[pair[1], :] = \
                joints[pair[1], :], joints[pair[0], :].copy()
            joints_vis[pair[0], :], joints_vis[pair[1], :] = \
                joints_vis[pair[1], :], joints_vis[pair[0], :].copy()
        return joints * joints_vis, joints_vis
    def __call__(self, records):
        image = records['image']
        joints = records['joints']
        joints_vis = records['joints_vis']
        c = records['center']
        s = records['scale']
        r = 0
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body and
                np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.halfbody_transform(joints,
                                                               joints_vis)
            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body
        sf = self.scale
        rf = self.rot
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        r = np.clip(np.random.randn() * rf, -rf * 2,
                    rf * 2) if np.random.random() <= self.rot_prob else 0
        if self.flip and np.random.random() <= 0.5:
            image = image[:, ::-1, :]
            joints, joints_vis = self.flip_joints(
                joints, joints_vis, image.shape[1], self.flip_pairs)
            c[0] = image.shape[1] - c[0] - 1
        records['image'] = image
        records['joints'] = joints
        records['joints_vis'] = joints_vis
        records['center'] = c
        records['scale'] = s
        records['rotate'] = r
        return records
@register_keypointop
 class TopDownAffine(object):
    """apply affine transform to image and coords
    Args:
        trainsize (list): [w, h], the standard size used to train
        records(dict): the dict contained the image and coords
    Returns:
        records (dict): contain the image and coords after tranformed
    """
    def __init__(self, trainsize):
        self.trainsize = trainsize
    def __call__(self, records):
        image = records['image']
        joints = records['joints']
        joints_vis = records['joints_vis']
        rot = records['rotate'] if "rotate" in records else 0
        trans = get_affine_transform(records['center'], records['scale'] * 200,
                                     rot, self.trainsize)
        image = cv2.warpAffine(
            image,
            trans, (int(self.trainsize[0]), int(self.trainsize[1])),
            flags=cv2.INTER_LINEAR)
        for i in range(joints.shape[0]):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
        records['image'] = image
        records['joints'] = joints
        return records
@register_keypointop
 class TopDownEvalAffine(object):
    """apply affine transform to image and coords
    Args:
        trainsize (list): [w, h], the standard size used to train
        records(dict): the dict contained the image and coords
    Returns:
        records (dict): contain the image and coords after tranformed
    """
    def __init__(self, trainsize):
        self.trainsize = trainsize
    def __call__(self, records):
        image = records['image']
        rot = 0
        imshape = records['im_shape'][::-1]
        center = imshape / 2.
        scale = imshape
        trans = get_affine_transform(center, scale, rot, self.trainsize)
        image = cv2.warpAffine(
            image,
            trans, (int(self.trainsize[0]), int(self.trainsize[1])),
            flags=cv2.INTER_LINEAR)
        records['image'] = image
        return records
@register_keypointop
 class ToHeatmapsTopDown(object):
    """to generate the gaussin heatmaps of keypoint for heatmap loss
    Args:
        hmsize (list): [w, h] output heatmap's size
        sigma (float): the std of gaussin kernel genereted
        records(dict): the dict contained the image and coords
    Returns:
        records (dict): contain the heatmaps used to heatmaploss
    """
    def __init__(self, hmsize, sigma):
        super(ToHeatmapsTopDown, self).__init__()
        self.hmsize = np.array(hmsize)
        self.sigma = sigma
    def __call__(self, records):
        joints = records['joints']
        joints_vis = records['joints_vis']
        num_joints = joints.shape[0]
        image_size = np.array(
            [records['image'].shape[1], records['image'].shape[0]])
        target_weight = np.ones((num_joints, 1), dtype=np.float32)
        target_weight[:, 0] = joints_vis[:, 0]
        target = np.zeros(
            (num_joints, self.hmsize[1], self.hmsize[0]), dtype=np.float32)
        tmp_size = self.sigma * 3
        for joint_id in range(num_joints):
            feat_stride = image_size / self.hmsize
            mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5)
            mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5)
            # Check that any part of the gaussian is in-bounds
            ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
            br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
            if ul[0] >= self.hmsize[0] or ul[1] >= self.hmsize[1] or br[
                    0] < 0 or br[1] < 0:
                # If not, just return the image as is
                target_weight[joint_id] = 0
                continue
            # # Generate gaussian
            size = 2 * tmp_size + 1
            x = np.arange(0, size, 1, np.float32)
            y = x[:, np.newaxis]
            x0 = y0 = size // 2
            # The gaussian is not normalized, we want the center value to equal 1
            g = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * self.sigma**2))
            # Usable gaussian range
            g_x = max(0, -ul[0]), min(br[0], self.hmsize[0]) - ul[0]
            g_y = max(0, -ul[1]), min(br[1], self.hmsize[1]) - ul[1]
            # Image range
            img_x = max(0, ul[0]), min(br[0], self.hmsize[0])
            img_y = max(0, ul[1]), min(br[1], self.hmsize[1])
            v = target_weight[joint_id]
            if v > 0.5:
                target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[
                    0]:g_y[1], g_x[0]:g_x[1]]
        records['target'] = target
        records['target_weight'] = target_weight
        del records['joints'], records['joints_vis']
        return records
--- a/build/lib/ppdet/data/transform/mot_operators.py
+++ b/build/lib/ppdet/data/transform/mot_operators.py
@ -0,0 +1,634 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 try:
    from collections.abc import Sequence
 except Exception:
    from collections import Sequence
 from numbers import Integral
 import cv2
 import copy
 import numpy as np
 import random
 import math
 from .operators import BaseOperator, register_op
 from .batch_operators import Gt2TTFTarget
 from ppdet.modeling.bbox_utils import bbox_iou_np_expand
 from ppdet.utils.logger import setup_logger
 logger = setup_logger(__name__)
 __all__ = [
    'RGBReverse', 'LetterBoxResize', 'MOTRandomAffine', 'Gt2JDETargetThres',
    'Gt2JDETargetMax', 'Gt2FairMOTTarget'
 ]
@register_op
 class RGBReverse(BaseOperator):
    """RGB to BGR, or BGR to RGB, sensitive to MOTRandomAffine
    """
    def __init__(self):
        super(RGBReverse, self).__init__()
    def apply(self, sample, context=None):
        im = sample['image']
        sample['image'] = np.ascontiguousarray(im[:, :, ::-1])
        return sample
@register_op
 class LetterBoxResize(BaseOperator):
    def __init__(self, target_size):
        """
        Resize image to target size, convert normalized xywh to pixel xyxy
        format ([x_center, y_center, width, height] -> [x0, y0, x1, y1]).
        Args:
            target_size (int|list): image target size.
        """
        super(LetterBoxResize, self).__init__()
        if not isinstance(target_size, (Integral, Sequence)):
            raise TypeError(
                "Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
                format(type(target_size)))
        if isinstance(target_size, Integral):
            target_size = [target_size, target_size]
        self.target_size = target_size
    def apply_image(self, img, height, width, color=(127.5, 127.5, 127.5)):
        # letterbox: resize a rectangular image to a padded rectangular
        shape = img.shape[:2]  # [height, width]
        ratio_h = float(height) / shape[0]
        ratio_w = float(width) / shape[1]
        ratio = min(ratio_h, ratio_w)
        new_shape = (round(shape[1] * ratio),
                     round(shape[0] * ratio))  # [width, height]
        padw = (width - new_shape[0]) / 2
        padh = (height - new_shape[1]) / 2
        top, bottom = round(padh - 0.1), round(padh + 0.1)
        left, right = round(padw - 0.1), round(padw + 0.1)
        img = cv2.resize(
            img, new_shape, interpolation=cv2.INTER_AREA)  # resized, no border
        img = cv2.copyMakeBorder(
            img, top, bottom, left, right, cv2.BORDER_CONSTANT,
            value=color)  # padded rectangular
        return img, ratio, padw, padh
    def apply_bbox(self, bbox0, h, w, ratio, padw, padh):
        bboxes = bbox0.copy()
        bboxes[:, 0] = ratio * w * (bbox0[:, 0] - bbox0[:, 2] / 2) + padw
        bboxes[:, 1] = ratio * h * (bbox0[:, 1] - bbox0[:, 3] / 2) + padh
        bboxes[:, 2] = ratio * w * (bbox0[:, 0] + bbox0[:, 2] / 2) + padw
        bboxes[:, 3] = ratio * h * (bbox0[:, 1] + bbox0[:, 3] / 2) + padh
        return bboxes
    def apply(self, sample, context=None):
        """ Resize the image numpy.
        """
        im = sample['image']
        h, w = sample['im_shape']
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image type is not numpy.".format(self))
        if len(im.shape) != 3:
            raise ImageError('{}: image is not 3-dimensional.'.format(self))
        # apply image
        height, width = self.target_size
        img, ratio, padw, padh = self.apply_image(
            im, height=height, width=width)
        sample['image'] = img
        new_shape = (round(h * ratio), round(w * ratio))
        sample['im_shape'] = np.asarray(new_shape, dtype=np.float32)
        sample['scale_factor'] = np.asarray([ratio, ratio], dtype=np.float32)
        # apply bbox
        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], h, w, ratio,
                                                padw, padh)
        return sample
@register_op
 class MOTRandomAffine(BaseOperator):
    """ 
    Affine transform to image and coords to achieve the rotate, scale and
    shift effect for training image.
    Args:
        degrees (list[2]): the rotate range to apply, transform range is [min, max]
        translate (list[2]): the translate range to apply, ransform range is [min, max]
        scale (list[2]): the scale range to apply, transform range is [min, max]
        shear (list[2]): the shear range to apply, transform range is [min, max]
        borderValue (list[3]): value used in case of a constant border when appling
            the perspective transformation
        reject_outside (bool): reject warped bounding bboxes outside of image
    Returns:
        records(dict): contain the image and coords after tranformed
    """
    def __init__(self,
                 degrees=(-5, 5),
                 translate=(0.10, 0.10),
                 scale=(0.50, 1.20),
                 shear=(-2, 2),
                 borderValue=(127.5, 127.5, 127.5),
                 reject_outside=True):
        super(MOTRandomAffine, self).__init__()
        self.degrees = degrees
        self.translate = translate
        self.scale = scale
        self.shear = shear
        self.borderValue = borderValue
        self.reject_outside = reject_outside
    def apply(self, sample, context=None):
        # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
        border = 0  # width of added border (optional)
        img = sample['image']
        height, width = img.shape[0], img.shape[1]
        # Rotation and Scale
        R = np.eye(3)
        a = random.random() * (self.degrees[1] - self.degrees[0]
                               ) + self.degrees[0]
        s = random.random() * (self.scale[1] - self.scale[0]) + self.scale[0]
        R[:2] = cv2.getRotationMatrix2D(
            angle=a, center=(width / 2, height / 2), scale=s)
        # Translation
        T = np.eye(3)
        T[0, 2] = (
            random.random() * 2 - 1
        ) * self.translate[0] * height + border  # x translation (pixels)
        T[1, 2] = (
            random.random() * 2 - 1
        ) * self.translate[1] * width + border  # y translation (pixels)
        # Shear
        S = np.eye(3)
        S[0, 1] = math.tan((random.random() *
                            (self.shear[1] - self.shear[0]) + self.shear[0]) *
                           math.pi / 180)  # x shear (deg)
        S[1, 0] = math.tan((random.random() *
                            (self.shear[1] - self.shear[0]) + self.shear[0]) *
                           math.pi / 180)  # y shear (deg)
        M = S @T @R  # Combined rotation matrix. ORDER IS IMPORTANT HERE!!
        imw = cv2.warpPerspective(
            img,
            M,
            dsize=(width, height),
            flags=cv2.INTER_LINEAR,
            borderValue=self.borderValue)  # BGR order borderValue
        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            targets = sample['gt_bbox']
            n = targets.shape[0]
            points = targets.copy()
            area0 = (points[:, 2] - points[:, 0]) * (
                points[:, 3] - points[:, 1])
            # warp points
            xy = np.ones((n * 4, 3))
            xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
                n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
            xy = (xy @M.T)[:, :2].reshape(n, 8)
            # create new boxes
            x = xy[:, [0, 2, 4, 6]]
            y = xy[:, [1, 3, 5, 7]]
            xy = np.concatenate(
                (x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
            # apply angle-based reduction
            radians = a * math.pi / 180
            reduction = max(abs(math.sin(radians)), abs(math.cos(radians)))**0.5
            x = (xy[:, 2] + xy[:, 0]) / 2
            y = (xy[:, 3] + xy[:, 1]) / 2
            w = (xy[:, 2] - xy[:, 0]) * reduction
            h = (xy[:, 3] - xy[:, 1]) * reduction
            xy = np.concatenate(
                (x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
            # reject warped points outside of image
            if self.reject_outside:
                np.clip(xy[:, 0], 0, width, out=xy[:, 0])
                np.clip(xy[:, 2], 0, width, out=xy[:, 2])
                np.clip(xy[:, 1], 0, height, out=xy[:, 1])
                np.clip(xy[:, 3], 0, height, out=xy[:, 3])
            w = xy[:, 2] - xy[:, 0]
            h = xy[:, 3] - xy[:, 1]
            area = w * h
            ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
            i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)
            if sum(i) > 0:
                sample['gt_bbox'] = xy[i].astype(sample['gt_bbox'].dtype)
                sample['gt_class'] = sample['gt_class'][i]
                if 'difficult' in sample:
                    sample['difficult'] = sample['difficult'][i]
                if 'gt_ide' in sample:
                    sample['gt_ide'] = sample['gt_ide'][i]
                if 'is_crowd' in sample:
                    sample['is_crowd'] = sample['is_crowd'][i]
                sample['image'] = imw
                return sample
            else:
                return sample
@register_op
 class Gt2JDETargetThres(BaseOperator):
    __shared__ = ['num_classes']
    """
    Generate JDE targets by groud truth data when training
    Args:
        anchors (list): anchors of JDE model
        anchor_masks (list): anchor_masks of JDE model
        downsample_ratios (list): downsample ratios of JDE model
        ide_thresh (float): thresh of identity, higher is groud truth 
        fg_thresh (float): thresh of foreground, higher is foreground
        bg_thresh (float): thresh of background, lower is background
        num_classes (int): number of classes
    """
    def __init__(self,
                 anchors,
                 anchor_masks,
                 downsample_ratios,
                 ide_thresh=0.5,
                 fg_thresh=0.5,
                 bg_thresh=0.4,
                 num_classes=1):
        super(Gt2JDETargetThres, self).__init__()
        self.anchors = anchors
        self.anchor_masks = anchor_masks
        self.downsample_ratios = downsample_ratios
        self.ide_thresh = ide_thresh
        self.fg_thresh = fg_thresh
        self.bg_thresh = bg_thresh
        self.num_classes = num_classes
    def generate_anchor(self, nGh, nGw, anchor_hw):
        nA = len(anchor_hw)
        yy, xx = np.meshgrid(np.arange(nGh), np.arange(nGw))
        mesh = np.stack([xx.T, yy.T], axis=0)  # [2, nGh, nGw]
        mesh = np.repeat(mesh[None, :], nA, axis=0)  # [nA, 2, nGh, nGw]
        anchor_offset_mesh = anchor_hw[:, :, None][:, :, :, None]
        anchor_offset_mesh = np.repeat(anchor_offset_mesh, nGh, axis=-2)
        anchor_offset_mesh = np.repeat(anchor_offset_mesh, nGw, axis=-1)
        anchor_mesh = np.concatenate(
            [mesh, anchor_offset_mesh], axis=1)  # [nA, 4, nGh, nGw]
        return anchor_mesh
    def encode_delta(self, gt_box_list, fg_anchor_list):
        px, py, pw, ph = fg_anchor_list[:, 0], fg_anchor_list[:,1], \
                        fg_anchor_list[:, 2], fg_anchor_list[:,3]
        gx, gy, gw, gh = gt_box_list[:, 0], gt_box_list[:, 1], \
                        gt_box_list[:, 2], gt_box_list[:, 3]
        dx = (gx - px) / pw
        dy = (gy - py) / ph
        dw = np.log(gw / pw)
        dh = np.log(gh / ph)
        return np.stack([dx, dy, dw, dh], axis=1)
    def pad_box(self, sample, num_max):
        assert 'gt_bbox' in sample
        bbox = sample['gt_bbox']
        gt_num = len(bbox)
        pad_bbox = np.zeros((num_max, 4), dtype=np.float32)
        if gt_num > 0:
            pad_bbox[:gt_num, :] = bbox[:gt_num, :]
        sample['gt_bbox'] = pad_bbox
        if 'gt_score' in sample:
            pad_score = np.zeros((num_max, ), dtype=np.float32)
            if gt_num > 0:
                pad_score[:gt_num] = sample['gt_score'][:gt_num, 0]
            sample['gt_score'] = pad_score
        if 'difficult' in sample:
            pad_diff = np.zeros((num_max, ), dtype=np.int32)
            if gt_num > 0:
                pad_diff[:gt_num] = sample['difficult'][:gt_num, 0]
            sample['difficult'] = pad_diff
        if 'is_crowd' in sample:
            pad_crowd = np.zeros((num_max, ), dtype=np.int32)
            if gt_num > 0:
                pad_crowd[:gt_num] = sample['is_crowd'][:gt_num, 0]
            sample['is_crowd'] = pad_crowd
        if 'gt_ide' in sample:
            pad_ide = np.zeros((num_max, ), dtype=np.int32)
            if gt_num > 0:
                pad_ide[:gt_num] = sample['gt_ide'][:gt_num, 0]
            sample['gt_ide'] = pad_ide
        return sample
    def __call__(self, samples, context=None):
        assert len(self.anchor_masks) == len(self.downsample_ratios), \
            "anchor_masks', and 'downsample_ratios' should have same length."
        h, w = samples[0]['image'].shape[1:3]
        num_max = 0
        for sample in samples:
            num_max = max(num_max, len(sample['gt_bbox']))
        for sample in samples:
            gt_bbox = sample['gt_bbox']
            gt_ide = sample['gt_ide']
            for i, (anchor_hw, downsample_ratio
                    ) in enumerate(zip(self.anchors, self.downsample_ratios)):
                anchor_hw = np.array(
                    anchor_hw, dtype=np.float32) / downsample_ratio
                nA = len(anchor_hw)
                nGh, nGw = int(h / downsample_ratio), int(w / downsample_ratio)
                tbox = np.zeros((nA, nGh, nGw, 4), dtype=np.float32)
                tconf = np.zeros((nA, nGh, nGw), dtype=np.float32)
                tid = -np.ones((nA, nGh, nGw, 1), dtype=np.float32)
                gxy, gwh = gt_bbox[:, 0:2].copy(), gt_bbox[:, 2:4].copy()
                gxy[:, 0] = gxy[:, 0] * nGw
                gxy[:, 1] = gxy[:, 1] * nGh
                gwh[:, 0] = gwh[:, 0] * nGw
                gwh[:, 1] = gwh[:, 1] * nGh
                gxy[:, 0] = np.clip(gxy[:, 0], 0, nGw - 1)
                gxy[:, 1] = np.clip(gxy[:, 1], 0, nGh - 1)
                tboxes = np.concatenate([gxy, gwh], axis=1)
                anchor_mesh = self.generate_anchor(nGh, nGw, anchor_hw)
                anchor_list = np.transpose(anchor_mesh,
                                           (0, 2, 3, 1)).reshape(-1, 4)
                iou_pdist = bbox_iou_np_expand(
                    anchor_list, tboxes, x1y1x2y2=False)
                iou_max = np.max(iou_pdist, axis=1)
                max_gt_index = np.argmax(iou_pdist, axis=1)
                iou_map = iou_max.reshape(nA, nGh, nGw)
                gt_index_map = max_gt_index.reshape(nA, nGh, nGw)
                id_index = iou_map > self.ide_thresh
                fg_index = iou_map > self.fg_thresh
                bg_index = iou_map < self.bg_thresh
                ign_index = (iou_map < self.fg_thresh) * (
                    iou_map > self.bg_thresh)
                tconf[fg_index] = 1
                tconf[bg_index] = 0
                tconf[ign_index] = -1
                gt_index = gt_index_map[fg_index]
                gt_box_list = tboxes[gt_index]
                gt_id_list = gt_ide[gt_index_map[id_index]]
                if np.sum(fg_index) > 0:
                    tid[id_index] = gt_id_list
                    fg_anchor_list = anchor_list.reshape(nA, nGh, nGw,
                                                         4)[fg_index]
                    delta_target = self.encode_delta(gt_box_list,
                                                     fg_anchor_list)
                    tbox[fg_index] = delta_target
                sample['tbox{}'.format(i)] = tbox
                sample['tconf{}'.format(i)] = tconf
                sample['tide{}'.format(i)] = tid
            sample.pop('gt_class')
            sample = self.pad_box(sample, num_max)
        return samples
@register_op
 class Gt2JDETargetMax(BaseOperator):
    __shared__ = ['num_classes']
    """
    Generate JDE targets by groud truth data when evaluating
    Args:
        anchors (list): anchors of JDE model
        anchor_masks (list): anchor_masks of JDE model
        downsample_ratios (list): downsample ratios of JDE model
        max_iou_thresh (float): iou thresh for high quality anchor
        num_classes (int): number of classes
    """
    def __init__(self,
                 anchors,
                 anchor_masks,
                 downsample_ratios,
                 max_iou_thresh=0.60,
                 num_classes=1):
        super(Gt2JDETargetMax, self).__init__()
        self.anchors = anchors
        self.anchor_masks = anchor_masks
        self.downsample_ratios = downsample_ratios
        self.max_iou_thresh = max_iou_thresh
        self.num_classes = num_classes
    def __call__(self, samples, context=None):
        assert len(self.anchor_masks) == len(self.downsample_ratios), \
            "anchor_masks', and 'downsample_ratios' should have same length."
        h, w = samples[0]['image'].shape[1:3]
        for sample in samples:
            gt_bbox = sample['gt_bbox']
            gt_ide = sample['gt_ide']
            for i, (anchor_hw, downsample_ratio
                    ) in enumerate(zip(self.anchors, self.downsample_ratios)):
                anchor_hw = np.array(
                    anchor_hw, dtype=np.float32) / downsample_ratio
                nA = len(anchor_hw)
                nGh, nGw = int(h / downsample_ratio), int(w / downsample_ratio)
                tbox = np.zeros((nA, nGh, nGw, 4), dtype=np.float32)
                tconf = np.zeros((nA, nGh, nGw), dtype=np.float32)
                tid = -np.ones((nA, nGh, nGw, 1), dtype=np.float32)
                gxy, gwh = gt_bbox[:, 0:2].copy(), gt_bbox[:, 2:4].copy()
                gxy[:, 0] = gxy[:, 0] * nGw
                gxy[:, 1] = gxy[:, 1] * nGh
                gwh[:, 0] = gwh[:, 0] * nGw
                gwh[:, 1] = gwh[:, 1] * nGh
                gi = np.clip(gxy[:, 0], 0, nGw - 1).astype(int)
                gj = np.clip(gxy[:, 1], 0, nGh - 1).astype(int)
                # iou of targets-anchors (using wh only)
                box1 = gwh
                box2 = anchor_hw[:, None, :]
                inter_area = np.minimum(box1, box2).prod(2)
                iou = inter_area / (
                    box1.prod(1) + box2.prod(2) - inter_area + 1e-16)
                # Select best iou_pred and anchor
                iou_best = iou.max(0)  # best anchor [0-2] for each target
                a = np.argmax(iou, axis=0)
                # Select best unique target-anchor combinations
                iou_order = np.argsort(-iou_best)  # best to worst
                # Unique anchor selection
                u = np.stack((gi, gj, a), 0)[:, iou_order]
                _, first_unique = np.unique(u, axis=1, return_index=True)
                mask = iou_order[first_unique]
                # best anchor must share significant commonality (iou) with target
                # TODO: examine arbitrary threshold
                idx = mask[iou_best[mask] > self.max_iou_thresh]
                if len(idx) > 0:
                    a_i, gj_i, gi_i = a[idx], gj[idx], gi[idx]
                    t_box = gt_bbox[idx]
                    t_id = gt_ide[idx]
                    if len(t_box.shape) == 1:
                        t_box = t_box.reshape(1, 4)
                    gxy, gwh = t_box[:, 0:2].copy(), t_box[:, 2:4].copy()
                    gxy[:, 0] = gxy[:, 0] * nGw
                    gxy[:, 1] = gxy[:, 1] * nGh
                    gwh[:, 0] = gwh[:, 0] * nGw
                    gwh[:, 1] = gwh[:, 1] * nGh
                    # XY coordinates
                    tbox[:, :, :, 0:2][a_i, gj_i, gi_i] = gxy - gxy.astype(int)
                    # Width and height in yolo method
                    tbox[:, :, :, 2:4][a_i, gj_i, gi_i] = np.log(gwh /
                                                                 anchor_hw[a_i])
                    tconf[a_i, gj_i, gi_i] = 1
                    tid[a_i, gj_i, gi_i] = t_id
                sample['tbox{}'.format(i)] = tbox
                sample['tconf{}'.format(i)] = tconf
                sample['tide{}'.format(i)] = tid
 class Gt2FairMOTTarget(Gt2TTFTarget):
    __shared__ = ['num_classes']
    """
    Generate FairMOT targets by ground truth data.
    Difference between Gt2FairMOTTarget and Gt2TTFTarget are:
        1. the gaussian kernal radius to generate a heatmap.
        2. the targets needed during traing.
    Args:
        num_classes(int): the number of classes.
        down_ratio(int): the down ratio from images to heatmap, 4 by default.
        max_objs(int): the maximum number of ground truth objects in a image, 500 by default.
    """
    def __init__(self, num_classes=1, down_ratio=4, max_objs=500):
        super(Gt2TTFTarget, self).__init__()
        self.down_ratio = down_ratio
        self.num_classes = num_classes
        self.max_objs = max_objs
    def __call__(self, samples, context=None):
        for b_id, sample in enumerate(samples):
            output_h = sample['image'].shape[1] // self.down_ratio
            output_w = sample['image'].shape[2] // self.down_ratio
            heatmap = np.zeros(
                (self.num_classes, output_h, output_w), dtype='float32')
            bbox_size = np.zeros((self.max_objs, 4), dtype=np.float32)
            center_offset = np.zeros((self.max_objs, 2), dtype=np.float32)
            index = np.zeros((self.max_objs, ), dtype=np.int64)
            index_mask = np.zeros((self.max_objs, ), dtype=np.int32)
            reid = np.zeros((self.max_objs, ), dtype=np.int64)
            bbox_xys = np.zeros((self.max_objs, 4), dtype=np.float32)
            gt_bbox = sample['gt_bbox']
            gt_class = sample['gt_class']
            gt_ide = sample['gt_ide']
            for k in range(len(gt_bbox)):
                cls_id = gt_class[k][0]
                bbox = gt_bbox[k]
                ide = gt_ide[k][0]
                bbox[[0, 2]] = bbox[[0, 2]] * output_w
                bbox[[1, 3]] = bbox[[1, 3]] * output_h
                bbox_amodal = copy.deepcopy(bbox)
                bbox_amodal[0] = bbox_amodal[0] - bbox_amodal[2] / 2.
                bbox_amodal[1] = bbox_amodal[1] - bbox_amodal[3] / 2.
                bbox_amodal[2] = bbox_amodal[0] + bbox_amodal[2]
                bbox_amodal[3] = bbox_amodal[1] + bbox_amodal[3]
                bbox[0] = np.clip(bbox[0], 0, output_w - 1)
                bbox[1] = np.clip(bbox[1], 0, output_h - 1)
                h = bbox[3]
                w = bbox[2]
                bbox_xy = copy.deepcopy(bbox)
                bbox_xy[0] = bbox_xy[0] - bbox_xy[2] / 2
                bbox_xy[1] = bbox_xy[1] - bbox_xy[3] / 2
                bbox_xy[2] = bbox_xy[0] + bbox_xy[2]
                bbox_xy[3] = bbox_xy[1] + bbox_xy[3]
                if h > 0 and w > 0:
                    radius = self.gaussian_radius((math.ceil(h), math.ceil(w)))
                    radius = max(0, int(radius))
                    ct = np.array([bbox[0], bbox[1]], dtype=np.float32)
                    ct_int = ct.astype(np.int32)
                    self.draw_truncate_gaussian(heatmap[cls_id], ct_int, radius,
                                                radius)
                    bbox_size[k] = ct[0] - bbox_amodal[0], ct[1] - bbox_amodal[1], \
                            bbox_amodal[2] - ct[0], bbox_amodal[3] - ct[1]
                    index[k] = ct_int[1] * output_w + ct_int[0]
                    center_offset[k] = ct - ct_int
                    index_mask[k] = 1
                    reid[k] = ide
                    bbox_xys[k] = bbox_xy
            sample['heatmap'] = heatmap
            sample['index'] = index
            sample['offset'] = center_offset
            sample['size'] = bbox_size
            sample['index_mask'] = index_mask
            sample['reid'] = reid
            sample['bbox_xys'] = bbox_xys
            sample.pop('is_crowd', None)
            sample.pop('difficult', None)
            sample.pop('gt_class', None)
            sample.pop('gt_bbox', None)
            sample.pop('gt_score', None)
            sample.pop('gt_ide', None)
        return samples
    def gaussian_radius(self, det_size, min_overlap=0.7):
        height, width = det_size
        a1 = 1
        b1 = (height + width)
        c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
        sq1 = np.sqrt(b1**2 - 4 * a1 * c1)
        r1 = (b1 + sq1) / 2
        a2 = 4
        b2 = 2 * (height + width)
        c2 = (1 - min_overlap) * width * height
        sq2 = np.sqrt(b2**2 - 4 * a2 * c2)
        r2 = (b2 + sq2) / 2
        a3 = 4 * min_overlap
        b3 = -2 * min_overlap * (height + width)
        c3 = (min_overlap - 1) * width * height
        sq3 = np.sqrt(b3**2 - 4 * a3 * c3)
        r3 = (b3 + sq3) / 2
        return min(r1, r2, r3)
--- a/build/lib/ppdet/data/transform/op_helper.py
+++ b/build/lib/ppdet/data/transform/op_helper.py
@ -0,0 +1,523 @@
 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # this file contains helper methods for BBOX processing
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import numpy as np
 import random
 import math
 import cv2
 def meet_emit_constraint(src_bbox, sample_bbox):
    center_x = (src_bbox[2] + src_bbox[0]) / 2
    center_y = (src_bbox[3] + src_bbox[1]) / 2
    if center_x >= sample_bbox[0] and \
            center_x <= sample_bbox[2] and \
            center_y >= sample_bbox[1] and \
            center_y <= sample_bbox[3]:
        return True
    return False
 def clip_bbox(src_bbox):
    src_bbox[0] = max(min(src_bbox[0], 1.0), 0.0)
    src_bbox[1] = max(min(src_bbox[1], 1.0), 0.0)
    src_bbox[2] = max(min(src_bbox[2], 1.0), 0.0)
    src_bbox[3] = max(min(src_bbox[3], 1.0), 0.0)
    return src_bbox
 def bbox_area(src_bbox):
    if src_bbox[2] < src_bbox[0] or src_bbox[3] < src_bbox[1]:
        return 0.
    else:
        width = src_bbox[2] - src_bbox[0]
        height = src_bbox[3] - src_bbox[1]
        return width * height
 def is_overlap(object_bbox, sample_bbox):
    if object_bbox[0] >= sample_bbox[2] or \
       object_bbox[2] <= sample_bbox[0] or \
       object_bbox[1] >= sample_bbox[3] or \
       object_bbox[3] <= sample_bbox[1]:
        return False
    else:
        return True
 def filter_and_process(sample_bbox, bboxes, labels, scores=None,
                       keypoints=None):
    new_bboxes = []
    new_labels = []
    new_scores = []
    new_keypoints = []
    new_kp_ignore = []
    for i in range(len(bboxes)):
        new_bbox = [0, 0, 0, 0]
        obj_bbox = [bboxes[i][0], bboxes[i][1], bboxes[i][2], bboxes[i][3]]
        if not meet_emit_constraint(obj_bbox, sample_bbox):
            continue
        if not is_overlap(obj_bbox, sample_bbox):
            continue
        sample_width = sample_bbox[2] - sample_bbox[0]
        sample_height = sample_bbox[3] - sample_bbox[1]
        new_bbox[0] = (obj_bbox[0] - sample_bbox[0]) / sample_width
        new_bbox[1] = (obj_bbox[1] - sample_bbox[1]) / sample_height
        new_bbox[2] = (obj_bbox[2] - sample_bbox[0]) / sample_width
        new_bbox[3] = (obj_bbox[3] - sample_bbox[1]) / sample_height
        new_bbox = clip_bbox(new_bbox)
        if bbox_area(new_bbox) > 0:
            new_bboxes.append(new_bbox)
            new_labels.append([labels[i][0]])
            if scores is not None:
                new_scores.append([scores[i][0]])
            if keypoints is not None:
                sample_keypoint = keypoints[0][i]
                for j in range(len(sample_keypoint)):
                    kp_len = sample_height if j % 2 else sample_width
                    sample_coord = sample_bbox[1] if j % 2 else sample_bbox[0]
                    sample_keypoint[j] = (
                        sample_keypoint[j] - sample_coord) / kp_len
                    sample_keypoint[j] = max(min(sample_keypoint[j], 1.0), 0.0)
                new_keypoints.append(sample_keypoint)
                new_kp_ignore.append(keypoints[1][i])
    bboxes = np.array(new_bboxes)
    labels = np.array(new_labels)
    scores = np.array(new_scores)
    if keypoints is not None:
        keypoints = np.array(new_keypoints)
        new_kp_ignore = np.array(new_kp_ignore)
        return bboxes, labels, scores, (keypoints, new_kp_ignore)
    return bboxes, labels, scores
 def bbox_area_sampling(bboxes, labels, scores, target_size, min_size):
    new_bboxes = []
    new_labels = []
    new_scores = []
    for i, bbox in enumerate(bboxes):
        w = float((bbox[2] - bbox[0]) * target_size)
        h = float((bbox[3] - bbox[1]) * target_size)
        if w * h < float(min_size * min_size):
            continue
        else:
            new_bboxes.append(bbox)
            new_labels.append(labels[i])
            if scores is not None and scores.size != 0:
                new_scores.append(scores[i])
    bboxes = np.array(new_bboxes)
    labels = np.array(new_labels)
    scores = np.array(new_scores)
    return bboxes, labels, scores
 def generate_sample_bbox(sampler):
    scale = np.random.uniform(sampler[2], sampler[3])
    aspect_ratio = np.random.uniform(sampler[4], sampler[5])
    aspect_ratio = max(aspect_ratio, (scale**2.0))
    aspect_ratio = min(aspect_ratio, 1 / (scale**2.0))
    bbox_width = scale * (aspect_ratio**0.5)
    bbox_height = scale / (aspect_ratio**0.5)
    xmin_bound = 1 - bbox_width
    ymin_bound = 1 - bbox_height
    xmin = np.random.uniform(0, xmin_bound)
    ymin = np.random.uniform(0, ymin_bound)
    xmax = xmin + bbox_width
    ymax = ymin + bbox_height
    sampled_bbox = [xmin, ymin, xmax, ymax]
    return sampled_bbox
 def generate_sample_bbox_square(sampler, image_width, image_height):
    scale = np.random.uniform(sampler[2], sampler[3])
    aspect_ratio = np.random.uniform(sampler[4], sampler[5])
    aspect_ratio = max(aspect_ratio, (scale**2.0))
    aspect_ratio = min(aspect_ratio, 1 / (scale**2.0))
    bbox_width = scale * (aspect_ratio**0.5)
    bbox_height = scale / (aspect_ratio**0.5)
    if image_height < image_width:
        bbox_width = bbox_height * image_height / image_width
    else:
        bbox_height = bbox_width * image_width / image_height
    xmin_bound = 1 - bbox_width
    ymin_bound = 1 - bbox_height
    xmin = np.random.uniform(0, xmin_bound)
    ymin = np.random.uniform(0, ymin_bound)
    xmax = xmin + bbox_width
    ymax = ymin + bbox_height
    sampled_bbox = [xmin, ymin, xmax, ymax]
    return sampled_bbox
 def data_anchor_sampling(bbox_labels, image_width, image_height, scale_array,
                         resize_width):
    num_gt = len(bbox_labels)
    # np.random.randint range: [low, high)
    rand_idx = np.random.randint(0, num_gt) if num_gt != 0 else 0
    if num_gt != 0:
        norm_xmin = bbox_labels[rand_idx][0]
        norm_ymin = bbox_labels[rand_idx][1]
        norm_xmax = bbox_labels[rand_idx][2]
        norm_ymax = bbox_labels[rand_idx][3]
        xmin = norm_xmin * image_width
        ymin = norm_ymin * image_height
        wid = image_width * (norm_xmax - norm_xmin)
        hei = image_height * (norm_ymax - norm_ymin)
        range_size = 0
        area = wid * hei
        for scale_ind in range(0, len(scale_array) - 1):
            if area > scale_array[scale_ind] ** 2 and area < \
                    scale_array[scale_ind + 1] ** 2:
                range_size = scale_ind + 1
                break
        if area > scale_array[len(scale_array) - 2]**2:
            range_size = len(scale_array) - 2
        scale_choose = 0.0
        if range_size == 0:
            rand_idx_size = 0
        else:
            # np.random.randint range: [low, high)
            rng_rand_size = np.random.randint(0, range_size + 1)
            rand_idx_size = rng_rand_size % (range_size + 1)
        if rand_idx_size == range_size:
            min_resize_val = scale_array[rand_idx_size] / 2.0
            max_resize_val = min(2.0 * scale_array[rand_idx_size],
                                 2 * math.sqrt(wid * hei))
            scale_choose = random.uniform(min_resize_val, max_resize_val)
        else:
            min_resize_val = scale_array[rand_idx_size] / 2.0
            max_resize_val = 2.0 * scale_array[rand_idx_size]
            scale_choose = random.uniform(min_resize_val, max_resize_val)
        sample_bbox_size = wid * resize_width / scale_choose
        w_off_orig = 0.0
        h_off_orig = 0.0
        if sample_bbox_size < max(image_height, image_width):
            if wid <= sample_bbox_size:
                w_off_orig = np.random.uniform(xmin + wid - sample_bbox_size,
                                               xmin)
            else:
                w_off_orig = np.random.uniform(xmin,
                                               xmin + wid - sample_bbox_size)
            if hei <= sample_bbox_size:
                h_off_orig = np.random.uniform(ymin + hei - sample_bbox_size,
                                               ymin)
            else:
                h_off_orig = np.random.uniform(ymin,
                                               ymin + hei - sample_bbox_size)
        else:
            w_off_orig = np.random.uniform(image_width - sample_bbox_size, 0.0)
            h_off_orig = np.random.uniform(image_height - sample_bbox_size, 0.0)
        w_off_orig = math.floor(w_off_orig)
        h_off_orig = math.floor(h_off_orig)
        # Figure out top left coordinates.
        w_off = float(w_off_orig / image_width)
        h_off = float(h_off_orig / image_height)
        sampled_bbox = [
            w_off, h_off, w_off + float(sample_bbox_size / image_width),
            h_off + float(sample_bbox_size / image_height)
        ]
        return sampled_bbox
    else:
        return 0
 def jaccard_overlap(sample_bbox, object_bbox):
    if sample_bbox[0] >= object_bbox[2] or \
        sample_bbox[2] <= object_bbox[0] or \
        sample_bbox[1] >= object_bbox[3] or \
        sample_bbox[3] <= object_bbox[1]:
        return 0
    intersect_xmin = max(sample_bbox[0], object_bbox[0])
    intersect_ymin = max(sample_bbox[1], object_bbox[1])
    intersect_xmax = min(sample_bbox[2], object_bbox[2])
    intersect_ymax = min(sample_bbox[3], object_bbox[3])
    intersect_size = (intersect_xmax - intersect_xmin) * (
        intersect_ymax - intersect_ymin)
    sample_bbox_size = bbox_area(sample_bbox)
    object_bbox_size = bbox_area(object_bbox)
    overlap = intersect_size / (
        sample_bbox_size + object_bbox_size - intersect_size)
    return overlap
 def intersect_bbox(bbox1, bbox2):
    if bbox2[0] > bbox1[2] or bbox2[2] < bbox1[0] or \
        bbox2[1] > bbox1[3] or bbox2[3] < bbox1[1]:
        intersection_box = [0.0, 0.0, 0.0, 0.0]
    else:
        intersection_box = [
            max(bbox1[0], bbox2[0]), max(bbox1[1], bbox2[1]),
            min(bbox1[2], bbox2[2]), min(bbox1[3], bbox2[3])
        ]
    return intersection_box
 def bbox_coverage(bbox1, bbox2):
    inter_box = intersect_bbox(bbox1, bbox2)
    intersect_size = bbox_area(inter_box)
    if intersect_size > 0:
        bbox1_size = bbox_area(bbox1)
        return intersect_size / bbox1_size
    else:
        return 0.
 def satisfy_sample_constraint(sampler,
                              sample_bbox,
                              gt_bboxes,
                              satisfy_all=False):
    if sampler[6] == 0 and sampler[7] == 0:
        return True
    satisfied = []
    for i in range(len(gt_bboxes)):
        object_bbox = [
            gt_bboxes[i][0], gt_bboxes[i][1], gt_bboxes[i][2], gt_bboxes[i][3]
        ]
        overlap = jaccard_overlap(sample_bbox, object_bbox)
        if sampler[6] != 0 and \
                overlap < sampler[6]:
            satisfied.append(False)
            continue
        if sampler[7] != 0 and \
                overlap > sampler[7]:
            satisfied.append(False)
            continue
        satisfied.append(True)
        if not satisfy_all:
            return True
    if satisfy_all:
        return np.all(satisfied)
    else:
        return False
 def satisfy_sample_constraint_coverage(sampler, sample_bbox, gt_bboxes):
    if sampler[6] == 0 and sampler[7] == 0:
        has_jaccard_overlap = False
    else:
        has_jaccard_overlap = True
    if sampler[8] == 0 and sampler[9] == 0:
        has_object_coverage = False
    else:
        has_object_coverage = True
    if not has_jaccard_overlap and not has_object_coverage:
        return True
    found = False
    for i in range(len(gt_bboxes)):
        object_bbox = [
            gt_bboxes[i][0], gt_bboxes[i][1], gt_bboxes[i][2], gt_bboxes[i][3]
        ]
        if has_jaccard_overlap:
            overlap = jaccard_overlap(sample_bbox, object_bbox)
            if sampler[6] != 0 and \
                    overlap < sampler[6]:
                continue
            if sampler[7] != 0 and \
                    overlap > sampler[7]:
                continue
            found = True
        if has_object_coverage:
            object_coverage = bbox_coverage(object_bbox, sample_bbox)
            if sampler[8] != 0 and \
                    object_coverage < sampler[8]:
                continue
            if sampler[9] != 0 and \
                    object_coverage > sampler[9]:
                continue
            found = True
        if found:
            return True
    return found
 def crop_image_sampling(img, sample_bbox, image_width, image_height,
                        target_size):
    # no clipping here
    xmin = int(sample_bbox[0] * image_width)
    xmax = int(sample_bbox[2] * image_width)
    ymin = int(sample_bbox[1] * image_height)
    ymax = int(sample_bbox[3] * image_height)
    w_off = xmin
    h_off = ymin
    width = xmax - xmin
    height = ymax - ymin
    cross_xmin = max(0.0, float(w_off))
    cross_ymin = max(0.0, float(h_off))
    cross_xmax = min(float(w_off + width - 1.0), float(image_width))
    cross_ymax = min(float(h_off + height - 1.0), float(image_height))
    cross_width = cross_xmax - cross_xmin
    cross_height = cross_ymax - cross_ymin
    roi_xmin = 0 if w_off >= 0 else abs(w_off)
    roi_ymin = 0 if h_off >= 0 else abs(h_off)
    roi_width = cross_width
    roi_height = cross_height
    roi_y1 = int(roi_ymin)
    roi_y2 = int(roi_ymin + roi_height)
    roi_x1 = int(roi_xmin)
    roi_x2 = int(roi_xmin + roi_width)
    cross_y1 = int(cross_ymin)
    cross_y2 = int(cross_ymin + cross_height)
    cross_x1 = int(cross_xmin)
    cross_x2 = int(cross_xmin + cross_width)
    sample_img = np.zeros((height, width, 3))
    sample_img[roi_y1: roi_y2, roi_x1: roi_x2] = \
        img[cross_y1: cross_y2, cross_x1: cross_x2]
    sample_img = cv2.resize(
        sample_img, (target_size, target_size), interpolation=cv2.INTER_AREA)
    return sample_img
 def is_poly(segm):
    assert isinstance(segm, (list, dict)), \
        "Invalid segm type: {}".format(type(segm))
    return isinstance(segm, list)
 def gaussian_radius(bbox_size, min_overlap):
    height, width = bbox_size
    a1 = 1
    b1 = (height + width)
    c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
    sq1 = np.sqrt(b1**2 - 4 * a1 * c1)
    radius1 = (b1 - sq1) / (2 * a1)
    a2 = 4
    b2 = 2 * (height + width)
    c2 = (1 - min_overlap) * width * height
    sq2 = np.sqrt(b2**2 - 4 * a2 * c2)
    radius2 = (b2 - sq2) / (2 * a2)
    a3 = 4 * min_overlap
    b3 = -2 * min_overlap * (height + width)
    c3 = (min_overlap - 1) * width * height
    sq3 = np.sqrt(b3**2 - 4 * a3 * c3)
    radius3 = (b3 + sq3) / (2 * a3)
    return min(radius1, radius2, radius3)
 def draw_gaussian(heatmap, center, radius, k=1, delte=6):
    diameter = 2 * radius + 1
    sigma = diameter / delte
    gaussian = gaussian2D((diameter, diameter), sigma_x=sigma, sigma_y=sigma)
    x, y = center
    height, width = heatmap.shape[0:2]
    left, right = min(x, radius), min(width - x, radius + 1)
    top, bottom = min(y, radius), min(height - y, radius + 1)
    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
    masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:
                               radius + right]
    np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
 def gaussian2D(shape, sigma_x=1, sigma_y=1):
    m, n = [(ss - 1.) / 2. for ss in shape]
    y, x = np.ogrid[-m:m + 1, -n:n + 1]
    h = np.exp(-(x * x / (2 * sigma_x * sigma_x) + y * y / (2 * sigma_y *
                                                            sigma_y)))
    h[h < np.finfo(h.dtype).eps * h.max()] = 0
    return h
 def transform_bbox(sample,
                   M,
                   w,
                   h,
                   area_thr=0.25,
                   wh_thr=2,
                   ar_thr=20,
                   perspective=False):
    """
    transfrom bbox according to tranformation matrix M,
    refer to https://github.com/ultralytics/yolov5/blob/develop/utils/datasets.py
    """
    bbox = sample['gt_bbox']
    label = sample['gt_class']
    # rotate bbox
    n = len(bbox)
    xy = np.ones((n * 4, 3), dtype=np.float32)
    xy[:, :2] = bbox[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)
    # xy = xy @ M.T
    xy = np.matmul(xy, M.T)
    if perspective:
        xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8)
    else:
        xy = xy[:, :2].reshape(n, 8)
    # get new bboxes
    x = xy[:, [0, 2, 4, 6]]
    y = xy[:, [1, 3, 5, 7]]
    bbox = np.concatenate(
        (x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
    # clip boxes
    mask = filter_bbox(bbox, w, h, area_thr)
    sample['gt_bbox'] = bbox[mask]
    sample['gt_class'] = sample['gt_class'][mask]
    if 'is_crowd' in sample:
        sample['is_crowd'] = sample['is_crowd'][mask]
    if 'difficult' in sample:
        sample['difficult'] = sample['difficult'][mask]
    return sample
 def filter_bbox(bbox, w, h, area_thr=0.25, wh_thr=2, ar_thr=20):
    """
    filter bbox, refer to https://github.com/ultralytics/yolov5/blob/develop/utils/datasets.py
    """
    # clip boxes
    area1 = (bbox[:, 2:4] - bbox[:, 0:2]).prod(1)
    bbox[:, [0, 2]] = bbox[:, [0, 2]].clip(0, w)
    bbox[:, [1, 3]] = bbox[:, [1, 3]].clip(0, h)
    # compute
    area2 = (bbox[:, 2:4] - bbox[:, 0:2]).prod(1)
    area_ratio = area2 / (area1 + 1e-16)
    wh = bbox[:, 2:4] - bbox[:, 0:2]
    ar_ratio = np.maximum(wh[:, 1] / (wh[:, 0] + 1e-16),
                          wh[:, 0] / (wh[:, 1] + 1e-16))
    mask = (area_ratio > area_thr) & (
        (wh > wh_thr).all(1)) & (ar_ratio < ar_thr)
    return mask
--- a/build/lib/ppdet/data/transform/operators.py
+++ b/build/lib/ppdet/data/transform/operators.py
--- a/build/lib/ppdet/engine/init.py
+++ b/build/lib/ppdet/engine/init.py
@ -0,0 +1,29 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 from . import trainer
 from .trainer import *
 from . import tracker
 from .tracker import *
 from . import callbacks
 from .callbacks import *
 from . import env
 from .env import *
 __all__ = trainer.__all__ \
        + tracker.__all__ \
        + callbacks.__all__ \
        + env.__all__
--- a/build/lib/ppdet/engine/callbacks.py
+++ b/build/lib/ppdet/engine/callbacks.py
@ -0,0 +1,258 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import os
 import sys
 import datetime
 import six
 import paddle.distributed as dist
 from ppdet.utils.checkpoint import save_model
 from ppdet.utils.logger import setup_logger
 logger = setup_logger('ppdet.engine')
 __all__ = ['Callback', 'ComposeCallback', 'LogPrinter', 'Checkpointer']
 class Callback(object):
    def __init__(self, model):
        self.model = model
    def on_step_begin(self, status):
        pass
    def on_step_end(self, status):
        pass
    def on_epoch_begin(self, status):
        pass
    def on_epoch_end(self, status):
        pass
 class ComposeCallback(object):
    def __init__(self, callbacks):
        callbacks = [c for c in list(callbacks) if c is not None]
        for c in callbacks:
            assert isinstance(
                c, Callback), "callback should be subclass of Callback"
        self._callbacks = callbacks
    def on_step_begin(self, status):
        for c in self._callbacks:
            c.on_step_begin(status)
    def on_step_end(self, status):
        for c in self._callbacks:
            c.on_step_end(status)
    def on_epoch_begin(self, status):
        for c in self._callbacks:
            c.on_epoch_begin(status)
    def on_epoch_end(self, status):
        for c in self._callbacks:
            c.on_epoch_end(status)
 class LogPrinter(Callback):
    def __init__(self, model):
        super(LogPrinter, self).__init__(model)
    def on_step_end(self, status):
        if dist.get_world_size() < 2 or dist.get_rank() == 0:
            mode = status['mode']
            if mode == 'train':
                epoch_id = status['epoch_id']
                step_id = status['step_id']
                steps_per_epoch = status['steps_per_epoch']
                training_staus = status['training_staus']
                batch_time = status['batch_time']
                data_time = status['data_time']
                epoches = self.model.cfg.epoch
                batch_size = self.model.cfg['{}Reader'.format(mode.capitalize(
                ))]['batch_size']
                logs = training_staus.log()
                space_fmt = ':' + str(len(str(steps_per_epoch))) + 'd'
                if step_id % self.model.cfg.log_iter == 0:
                    eta_steps = (epoches - epoch_id) * steps_per_epoch - step_id
                    eta_sec = eta_steps * batch_time.global_avg
                    eta_str = str(datetime.timedelta(seconds=int(eta_sec)))
                    ips = float(batch_size) / batch_time.avg
                    fmt = ' '.join([
                        'Epoch: [{}]',
                        '[{' + space_fmt + '}/{}]',
                        'learning_rate: {lr:.6f}',
                        '{meters}',
                        'eta: {eta}',
                        'batch_cost: {btime}',
                        'data_cost: {dtime}',
                        'ips: {ips:.4f} images/s',
                    ])
                    fmt = fmt.format(
                        epoch_id,
                        step_id,
                        steps_per_epoch,
                        lr=status['learning_rate'],
                        meters=logs,
                        eta=eta_str,
                        btime=str(batch_time),
                        dtime=str(data_time),
                        ips=ips)
                    logger.info(fmt)
            if mode == 'eval':
                step_id = status['step_id']
                if step_id % 100 == 0:
                    logger.info("Eval iter: {}".format(step_id))
    def on_epoch_end(self, status):
        if dist.get_world_size() < 2 or dist.get_rank() == 0:
            mode = status['mode']
            if mode == 'eval':
                sample_num = status['sample_num']
                cost_time = status['cost_time']
                logger.info('Total sample number: {}, averge FPS: {}'.format(
                    sample_num, sample_num / cost_time))
 class Checkpointer(Callback):
    def __init__(self, model):
        super(Checkpointer, self).__init__(model)
        cfg = self.model.cfg
        self.best_ap = 0.
        self.save_dir = os.path.join(self.model.cfg.save_dir,
                                     self.model.cfg.filename)
        if hasattr(self.model.model, 'student_model'):
            self.weight = self.model.model.student_model
        else:
            self.weight = self.model.model
    def on_epoch_end(self, status):
        # Checkpointer only performed during training
        mode = status['mode']
        epoch_id = status['epoch_id']
        weight = None
        save_name = None
        if dist.get_world_size() < 2 or dist.get_rank() == 0:
            if mode == 'train':
                end_epoch = self.model.cfg.epoch
                if (
                        epoch_id + 1
                ) % self.model.cfg.snapshot_epoch == 0 or epoch_id == end_epoch - 1:
                    save_name = str(
                        epoch_id) if epoch_id != end_epoch - 1 else "model_final"
                    weight = self.weight
            elif mode == 'eval':
                if 'save_best_model' in status and status['save_best_model']:
                    for metric in self.model._metrics:
                        map_res = metric.get_results()
                        if 'bbox' in map_res:
                            key = 'bbox'
                        elif 'keypoint' in map_res:
                            key = 'keypoint'
                        else:
                            key = 'mask'
                        if key not in map_res:
                            logger.warning("Evaluation results empty, this may be due to " \
                                        "training iterations being too few or not " \
                                        "loading the correct weights.")
                            return
                        if map_res[key][0] > self.best_ap:
                            self.best_ap = map_res[key][0]
                            save_name = 'best_model'
                            weight = self.weight
                        logger.info("Best test {} ap is {:0.3f}.".format(
                            key, self.best_ap))
            if weight:
                save_model(weight, self.model.optimizer, self.save_dir,
                           save_name, epoch_id + 1)
 class WiferFaceEval(Callback):
    def __init__(self, model):
        super(WiferFaceEval, self).__init__(model)
    def on_epoch_begin(self, status):
        assert self.model.mode == 'eval', \
            "WiferFaceEval can only be set during evaluation"
        for metric in self.model._metrics:
            metric.update(self.model.model)
        sys.exit()
 class VisualDLWriter(Callback):
    """
    Use VisualDL to log data or image
    """
    def __init__(self, model):
        super(VisualDLWriter, self).__init__(model)
        assert six.PY3, "VisualDL requires Python >= 3.5"
        try:
            from visualdl import LogWriter
        except Exception as e:
            logger.error('visualdl not found, plaese install visualdl. '
                         'for example: `pip install visualdl`.')
            raise e
        self.vdl_writer = LogWriter(
            model.cfg.get('vdl_log_dir', 'vdl_log_dir/scalar'))
        self.vdl_loss_step = 0
        self.vdl_mAP_step = 0
        self.vdl_image_step = 0
        self.vdl_image_frame = 0
    def on_step_end(self, status):
        mode = status['mode']
        if dist.get_world_size() < 2 or dist.get_rank() == 0:
            if mode == 'train':
                training_staus = status['training_staus']
                for loss_name, loss_value in training_staus.get().items():
                    self.vdl_writer.add_scalar(loss_name, loss_value,
                                               self.vdl_loss_step)
                    self.vdl_loss_step += 1
            elif mode == 'test':
                ori_image = status['original_image']
                result_image = status['result_image']
                self.vdl_writer.add_image(
                    "original/frame_{}".format(self.vdl_image_frame), ori_image,
                    self.vdl_image_step)
                self.vdl_writer.add_image(
                    "result/frame_{}".format(self.vdl_image_frame),
                    result_image, self.vdl_image_step)
                self.vdl_image_step += 1
                # each frame can display ten pictures at most.
                if self.vdl_image_step % 10 == 0:
                    self.vdl_image_step = 0
                    self.vdl_image_frame += 1
    def on_epoch_end(self, status):
        mode = status['mode']
        if dist.get_world_size() < 2 or dist.get_rank() == 0:
            if mode == 'eval':
                for metric in self.model._metrics:
                    for key, map_value in metric.get_results().items():
                        self.vdl_writer.add_scalar("{}-mAP".format(key),
                                                   map_value[0],
                                                   self.vdl_mAP_step)
                self.vdl_mAP_step += 1
--- a/build/lib/ppdet/engine/env.py
+++ b/build/lib/ppdet/engine/env.py
@ -0,0 +1,47 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import os
 import random
 import numpy as np
 import paddle
 from paddle.distributed import fleet
 __all__ = ['init_parallel_env', 'set_random_seed', 'init_fleet_env']
 def init_fleet_env():
    fleet.init(is_collective=True)
 def init_parallel_env():
    env = os.environ
    dist = 'PADDLE_TRAINER_ID' in env and 'PADDLE_TRAINERS_NUM' in env
    if dist:
        trainer_id = int(env['PADDLE_TRAINER_ID'])
        local_seed = (99 + trainer_id)
        random.seed(local_seed)
        np.random.seed(local_seed)
    paddle.distributed.init_parallel_env()
 def set_random_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
--- a/build/lib/ppdet/engine/export_utils.py
+++ b/build/lib/ppdet/engine/export_utils.py
@ -0,0 +1,117 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import os
 import yaml
 from collections import OrderedDict
 from ppdet.data.source.category import get_categories
 from ppdet.utils.logger import setup_logger
 logger = setup_logger('ppdet.engine')
 # Global dictionary
 TRT_MIN_SUBGRAPH = {
    'YOLO': 3,
    'SSD': 60,
    'RCNN': 40,
    'RetinaNet': 40,
    'S2ANet': 80,
    'EfficientDet': 40,
    'Face': 3,
    'TTFNet': 60,
    'FCOS': 16,
    'SOLOv2': 60,
    'HigherHRNet': 3,
    'HRNet': 3,
 }
 KEYPOINT_ARCH = ['HigherHRNet', 'TopDownHRNet']
 def _parse_reader(reader_cfg, dataset_cfg, metric, arch, image_shape):
    preprocess_list = []
    anno_file = dataset_cfg.get_anno()
    clsid2catid, catid2name = get_categories(metric, anno_file, arch)
    label_list = [str(cat) for cat in catid2name.values()]
    sample_transforms = reader_cfg['sample_transforms']
    for st in sample_transforms[1:]:
        for key, value in st.items():
            p = {'type': key}
            if key == 'Resize':
                if int(image_shape[1]) != -1:
                    value['target_size'] = image_shape[1:]
            p.update(value)
            preprocess_list.append(p)
    batch_transforms = reader_cfg.get('batch_transforms', None)
    if batch_transforms:
        for bt in batch_transforms:
            for key, value in bt.items():
                # for deploy/infer, use PadStride(stride) instead PadBatch(pad_to_stride)
                if key == 'PadBatch':
                    preprocess_list.append({
                        'type': 'PadStride',
                        'stride': value['pad_to_stride']
                    })
                    break
    return preprocess_list, label_list
 def _dump_infer_config(config, path, image_shape, model):
    arch_state = False
    from ppdet.core.config.yaml_helpers import setup_orderdict
    setup_orderdict()
    infer_cfg = OrderedDict({
        'mode': 'fluid',
        'draw_threshold': 0.5,
        'metric': config['metric'],
    })
    infer_arch = config['architecture']
    for arch, min_subgraph_size in TRT_MIN_SUBGRAPH.items():
        if arch in infer_arch:
            infer_cfg['arch'] = arch
            infer_cfg['min_subgraph_size'] = min_subgraph_size
            arch_state = True
            break
    if not arch_state:
        logger.error(
            'Architecture: {} is not supported for exporting model now'.format(
                infer_arch))
        os._exit(0)
    if 'Mask' in infer_arch:
        infer_cfg['mask'] = True
    label_arch = 'detection_arch'
    if infer_arch in KEYPOINT_ARCH:
        label_arch = 'keypoint_arch'
    infer_cfg['Preprocess'], infer_cfg['label_list'] = _parse_reader(
        config['TestReader'], config['TestDataset'], config['metric'],
        label_arch, image_shape)
    if infer_arch == 'S2ANet':
        # TODO: move background to num_classes
        if infer_cfg['label_list'][0] != 'background':
            infer_cfg['label_list'].insert(0, 'background')
    yaml.dump(infer_cfg, open(path, 'w'))
    logger.info("Export inference config file to {}".format(os.path.join(path)))
--- a/build/lib/ppdet/engine/tracker.py
+++ b/build/lib/ppdet/engine/tracker.py
@ -0,0 +1,421 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import os
 import cv2
 import glob
 import paddle
 import numpy as np
 from ppdet.core.workspace import create
 from ppdet.utils.checkpoint import load_weight
 from ppdet.modeling.mot.utils import Timer, load_det_results
 from ppdet.modeling.mot import visualization as mot_vis
 from ppdet.metrics import Metric, MOTMetric
 from .callbacks import Callback, ComposeCallback
 from ppdet.utils.logger import setup_logger
 logger = setup_logger(__name__)
 __all__ = ['Tracker']
 class Tracker(object):
    def __init__(self, cfg, mode='eval'):
        self.cfg = cfg
        assert mode.lower() in ['test', 'eval'], \
                "mode should be 'test' or 'eval'"
        self.mode = mode.lower()
        self.optimizer = None
        # build MOT data loader
        self.dataset = cfg['{}MOTDataset'.format(self.mode.capitalize())]
        # build model
        self.model = create(cfg.architecture)
        self.status = {}
        self.start_epoch = 0
        # initial default callbacks
        self._init_callbacks()
        # initial default metrics
        self._init_metrics()
        self._reset_metrics()
    def _init_callbacks(self):
        self._callbacks = []
        self._compose_callback = None
    def _init_metrics(self):
        if self.mode in ['test']:
            self._metrics = []
            return
        if self.cfg.metric == 'MOT':
            self._metrics = [MOTMetric(), ]
        else:
            logger.warning("Metric not support for metric type {}".format(
                self.cfg.metric))
            self._metrics = []
    def _reset_metrics(self):
        for metric in self._metrics:
            metric.reset()
    def register_callbacks(self, callbacks):
        callbacks = [h for h in list(callbacks) if h is not None]
        for c in callbacks:
            assert isinstance(c, Callback), \
                    "metrics shoule be instances of subclass of Metric"
        self._callbacks.extend(callbacks)
        self._compose_callback = ComposeCallback(self._callbacks)
    def register_metrics(self, metrics):
        metrics = [m for m in list(metrics) if m is not None]
        for m in metrics:
            assert isinstance(m, Metric), \
                    "metrics shoule be instances of subclass of Metric"
        self._metrics.extend(metrics)
    def load_weights_jde(self, weights):
        load_weight(self.model, weights, self.optimizer)
    def load_weights_sde(self, det_weights, reid_weights):
        if self.model.detector:
            load_weight(self.model.detector, det_weights, self.optimizer)
        load_weight(self.model.reid, reid_weights, self.optimizer)
    def _eval_seq_jde(self,
                      dataloader,
                      save_dir=None,
                      show_image=False,
                      frame_rate=30):
        if save_dir:
            if not os.path.exists(save_dir): os.makedirs(save_dir)
        tracker = self.model.tracker
        tracker.max_time_lost = int(frame_rate / 30.0 * tracker.track_buffer)
        timer = Timer()
        results = []
        frame_id = 0
        self.status['mode'] = 'track'
        self.model.eval()
        for step_id, data in enumerate(dataloader):
            self.status['step_id'] = step_id
            if frame_id % 40 == 0:
                logger.info('Processing frame {} ({:.2f} fps)'.format(
                    frame_id, 1. / max(1e-5, timer.average_time)))
            # forward
            timer.tic()
            online_targets = self.model(data)
            online_tlwhs, online_ids = [], []
            for t in online_targets:
                tlwh = t.tlwh
                tid = t.track_id
                vertical = tlwh[2] / tlwh[3] > 1.6
                if tlwh[2] * tlwh[3] > tracker.min_box_area and not vertical:
                    online_tlwhs.append(tlwh)
                    online_ids.append(tid)
            timer.toc()
            # save results
            results.append((frame_id + 1, online_tlwhs, online_ids))
            self.save_results(data, frame_id, online_ids, online_tlwhs,
                              timer.average_time, show_image, save_dir)
            frame_id += 1
        return results, frame_id, timer.average_time, timer.calls
    def _eval_seq_sde(self,
                      dataloader,
                      save_dir=None,
                      show_image=False,
                      frame_rate=30,
                      det_file=''):
        if save_dir:
            if not os.path.exists(save_dir): os.makedirs(save_dir)
        tracker = self.model.tracker
        use_detector = False if not self.model.detector else True
        timer = Timer()
        results = []
        frame_id = 0
        self.status['mode'] = 'track'
        self.model.eval()
        self.model.reid.eval()
        if not use_detector:
            dets_list = load_det_results(det_file, len(dataloader))
            logger.info('Finish loading detection results file {}.'.format(
                det_file))
        for step_id, data in enumerate(dataloader):
            self.status['step_id'] = step_id
            if frame_id % 40 == 0:
                logger.info('Processing frame {} ({:.2f} fps)'.format(
                    frame_id, 1. / max(1e-5, timer.average_time)))
            timer.tic()
            if not use_detector:
                timer.tic()
                dets = dets_list[frame_id]
                bbox_tlwh = paddle.to_tensor(dets['bbox'], dtype='float32')
                pred_scores = paddle.to_tensor(dets['score'], dtype='float32')
                if bbox_tlwh.shape[0] > 0:
                    pred_bboxes = paddle.concat(
                        (bbox_tlwh[:, 0:2],
                         bbox_tlwh[:, 2:4] + bbox_tlwh[:, 0:2]),
                        axis=1)
                else:
                    pred_bboxes = []
                    pred_scores = []
                data.update({
                    'pred_bboxes': pred_bboxes,
                    'pred_scores': pred_scores
                })
            # forward
            timer.tic()
            online_targets = self.model(data)
            online_tlwhs = []
            online_ids = []
            for track in online_targets:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                tlwh = track.to_tlwh()
                track_id = track.track_id
                online_tlwhs.append(tlwh)
                online_ids.append(track_id)
            timer.toc()
            # save results
            results.append((frame_id + 1, online_tlwhs, online_ids))
            self.save_results(data, frame_id, online_ids, online_tlwhs,
                              timer.average_time, show_image, save_dir)
            frame_id += 1
        return results, frame_id, timer.average_time, timer.calls
    def mot_evaluate(self,
                     data_root,
                     seqs,
                     output_dir,
                     data_type='mot',
                     model_type='JDE',
                     save_images=False,
                     save_videos=False,
                     show_image=False,
                     det_results_dir=''):
        if not os.path.exists(output_dir): os.makedirs(output_dir)
        result_root = os.path.join(output_dir, 'mot_results')
        if not os.path.exists(result_root): os.makedirs(result_root)
        assert data_type in ['mot', 'kitti'], \
            "data_type should be 'mot' or 'kitti'"
        assert model_type in ['JDE', 'DeepSORT', 'FairMOT'], \
            "model_type should be 'JDE', 'DeepSORT' or 'FairMOT'"
        # run tracking
        n_frame = 0
        timer_avgs, timer_calls = [], []
        for seq in seqs:
            save_dir = os.path.join(output_dir, 'mot_outputs',
                                    seq) if save_images or save_videos else None
            logger.info('start seq: {}'.format(seq))
            infer_dir = os.path.join(data_root, seq, 'img1')
            images = self.get_infer_images(infer_dir)
            self.dataset.set_images(images)
            dataloader = create('EvalMOTReader')(self.dataset, 0)
            result_filename = os.path.join(result_root, '{}.txt'.format(seq))
            meta_info = open(os.path.join(data_root, seq, 'seqinfo.ini')).read()
            frame_rate = int(meta_info[meta_info.find('frameRate') + 10:
                                       meta_info.find('\nseqLength')])
            if model_type in ['JDE', 'FairMOT']:
                results, nf, ta, tc = self._eval_seq_jde(
                    dataloader,
                    save_dir=save_dir,
                    show_image=show_image,
                    frame_rate=frame_rate)
            elif model_type in ['DeepSORT']:
                results, nf, ta, tc = self._eval_seq_sde(
                    dataloader,
                    save_dir=save_dir,
                    show_image=show_image,
                    frame_rate=frame_rate,
                    det_file=os.path.join(det_results_dir,
                                          '{}.txt'.format(seq)))
            else:
                raise ValueError(model_type)
            self.write_mot_results(result_filename, results, data_type)
            n_frame += nf
            timer_avgs.append(ta)
            timer_calls.append(tc)
            if save_videos:
                output_video_path = os.path.join(save_dir, '..',
                                                 '{}_vis.mp4'.format(seq))
                cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg {}'.format(
                    save_dir, output_video_path)
                os.system(cmd_str)
                logger.info('Save video in {}.'.format(output_video_path))
            logger.info('Evaluate seq: {}'.format(seq))
            # update metrics
            for metric in self._metrics:
                metric.update(data_root, seq, data_type, result_root,
                              result_filename)
        timer_avgs = np.asarray(timer_avgs)
        timer_calls = np.asarray(timer_calls)
        all_time = np.dot(timer_avgs, timer_calls)
        avg_time = all_time / np.sum(timer_calls)
        logger.info('Time elapsed: {:.2f} seconds, FPS: {:.2f}'.format(
            all_time, 1.0 / avg_time))
        # accumulate metric to log out
        for metric in self._metrics:
            metric.accumulate()
            metric.log()
        # reset metric states for metric may performed multiple times
        self._reset_metrics()
    def get_infer_images(self, infer_dir):
        assert infer_dir is None or os.path.isdir(infer_dir), \
            "{} is not a directory".format(infer_dir)
        images = set()
        assert os.path.isdir(infer_dir), \
            "infer_dir {} is not a directory".format(infer_dir)
        exts = ['jpg', 'jpeg', 'png', 'bmp']
        exts += [ext.upper() for ext in exts]
        for ext in exts:
            images.update(glob.glob('{}/*.{}'.format(infer_dir, ext)))
        images = list(images)
        images.sort()
        assert len(images) > 0, "no image found in {}".format(infer_dir)
        logger.info("Found {} inference images in total.".format(len(images)))
        return images
    def mot_predict(self,
                    video_file,
                    output_dir,
                    data_type='mot',
                    model_type='JDE',
                    save_images=False,
                    save_videos=True,
                    show_image=False,
                    det_results_dir=''):
        if not os.path.exists(output_dir): os.makedirs(output_dir)
        result_root = os.path.join(output_dir, 'mot_results')
        if not os.path.exists(result_root): os.makedirs(result_root)
        assert data_type in ['mot', 'kitti'], \
            "data_type should be 'mot' or 'kitti'"
        assert model_type in ['JDE', 'DeepSORT', 'FairMOT'], \
            "model_type should be 'JDE', 'DeepSORT' or 'FairMOT'"
        # run tracking
        seq = video_file.split('/')[-1].split('.')[0]
        save_dir = os.path.join(output_dir, 'mot_outputs',
                                seq) if save_images or save_videos else None
        logger.info('Starting tracking {}'.format(video_file))
        self.dataset.set_video(video_file)
        dataloader = create('TestMOTReader')(self.dataset, 0)
        result_filename = os.path.join(result_root, '{}.txt'.format(seq))
        frame_rate = self.dataset.frame_rate
        if model_type in ['JDE', 'FairMOT']:
            results, nf, ta, tc = self._eval_seq_jde(
                dataloader,
                save_dir=save_dir,
                show_image=show_image,
                frame_rate=frame_rate)
        elif model_type in ['DeepSORT']:
            results, nf, ta, tc = self._eval_seq_sde(
                dataloader,
                save_dir=save_dir,
                show_image=show_image,
                frame_rate=frame_rate,
                det_file=os.path.join(det_results_dir, '{}.txt'.format(seq)))
        else:
            raise ValueError(model_type)
        if save_videos:
            output_video_path = os.path.join(save_dir, '..',
                                             '{}_vis.mp4'.format(seq))
            cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg {}'.format(
                save_dir, output_video_path)
            os.system(cmd_str)
            logger.info('Save video in {}'.format(output_video_path))
    def write_mot_results(self, filename, results, data_type='mot'):
        if data_type in ['mot', 'mcmot', 'lab']:
            save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n'
        elif data_type == 'kitti':
            save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n'
        else:
            raise ValueError(data_type)
        with open(filename, 'w') as f:
            for frame_id, tlwhs, track_ids in results:
                if data_type == 'kitti':
                    frame_id -= 1
                for tlwh, track_id in zip(tlwhs, track_ids):
                    if track_id < 0:
                        continue
                    x1, y1, w, h = tlwh
                    x2, y2 = x1 + w, y1 + h
                    line = save_format.format(
                        frame=frame_id,
                        id=track_id,
                        x1=x1,
                        y1=y1,
                        x2=x2,
                        y2=y2,
                        w=w,
                        h=h)
                    f.write(line)
        logger.info('MOT results save in {}'.format(filename))
    def save_results(self, data, frame_id, online_ids, online_tlwhs,
                     average_time, show_image, save_dir):
        if show_image or save_dir is not None:
            assert 'ori_image' in data
            img0 = data['ori_image'].numpy()[0]
            online_im = mot_vis.plot_tracking(
                img0,
                online_tlwhs,
                online_ids,
                frame_id=frame_id,
                fps=1. / average_time)
        if show_image:
            cv2.imshow('online_im', online_im)
        if save_dir is not None:
            cv2.imwrite(
                os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)),
                online_im)
--- a/build/lib/ppdet/engine/trainer.py
+++ b/build/lib/ppdet/engine/trainer.py
@ -0,0 +1,533 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import os
 import copy
 import time
 import numpy as np
 from PIL import Image
 import paddle
 import paddle.distributed as dist
 from paddle.distributed import fleet
 from paddle import amp
 from paddle.static import InputSpec
 from ppdet.optimizer import ModelEMA
 from ppdet.core.workspace import create
 from ppdet.utils.checkpoint import load_weight, load_pretrain_weight
 from ppdet.utils.visualizer import visualize_results, save_result
 from ppdet.metrics import JDEDetMetric, JDEReIDMetric
 from ppdet.metrics import Metric, COCOMetric, VOCMetric, WiderFaceMetric, get_infer_results, KeyPointTopDownCOCOEval
 from ppdet.data.source.category import get_categories
 import ppdet.utils.stats as stats
 from .callbacks import Callback, ComposeCallback, LogPrinter, Checkpointer, WiferFaceEval, VisualDLWriter
 from .export_utils import _dump_infer_config
 from ppdet.utils.logger import setup_logger
 logger = setup_logger('ppdet.engine')
 __all__ = ['Trainer']
 class Trainer(object):
    def __init__(self, cfg, mode='train'):
        self.cfg = cfg
        assert mode.lower() in ['train', 'eval', 'test'], \
                "mode should be 'train', 'eval' or 'test'"
        self.mode = mode.lower()
        self.optimizer = None
        self.is_loaded_weights = False
        # build data loader
        self.dataset = cfg['{}Dataset'.format(self.mode.capitalize())]
        if self.mode == 'train':
            self.loader = create('{}Reader'.format(self.mode.capitalize()))(
                self.dataset, cfg.worker_num)
        if cfg.architecture == 'JDE' and self.mode == 'train':
            cfg['JDEEmbeddingHead'][
                'num_identifiers'] = self.dataset.total_identities
        if cfg.architecture == 'FairMOT' and self.mode == 'train':
            cfg['FairMOTEmbeddingHead'][
                'num_identifiers'] = self.dataset.total_identities
        # build model
        if 'model' not in self.cfg:
            self.model = create(cfg.architecture)
        else:
            self.model = self.cfg.model
            self.is_loaded_weights = True
        self.use_ema = ('use_ema' in cfg and cfg['use_ema'])
        if self.use_ema:
            self.ema = ModelEMA(
                cfg['ema_decay'], self.model, use_thres_step=True)
        # EvalDataset build with BatchSampler to evaluate in single device
        # TODO: multi-device evaluate
        if self.mode == 'eval':
            self._eval_batch_sampler = paddle.io.BatchSampler(
                self.dataset, batch_size=self.cfg.EvalReader['batch_size'])
            self.loader = create('{}Reader'.format(self.mode.capitalize()))(
                self.dataset, cfg.worker_num, self._eval_batch_sampler)
        # TestDataset build after user set images, skip loader creation here
        # build optimizer in train mode
        if self.mode == 'train':
            steps_per_epoch = len(self.loader)
            self.lr = create('LearningRate')(steps_per_epoch)
            self.optimizer = create('OptimizerBuilder')(self.lr,
                                                        self.model.parameters())
        self._nranks = dist.get_world_size()
        self._local_rank = dist.get_rank()
        self.status = {}
        self.start_epoch = 0
        self.end_epoch = cfg.epoch
        # initial default callbacks
        self._init_callbacks()
        # initial default metrics
        self._init_metrics()
        self._reset_metrics()
    def _init_callbacks(self):
        if self.mode == 'train':
            self._callbacks = [LogPrinter(self), Checkpointer(self)]
            if self.cfg.get('use_vdl', False):
                self._callbacks.append(VisualDLWriter(self))
            self._compose_callback = ComposeCallback(self._callbacks)
        elif self.mode == 'eval':
            self._callbacks = [LogPrinter(self)]
            if self.cfg.metric == 'WiderFace':
                self._callbacks.append(WiferFaceEval(self))
            self._compose_callback = ComposeCallback(self._callbacks)
        elif self.mode == 'test' and self.cfg.get('use_vdl', False):
            self._callbacks = [VisualDLWriter(self)]
            self._compose_callback = ComposeCallback(self._callbacks)
        else:
            self._callbacks = []
            self._compose_callback = None
    def _init_metrics(self, validate=False):
        if self.mode == 'test' or (self.mode == 'train' and not validate):
            self._metrics = []
            return
        classwise = self.cfg['classwise'] if 'classwise' in self.cfg else False
        if self.cfg.metric == 'COCO':
            # TODO: bias should be unified
            bias = self.cfg['bias'] if 'bias' in self.cfg else 0
            output_eval = self.cfg['output_eval'] \
                if 'output_eval' in self.cfg else None
            save_prediction_only = self.cfg.get('save_prediction_only', False)
            # pass clsid2catid info to metric instance to avoid multiple loading
            # annotation file
            clsid2catid = {v: k for k, v in self.dataset.catid2clsid.items()} \
                                if self.mode == 'eval' else None
            # when do validation in train, annotation file should be get from
            # EvalReader instead of self.dataset(which is TrainReader)
            anno_file = self.dataset.get_anno()
            if self.mode == 'train' and validate:
                eval_dataset = self.cfg['EvalDataset']
                eval_dataset.check_or_download_dataset()
                anno_file = eval_dataset.get_anno()
            IouType = self.cfg['IouType'] if 'IouType' in self.cfg else 'bbox'
            self._metrics = [
                COCOMetric(
                    anno_file=anno_file,
                    clsid2catid=clsid2catid,
                    classwise=classwise,
                    output_eval=output_eval,
                    bias=bias,
                    IouType=IouType,
                    save_prediction_only=save_prediction_only)
            ]
        elif self.cfg.metric == 'VOC':
            self._metrics = [
                VOCMetric(
                    label_list=self.dataset.get_label_list(),
                    class_num=self.cfg.num_classes,
                    map_type=self.cfg.map_type,
                    classwise=classwise)
            ]
        elif self.cfg.metric == 'WiderFace':
            multi_scale = self.cfg.multi_scale_eval if 'multi_scale_eval' in self.cfg else True
            self._metrics = [
                WiderFaceMetric(
                    image_dir=os.path.join(self.dataset.dataset_dir,
                                           self.dataset.image_dir),
                    anno_file=self.dataset.get_anno(),
                    multi_scale=multi_scale)
            ]
        elif self.cfg.metric == 'KeyPointTopDownCOCOEval':
            eval_dataset = self.cfg['EvalDataset']
            eval_dataset.check_or_download_dataset()
            anno_file = eval_dataset.get_anno()
            self._metrics = [
                KeyPointTopDownCOCOEval(anno_file,
                                        len(eval_dataset), self.cfg.num_joints,
                                        self.cfg.save_dir)
            ]
        elif self.cfg.metric == 'MOTDet':
            self._metrics = [JDEDetMetric(), ]
        elif self.cfg.metric == 'ReID':
            self._metrics = [JDEReIDMetric(), ]
        else:
            logger.warning("Metric not support for metric type {}".format(
                self.cfg.metric))
            self._metrics = []
    def _reset_metrics(self):
        for metric in self._metrics:
            metric.reset()
    def register_callbacks(self, callbacks):
        callbacks = [c for c in list(callbacks) if c is not None]
        for c in callbacks:
            assert isinstance(c, Callback), \
                    "metrics shoule be instances of subclass of Metric"
        self._callbacks.extend(callbacks)
        self._compose_callback = ComposeCallback(self._callbacks)
    def register_metrics(self, metrics):
        metrics = [m for m in list(metrics) if m is not None]
        for m in metrics:
            assert isinstance(m, Metric), \
                    "metrics shoule be instances of subclass of Metric"
        self._metrics.extend(metrics)
    def load_weights(self, weights):
        if self.is_loaded_weights:
            return
        self.start_epoch = 0
        if hasattr(self.model, 'detector'):
            if self.model.__class__.__name__ == 'FairMOT':
                load_pretrain_weight(self.model, weights)
            else:
                load_pretrain_weight(self.model.detector, weights)
        else:
            load_pretrain_weight(self.model, weights)
        logger.debug("Load weights {} to start training".format(weights))
    def resume_weights(self, weights):
        # support Distill resume weights
        if hasattr(self.model, 'student_model'):
            self.start_epoch = load_weight(self.model.student_model, weights,
                                           self.optimizer)
        else:
            self.start_epoch = load_weight(self.model, weights, self.optimizer)
        logger.debug("Resume weights of epoch {}".format(self.start_epoch))
    def train(self, validate=False):
        assert self.mode == 'train', "Model not in 'train' mode"
        # if validation in training is enabled, metrics should be re-init
        if validate:
            self._init_metrics(validate=validate)
            self._reset_metrics()
        model = self.model
        if self.cfg.get('fleet', False):
            model = fleet.distributed_model(model)
            self.optimizer = fleet.distributed_optimizer(self.optimizer)
        elif self._nranks > 1:
            find_unused_parameters = self.cfg[
                'find_unused_parameters'] if 'find_unused_parameters' in self.cfg else False
            model = paddle.DataParallel(
                self.model, find_unused_parameters=find_unused_parameters)
        # initial fp16
        if self.cfg.get('fp16', False):
            scaler = amp.GradScaler(
                enable=self.cfg.use_gpu, init_loss_scaling=1024)
        self.status.update({
            'epoch_id': self.start_epoch,
            'step_id': 0,
            'steps_per_epoch': len(self.loader)
        })
        self.status['batch_time'] = stats.SmoothedValue(
            self.cfg.log_iter, fmt='{avg:.4f}')
        self.status['data_time'] = stats.SmoothedValue(
            self.cfg.log_iter, fmt='{avg:.4f}')
        self.status['training_staus'] = stats.TrainingStats(self.cfg.log_iter)
        for epoch_id in range(self.start_epoch, self.cfg.epoch):
            self.status['mode'] = 'train'
            self.status['epoch_id'] = epoch_id
            self._compose_callback.on_epoch_begin(self.status)
            self.loader.dataset.set_epoch(epoch_id)
            model.train()
            iter_tic = time.time()
            for step_id, data in enumerate(self.loader):
                self.status['data_time'].update(time.time() - iter_tic)
                self.status['step_id'] = step_id
                self._compose_callback.on_step_begin(self.status)
                if self.cfg.get('fp16', False):
                    with amp.auto_cast(enable=self.cfg.use_gpu):
                        # model forward
                        outputs = model(data)
                        loss = outputs['loss']
                    # model backward
                    scaled_loss = scaler.scale(loss)
                    scaled_loss.backward()
                    # in dygraph mode, optimizer.minimize is equal to optimizer.step
                    scaler.minimize(self.optimizer, scaled_loss)
                else:
                    # model forward
                    outputs = model(data)
                    loss = outputs['loss']
                    # model backward
                    loss.backward()
                    self.optimizer.step()
                curr_lr = self.optimizer.get_lr()
                self.lr.step()
                self.optimizer.clear_grad()
                self.status['learning_rate'] = curr_lr
                if self._nranks < 2 or self._local_rank == 0:
                    self.status['training_staus'].update(outputs)
                self.status['batch_time'].update(time.time() - iter_tic)
                self._compose_callback.on_step_end(self.status)
                if self.use_ema:
                    self.ema.update(self.model)
                iter_tic = time.time()
            # apply ema weight on model
            if self.use_ema:
                weight = copy.deepcopy(self.model.state_dict())
                self.model.set_dict(self.ema.apply())
            self._compose_callback.on_epoch_end(self.status)
            if validate and (self._nranks < 2 or self._local_rank == 0) \
                    and ((epoch_id + 1) % self.cfg.snapshot_epoch == 0 \
                             or epoch_id == self.end_epoch - 1):
                if not hasattr(self, '_eval_loader'):
                    # build evaluation dataset and loader
                    self._eval_dataset = self.cfg.EvalDataset
                    self._eval_batch_sampler = \
                        paddle.io.BatchSampler(
                            self._eval_dataset,
                            batch_size=self.cfg.EvalReader['batch_size'])
                    self._eval_loader = create('EvalReader')(
                        self._eval_dataset,
                        self.cfg.worker_num,
                        batch_sampler=self._eval_batch_sampler)
                with paddle.no_grad():
                    self.status['save_best_model'] = True
                    self._eval_with_loader(self._eval_loader)
            # restore origin weight on model
            if self.use_ema:
                self.model.set_dict(weight)
    def _eval_with_loader(self, loader):
        sample_num = 0
        tic = time.time()
        self._compose_callback.on_epoch_begin(self.status)
        self.status['mode'] = 'eval'
        self.model.eval()
        for step_id, data in enumerate(loader):
            self.status['step_id'] = step_id
            self._compose_callback.on_step_begin(self.status)
            # forward
            outs = self.model(data)
            # update metrics
            for metric in self._metrics:
                metric.update(data, outs)
            sample_num += data['im_id'].numpy().shape[0]
            self._compose_callback.on_step_end(self.status)
        self.status['sample_num'] = sample_num
        self.status['cost_time'] = time.time() - tic
        # accumulate metric to log out
        for metric in self._metrics:
            metric.accumulate()
            metric.log()
        self._compose_callback.on_epoch_end(self.status)
        # reset metric states for metric may performed multiple times
        self._reset_metrics()
    def evaluate(self):
        with paddle.no_grad():
            self._eval_with_loader(self.loader)
    def predict(self,
                images,
                draw_threshold=0.5,
                output_dir='output',
                save_txt=False):
        self.dataset.set_images(images)
        loader = create('TestReader')(self.dataset, 0)
        imid2path = self.dataset.get_imid2path()
        anno_file = self.dataset.get_anno()
        clsid2catid, catid2name = get_categories(
            self.cfg.metric, anno_file=anno_file)
        # Run Infer 
        self.status['mode'] = 'test'
        self.model.eval()
        for step_id, data in enumerate(loader):
            self.status['step_id'] = step_id
            # forward
            outs = self.model(data)
            for key in ['im_shape', 'scale_factor', 'im_id']:
                outs[key] = data[key]
            for key, value in outs.items():
                if hasattr(value, 'numpy'):
                    outs[key] = value.numpy()
            batch_res = get_infer_results(outs, clsid2catid)
            bbox_num = outs['bbox_num']
            start = 0
            for i, im_id in enumerate(outs['im_id']):
                image_path = imid2path[int(im_id)]
                image = Image.open(image_path).convert('RGB')
                self.status['original_image'] = np.array(image.copy())
                end = start + bbox_num[i]
                bbox_res = batch_res['bbox'][start:end] \
                        if 'bbox' in batch_res else None
                mask_res = batch_res['mask'][start:end] \
                        if 'mask' in batch_res else None
                segm_res = batch_res['segm'][start:end] \
                        if 'segm' in batch_res else None
                keypoint_res = batch_res['keypoint'][start:end] \
                        if 'keypoint' in batch_res else None
                image = visualize_results(
                    image, bbox_res, mask_res, segm_res, keypoint_res,
                    int(im_id), catid2name, draw_threshold)
                self.status['result_image'] = np.array(image.copy())
                if self._compose_callback:
                    self._compose_callback.on_step_end(self.status)
                # save image with detection
                save_name = self._get_save_image_name(output_dir, image_path)
                logger.info("Detection bbox results save in {}".format(
                    save_name))
                image.save(save_name, quality=95)
                if save_txt:
                    save_path = os.path.splitext(save_name)[0] + '.txt'
                    results = {}
                    results["im_id"] = im_id
                    if bbox_res:
                        results["bbox_res"] = bbox_res
                    if keypoint_res:
                        results["keypoint_res"] = keypoint_res
                    save_result(save_path, results, catid2name, draw_threshold)
                start = end
    def _get_save_image_name(self, output_dir, image_path):
        """
        Get save image name from source image path.
        """
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        image_name = os.path.split(image_path)[-1]
        name, ext = os.path.splitext(image_name)
        return os.path.join(output_dir, "{}".format(name)) + ext
    def export(self, output_dir='output_inference'):
        self.model.eval()
        model_name = os.path.splitext(os.path.split(self.cfg.filename)[-1])[0]
        save_dir = os.path.join(output_dir, model_name)
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        image_shape = None
        if 'inputs_def' in self.cfg['TestReader']:
            inputs_def = self.cfg['TestReader']['inputs_def']
            image_shape = inputs_def.get('image_shape', None)
        # set image_shape=[3, -1, -1] as default
        if image_shape is None:
            image_shape = [3, -1, -1]
        self.model.eval()
        if hasattr(self.model, 'deploy'): self.model.deploy = True
        # Save infer cfg
        _dump_infer_config(self.cfg,
                           os.path.join(save_dir, 'infer_cfg.yml'), image_shape,
                           self.model)
        input_spec = [{
            "image": InputSpec(
                shape=[None] + image_shape, name='image'),
            "im_shape": InputSpec(
                shape=[None, 2], name='im_shape'),
            "scale_factor": InputSpec(
                shape=[None, 2], name='scale_factor')
        }]
        # dy2st and save model
        if 'slim' not in self.cfg or self.cfg['slim_type'] != 'QAT':
            static_model = paddle.jit.to_static(
                self.model, input_spec=input_spec)
            # NOTE: dy2st do not pruned program, but jit.save will prune program
            # input spec, prune input spec here and save with pruned input spec
            pruned_input_spec = self._prune_input_spec(
                input_spec, static_model.forward.main_program,
                static_model.forward.outputs)
            paddle.jit.save(
                static_model,
                os.path.join(save_dir, 'model'),
                input_spec=pruned_input_spec)
            logger.info("Export model and saved in {}".format(save_dir))
        else:
            self.cfg.slim.save_quantized_model(
                self.model,
                os.path.join(save_dir, 'model'),
                input_spec=input_spec)
    def _prune_input_spec(self, input_spec, program, targets):
        # try to prune static program to figure out pruned input spec
        # so we perform following operations in static mode
        paddle.enable_static()
        pruned_input_spec = [{}]
        program = program.clone()
        program = program._prune(targets=targets)
        global_block = program.global_block()
        for name, spec in input_spec[0].items():
            try:
                v = global_block.var(name)
                pruned_input_spec[0][name] = spec
            except Exception:
                pass
        paddle.disable_static()
        return pruned_input_spec
--- a/build/lib/ppdet/metrics/init.py
+++ b/build/lib/ppdet/metrics/init.py
@ -0,0 +1,23 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from . import metrics
 from . import keypoint_metrics
 from . import mot_metrics
 from .metrics import *
 from .mot_metrics import *
 from .keypoint_metrics import *
 __all__ = metrics.__all__ + keypoint_metrics.__all__ + mot_metrics.__all__
--- a/build/lib/ppdet/metrics/coco_utils.py
+++ b/build/lib/ppdet/metrics/coco_utils.py
@ -0,0 +1,184 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import os
 import sys
 import numpy as np
 import itertools
 from ppdet.metrics.json_results import get_det_res, get_det_poly_res, get_seg_res, get_solov2_segm_res, get_keypoint_res
 from ppdet.metrics.map_utils import draw_pr_curve
 from ppdet.utils.logger import setup_logger
 logger = setup_logger(__name__)
 def get_infer_results(outs, catid, bias=0):
    """
    Get result at the stage of inference.
    The output format is dictionary containing bbox or mask result.
    For example, bbox result is a list and each element contains
    image_id, category_id, bbox and score.
    """
    if outs is None or len(outs) == 0:
        raise ValueError(
            'The number of valid detection result if zero. Please use reasonable model and check input data.'
        )
    im_id = outs['im_id']
    infer_res = {}
    if 'bbox' in outs:
        if len(outs['bbox']) > 0 and len(outs['bbox'][0]) > 6:
            infer_res['bbox'] = get_det_poly_res(
                outs['bbox'], outs['bbox_num'], im_id, catid, bias=bias)
        else:
            infer_res['bbox'] = get_det_res(
                outs['bbox'], outs['bbox_num'], im_id, catid, bias=bias)
    if 'mask' in outs:
        # mask post process
        infer_res['mask'] = get_seg_res(outs['mask'], outs['bbox'],
                                        outs['bbox_num'], im_id, catid)
    if 'segm' in outs:
        infer_res['segm'] = get_solov2_segm_res(outs, im_id, catid)
    if 'keypoint' in outs:
        infer_res['keypoint'] = get_keypoint_res(outs, im_id)
        outs['bbox_num'] = [len(infer_res['keypoint'])]
    return infer_res
 def cocoapi_eval(jsonfile,
                 style,
                 coco_gt=None,
                 anno_file=None,
                 max_dets=(100, 300, 1000),
                 classwise=False,
                 sigmas=None,
                 use_area=True):
    """
    Args:
        jsonfile (str): Evaluation json file, eg: bbox.json, mask.json.
        style (str): COCOeval style, can be `bbox` , `segm` , `proposal`, `keypoints` and `keypoints_crowd`.
        coco_gt (str): Whether to load COCOAPI through anno_file,
                 eg: coco_gt = COCO(anno_file)
        anno_file (str): COCO annotations file.
        max_dets (tuple): COCO evaluation maxDets.
        classwise (bool): Whether per-category AP and draw P-R Curve or not.
        sigmas (nparray): keypoint labelling sigmas.
        use_area (bool): If gt annotations (eg. CrowdPose, AIC)
                         do not have 'area', please set use_area=False.
    """
    assert coco_gt != None or anno_file != None
    if style == 'keypoints_crowd':
        #please install xtcocotools==1.6
        from xtcocotools.coco import COCO
        from xtcocotools.cocoeval import COCOeval
    else:
        from pycocotools.coco import COCO
        from pycocotools.cocoeval import COCOeval
    if coco_gt == None:
        coco_gt = COCO(anno_file)
    logger.info("Start evaluate...")
    coco_dt = coco_gt.loadRes(jsonfile)
    if style == 'proposal':
        coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
        coco_eval.params.useCats = 0
        coco_eval.params.maxDets = list(max_dets)
    elif style == 'keypoints_crowd':
        coco_eval = COCOeval(coco_gt, coco_dt, style, sigmas, use_area)
    else:
        coco_eval = COCOeval(coco_gt, coco_dt, style)
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
    if classwise:
        # Compute per-category AP and PR curve
        try:
            from terminaltables import AsciiTable
        except Exception as e:
            logger.error(
                'terminaltables not found, plaese install terminaltables. '
                'for example: `pip install terminaltables`.')
            raise e
        precisions = coco_eval.eval['precision']
        cat_ids = coco_gt.getCatIds()
        # precision: (iou, recall, cls, area range, max dets)
        assert len(cat_ids) == precisions.shape[2]
        results_per_category = []
        for idx, catId in enumerate(cat_ids):
            # area range index 0: all area ranges
            # max dets index -1: typically 100 per image
            nm = coco_gt.loadCats(catId)[0]
            precision = precisions[:, :, idx, 0, -1]
            precision = precision[precision > -1]
            if precision.size:
                ap = np.mean(precision)
            else:
                ap = float('nan')
            results_per_category.append(
                (str(nm["name"]), '{:0.3f}'.format(float(ap))))
            pr_array = precisions[0, :, idx, 0, 2]
            recall_array = np.arange(0.0, 1.01, 0.01)
            draw_pr_curve(
                pr_array,
                recall_array,
                out_dir=style + '_pr_curve',
                file_name='{}_precision_recall_curve.jpg'.format(nm["name"]))
        num_columns = min(6, len(results_per_category) * 2)
        results_flatten = list(itertools.chain(*results_per_category))
        headers = ['category', 'AP'] * (num_columns // 2)
        results_2d = itertools.zip_longest(
            *[results_flatten[i::num_columns] for i in range(num_columns)])
        table_data = [headers]
        table_data += [result for result in results_2d]
        table = AsciiTable(table_data)
        logger.info('Per-category of {} AP: \n{}'.format(style, table.table))
        logger.info("per-category PR curve has output to {} folder.".format(
            style + '_pr_curve'))
    # flush coco evaluation result
    sys.stdout.flush()
    return coco_eval.stats
 def json_eval_results(metric, json_directory, dataset):
    """
    cocoapi eval with already exists proposal.json, bbox.json or mask.json
    """
    assert metric == 'COCO'
    anno_file = dataset.get_anno()
    json_file_list = ['proposal.json', 'bbox.json', 'mask.json']
    if json_directory:
        assert os.path.exists(
            json_directory), "The json directory:{} does not exist".format(
                json_directory)
        for k, v in enumerate(json_file_list):
            json_file_list[k] = os.path.join(str(json_directory), v)
    coco_eval_style = ['proposal', 'bbox', 'segm']
    for i, v_json in enumerate(json_file_list):
        if os.path.exists(v_json):
            cocoapi_eval(v_json, coco_eval_style[i], anno_file=anno_file)
        else:
            logger.info("{} not exists!".format(v_json))
--- a/build/lib/ppdet/metrics/json_results.py
+++ b/build/lib/ppdet/metrics/json_results.py
@ -0,0 +1,149 @@
 #   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import six
 import numpy as np
 def get_det_res(bboxes, bbox_nums, image_id, label_to_cat_id_map, bias=0):
    det_res = []
    k = 0
    for i in range(len(bbox_nums)):
        cur_image_id = int(image_id[i][0])
        det_nums = bbox_nums[i]
        for j in range(det_nums):
            dt = bboxes[k]
            k = k + 1
            num_id, score, xmin, ymin, xmax, ymax = dt.tolist()
            if int(num_id) < 0:
                continue
            category_id = label_to_cat_id_map[int(num_id)]
            w = xmax - xmin + bias
            h = ymax - ymin + bias
            bbox = [xmin, ymin, w, h]
            dt_res = {
                'image_id': cur_image_id,
                'category_id': category_id,
                'bbox': bbox,
                'score': score
            }
            det_res.append(dt_res)
    return det_res
 def get_det_poly_res(bboxes, bbox_nums, image_id, label_to_cat_id_map, bias=0):
    det_res = []
    k = 0
    for i in range(len(bbox_nums)):
        cur_image_id = int(image_id[i][0])
        det_nums = bbox_nums[i]
        for j in range(det_nums):
            dt = bboxes[k]
            k = k + 1
            num_id, score, x1, y1, x2, y2, x3, y3, x4, y4 = dt.tolist()
            if int(num_id) < 0:
                continue
            category_id = label_to_cat_id_map[int(num_id)]
            rbox = [x1, y1, x2, y2, x3, y3, x4, y4]
            dt_res = {
                'image_id': cur_image_id,
                'category_id': category_id,
                'bbox': rbox,
                'score': score
            }
            det_res.append(dt_res)
    return det_res
 def get_seg_res(masks, bboxes, mask_nums, image_id, label_to_cat_id_map):
    import pycocotools.mask as mask_util
    seg_res = []
    k = 0
    for i in range(len(mask_nums)):
        cur_image_id = int(image_id[i][0])
        det_nums = mask_nums[i]
        for j in range(det_nums):
            mask = masks[k].astype(np.uint8)
            score = float(bboxes[k][1])
            label = int(bboxes[k][0])
            k = k + 1
            if label == -1:
                continue
            cat_id = label_to_cat_id_map[label]
            rle = mask_util.encode(
                np.array(
                    mask[:, :, None], order="F", dtype="uint8"))[0]
            if six.PY3:
                if 'counts' in rle:
                    rle['counts'] = rle['counts'].decode("utf8")
            sg_res = {
                'image_id': cur_image_id,
                'category_id': cat_id,
                'segmentation': rle,
                'score': score
            }
            seg_res.append(sg_res)
    return seg_res
 def get_solov2_segm_res(results, image_id, num_id_to_cat_id_map):
    import pycocotools.mask as mask_util
    segm_res = []
    # for each batch
    segms = results['segm'].astype(np.uint8)
    clsid_labels = results['cate_label']
    clsid_scores = results['cate_score']
    lengths = segms.shape[0]
    im_id = int(image_id[0][0])
    if lengths == 0 or segms is None:
        return None
    # for each sample
    for i in range(lengths - 1):
        clsid = int(clsid_labels[i])
        catid = num_id_to_cat_id_map[clsid]
        score = float(clsid_scores[i])
        mask = segms[i]
        segm = mask_util.encode(np.array(mask[:, :, np.newaxis], order='F'))[0]
        segm['counts'] = segm['counts'].decode('utf8')
        coco_res = {
            'image_id': im_id,
            'category_id': catid,
            'segmentation': segm,
            'score': score
        }
        segm_res.append(coco_res)
    return segm_res
 def get_keypoint_res(results, im_id):
    anns = []
    preds = results['keypoint']
    for idx in range(im_id.shape[0]):
        image_id = im_id[idx].item()
        kpts, scores = preds[idx]
        for kpt, score in zip(kpts, scores):
            kpt = kpt.flatten()
            ann = {
                'image_id': image_id,
                'category_id': 1,  # XXX hard code
                'keypoints': kpt.tolist(),
                'score': float(score)
            }
            x = kpt[0::3]
            y = kpt[1::3]
            x0, x1, y0, y1 = np.min(x).item(), np.max(x).item(), np.min(y).item(
            ), np.max(y).item()
            ann['area'] = (x1 - x0) * (y1 - y0)
            ann['bbox'] = [x0, y0, x1 - x0, y1 - y0]
            anns.append(ann)
    return anns
--- a/build/lib/ppdet/metrics/keypoint_metrics.py
+++ b/build/lib/ppdet/metrics/keypoint_metrics.py
@ -0,0 +1,200 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 import os
 import json
 from collections import defaultdict
 import numpy as np
 from pycocotools.coco import COCO
 from pycocotools.cocoeval import COCOeval
 from ..modeling.keypoint_utils import oks_nms
 __all__ = ['KeyPointTopDownCOCOEval']
 class KeyPointTopDownCOCOEval(object):
    def __init__(self,
                 anno_file,
                 num_samples,
                 num_joints,
                 output_eval,
                 iou_type='keypoints',
                 in_vis_thre=0.2,
                 oks_thre=0.9):
        super(KeyPointTopDownCOCOEval, self).__init__()
        self.coco = COCO(anno_file)
        self.num_samples = num_samples
        self.num_joints = num_joints
        self.iou_type = iou_type
        self.in_vis_thre = in_vis_thre
        self.oks_thre = oks_thre
        self.output_eval = output_eval
        self.res_file = os.path.join(output_eval, "keypoints_results.json")
        self.reset()
    def reset(self):
        self.results = {
            'all_preds': np.zeros(
                (self.num_samples, self.num_joints, 3), dtype=np.float32),
            'all_boxes': np.zeros((self.num_samples, 6)),
            'image_path': []
        }
        self.eval_results = {}
        self.idx = 0
    def update(self, inputs, outputs):
        kpts, _ = outputs['keypoint'][0]
        num_images = inputs['image'].shape[0]
        self.results['all_preds'][self.idx:self.idx + num_images, :, 0:
                                  3] = kpts[:, :, 0:3]
        self.results['all_boxes'][self.idx:self.idx + num_images, 0:2] = inputs[
            'center'].numpy()[:, 0:2]
        self.results['all_boxes'][self.idx:self.idx + num_images, 2:4] = inputs[
            'scale'].numpy()[:, 0:2]
        self.results['all_boxes'][self.idx:self.idx + num_images, 4] = np.prod(
            inputs['scale'].numpy() * 200, 1)
        self.results['all_boxes'][self.idx:self.idx + num_images,
                                  5] = np.squeeze(inputs['score'].numpy())
        self.results['image_path'].extend(inputs['im_id'].numpy())
        self.idx += num_images
    def _write_coco_keypoint_results(self, keypoints):
        data_pack = [{
            'cat_id': 1,
            'cls': 'person',
            'ann_type': 'keypoints',
            'keypoints': keypoints
        }]
        results = self._coco_keypoint_results_one_category_kernel(data_pack[0])
        if not os.path.exists(self.output_eval):
            os.makedirs(self.output_eval)
        with open(self.res_file, 'w') as f:
            json.dump(results, f, sort_keys=True, indent=4)
        try:
            json.load(open(self.res_file))
        except Exception:
            content = []
            with open(self.res_file, 'r') as f:
                for line in f:
                    content.append(line)
            content[-1] = ']'
            with open(self.res_file, 'w') as f:
                for c in content:
                    f.write(c)
    def _coco_keypoint_results_one_category_kernel(self, data_pack):
        cat_id = data_pack['cat_id']
        keypoints = data_pack['keypoints']
        cat_results = []
        for img_kpts in keypoints:
            if len(img_kpts) == 0:
                continue
            _key_points = np.array(
                [img_kpts[k]['keypoints'] for k in range(len(img_kpts))])
            _key_points = _key_points.reshape(_key_points.shape[0], -1)
            result = [{
                'image_id': img_kpts[k]['image'],
                'category_id': cat_id,
                'keypoints': _key_points[k].tolist(),
                'score': img_kpts[k]['score'],
                'center': list(img_kpts[k]['center']),
                'scale': list(img_kpts[k]['scale'])
            } for k in range(len(img_kpts))]
            cat_results.extend(result)
        return cat_results
    def get_final_results(self, preds, all_boxes, img_path):
        _kpts = []
        for idx, kpt in enumerate(preds):
            _kpts.append({
                'keypoints': kpt,
                'center': all_boxes[idx][0:2],
                'scale': all_boxes[idx][2:4],
                'area': all_boxes[idx][4],
                'score': all_boxes[idx][5],
                'image': int(img_path[idx])
            })
        # image x person x (keypoints)
        kpts = defaultdict(list)
        for kpt in _kpts:
            kpts[kpt['image']].append(kpt)
        # rescoring and oks nms
        num_joints = preds.shape[1]
        in_vis_thre = self.in_vis_thre
        oks_thre = self.oks_thre
        oks_nmsed_kpts = []
        for img in kpts.keys():
            img_kpts = kpts[img]
            for n_p in img_kpts:
                box_score = n_p['score']
                kpt_score = 0
                valid_num = 0
                for n_jt in range(0, num_joints):
                    t_s = n_p['keypoints'][n_jt][2]
                    if t_s > in_vis_thre:
                        kpt_score = kpt_score + t_s
                        valid_num = valid_num + 1
                if valid_num != 0:
                    kpt_score = kpt_score / valid_num
                # rescoring
                n_p['score'] = kpt_score * box_score
            keep = oks_nms([img_kpts[i] for i in range(len(img_kpts))],
                           oks_thre)
            if len(keep) == 0:
                oks_nmsed_kpts.append(img_kpts)
            else:
                oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep])
        self._write_coco_keypoint_results(oks_nmsed_kpts)
    def accumulate(self):
        self.get_final_results(self.results['all_preds'],
                               self.results['all_boxes'],
                               self.results['image_path'])
        coco_dt = self.coco.loadRes(self.res_file)
        coco_eval = COCOeval(self.coco, coco_dt, 'keypoints')
        coco_eval.params.useSegm = None
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()
        keypoint_stats = []
        for ind in range(len(coco_eval.stats)):
            keypoint_stats.append((coco_eval.stats[ind]))
        self.eval_results['keypoint'] = keypoint_stats
    def log(self):
        stats_names = [
            'AP', 'Ap .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
            'AR .75', 'AR (M)', 'AR (L)'
        ]
        num_values = len(stats_names)
        print(' '.join(['| {}'.format(name) for name in stats_names]) + ' |')
        print('|---' * (num_values + 1) + '|')
        print(' '.join([
            '| {:.3f}'.format(value) for value in self.eval_results['keypoint']
        ]) + ' |')
    def get_results(self):
        return self.eval_results
--- a/build/lib/ppdet/metrics/map_utils.py
+++ b/build/lib/ppdet/metrics/map_utils.py
@ -0,0 +1,395 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 from __future__ import unicode_literals
 import os
 import sys
 import numpy as np
 import itertools
 from ppdet.utils.logger import setup_logger
 logger = setup_logger(__name__)
 __all__ = [
    'draw_pr_curve',
    'bbox_area',
    'jaccard_overlap',
    'prune_zero_padding',
    'DetectionMAP',
    'ap_per_class',
    'compute_ap',
 ]
 def draw_pr_curve(precision,
                  recall,
                  iou=0.5,
                  out_dir='pr_curve',
                  file_name='precision_recall_curve.jpg'):
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    output_path = os.path.join(out_dir, file_name)
    try:
        import matplotlib.pyplot as plt
    except Exception as e:
        logger.error('Matplotlib not found, plaese install matplotlib.'
                     'for example: `pip install matplotlib`.')
        raise e
    plt.cla()
    plt.figure('P-R Curve')
    plt.title('Precision/Recall Curve(IoU={})'.format(iou))
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.grid(True)
    plt.plot(recall, precision)
    plt.savefig(output_path)
 def bbox_area(bbox, is_bbox_normalized):
    """
    Calculate area of a bounding box
    """
    norm = 1. - float(is_bbox_normalized)
    width = bbox[2] - bbox[0] + norm
    height = bbox[3] - bbox[1] + norm
    return width * height
 def jaccard_overlap(pred, gt, is_bbox_normalized=False):
    """
    Calculate jaccard overlap ratio between two bounding box
    """
    if pred[0] >= gt[2] or pred[2] <= gt[0] or \
        pred[1] >= gt[3] or pred[3] <= gt[1]:
        return 0.
    inter_xmin = max(pred[0], gt[0])
    inter_ymin = max(pred[1], gt[1])
    inter_xmax = min(pred[2], gt[2])
    inter_ymax = min(pred[3], gt[3])
    inter_size = bbox_area([inter_xmin, inter_ymin, inter_xmax, inter_ymax],
                           is_bbox_normalized)
    pred_size = bbox_area(pred, is_bbox_normalized)
    gt_size = bbox_area(gt, is_bbox_normalized)
    overlap = float(inter_size) / (pred_size + gt_size - inter_size)
    return overlap
 def prune_zero_padding(gt_box, gt_label, difficult=None):
    valid_cnt = 0
    for i in range(len(gt_box)):
        if gt_box[i, 0] == 0 and gt_box[i, 1] == 0 and \
                gt_box[i, 2] == 0 and gt_box[i, 3] == 0:
            break
        valid_cnt += 1
    return (gt_box[:valid_cnt], gt_label[:valid_cnt], difficult[:valid_cnt]
            if difficult is not None else None)
 class DetectionMAP(object):
    """
    Calculate detection mean average precision.
    Currently support two types: 11point and integral
    Args:
        class_num (int): The class number.
        overlap_thresh (float): The threshold of overlap
            ratio between prediction bounding box and 
            ground truth bounding box for deciding 
            true/false positive. Default 0.5.
        map_type (str): Calculation method of mean average
            precision, currently support '11point' and
            'integral'. Default '11point'.
        is_bbox_normalized (bool): Whether bounding boxes
            is normalized to range[0, 1]. Default False.
        evaluate_difficult (bool): Whether to evaluate
            difficult bounding boxes. Default False.
        catid2name (dict): Mapping between category id and category name.
        classwise (bool): Whether per-category AP and draw
            P-R Curve or not.
    """
    def __init__(self,
                 class_num,
                 overlap_thresh=0.5,
                 map_type='11point',
                 is_bbox_normalized=False,
                 evaluate_difficult=False,
                 catid2name=None,
                 classwise=False):
        self.class_num = class_num
        self.overlap_thresh = overlap_thresh
        assert map_type in ['11point', 'integral'], \
                "map_type currently only support '11point' "\
                "and 'integral'"
        self.map_type = map_type
        self.is_bbox_normalized = is_bbox_normalized
        self.evaluate_difficult = evaluate_difficult
        self.classwise = classwise
        self.classes = []
        for cname in catid2name.values():
            self.classes.append(cname)
        self.reset()
    def update(self, bbox, score, label, gt_box, gt_label, difficult=None):
        """
        Update metric statics from given prediction and ground
        truth infomations.
        """
        if difficult is None:
            difficult = np.zeros_like(gt_label)
        # record class gt count
        for gtl, diff in zip(gt_label, difficult):
            if self.evaluate_difficult or int(diff) == 0:
                self.class_gt_counts[int(np.array(gtl))] += 1
        # record class score positive
        visited = [False] * len(gt_label)
        for b, s, l in zip(bbox, score, label):
            xmin, ymin, xmax, ymax = b.tolist()
            pred = [xmin, ymin, xmax, ymax]
            max_idx = -1
            max_overlap = -1.0
            for i, gl in enumerate(gt_label):
                if int(gl) == int(l):
                    overlap = jaccard_overlap(pred, gt_box[i],
                                              self.is_bbox_normalized)
                    if overlap > max_overlap:
                        max_overlap = overlap
                        max_idx = i
            if max_overlap > self.overlap_thresh:
                if self.evaluate_difficult or \
                        int(np.array(difficult[max_idx])) == 0:
                    if not visited[max_idx]:
                        self.class_score_poss[int(l)].append([s, 1.0])
                        visited[max_idx] = True
                    else:
                        self.class_score_poss[int(l)].append([s, 0.0])
            else:
                self.class_score_poss[int(l)].append([s, 0.0])
    def reset(self):
        """
        Reset metric statics
        """
        self.class_score_poss = [[] for _ in range(self.class_num)]
        self.class_gt_counts = [0] * self.class_num
        self.mAP = None
    def accumulate(self):
        """
        Accumulate metric results and calculate mAP
        """
        mAP = 0.
        valid_cnt = 0
        eval_results = []
        for score_pos, count in zip(self.class_score_poss,
                                    self.class_gt_counts):
            if count == 0: continue
            if len(score_pos) == 0:
                valid_cnt += 1
                continue
            accum_tp_list, accum_fp_list = \
                    self._get_tp_fp_accum(score_pos)
            precision = []
            recall = []
            for ac_tp, ac_fp in zip(accum_tp_list, accum_fp_list):
                precision.append(float(ac_tp) / (ac_tp + ac_fp))
                recall.append(float(ac_tp) / count)
            one_class_ap = 0.0
            if self.map_type == '11point':
                max_precisions = [0.] * 11
                start_idx = len(precision) - 1
                for j in range(10, -1, -1):
                    for i in range(start_idx, -1, -1):
                        if recall[i] < float(j) / 10.:
                            start_idx = i
                            if j > 0:
                                max_precisions[j - 1] = max_precisions[j]
                                break
                        else:
                            if max_precisions[j] < precision[i]:
                                max_precisions[j] = precision[i]
                one_class_ap = sum(max_precisions) / 11.
                mAP += one_class_ap
                valid_cnt += 1
            elif self.map_type == 'integral':
                import math
                prev_recall = 0.
                for i in range(len(precision)):
                    recall_gap = math.fabs(recall[i] - prev_recall)
                    if recall_gap > 1e-6:
                        one_class_ap += precision[i] * recall_gap
                        prev_recall = recall[i]
                mAP += one_class_ap
                valid_cnt += 1
            else:
                logger.error("Unspported mAP type {}".format(self.map_type))
                sys.exit(1)
            eval_results.append({
                'class': self.classes[valid_cnt - 1],
                'ap': one_class_ap,
                'precision': precision,
                'recall': recall,
            })
        self.eval_results = eval_results
        self.mAP = mAP / float(valid_cnt) if valid_cnt > 0 else mAP
    def get_map(self):
        """
        Get mAP result
        """
        if self.mAP is None:
            logger.error("mAP is not calculated.")
        if self.classwise:
            # Compute per-category AP and PR curve
            try:
                from terminaltables import AsciiTable
            except Exception as e:
                logger.error(
                    'terminaltables not found, plaese install terminaltables. '
                    'for example: `pip install terminaltables`.')
                raise e
            results_per_category = []
            for eval_result in self.eval_results:
                results_per_category.append(
                    (str(eval_result['class']),
                     '{:0.3f}'.format(float(eval_result['ap']))))
                draw_pr_curve(
                    eval_result['precision'],
                    eval_result['recall'],
                    out_dir='voc_pr_curve',
                    file_name='{}_precision_recall_curve.jpg'.format(
                        eval_result['class']))
            num_columns = min(6, len(results_per_category) * 2)
            results_flatten = list(itertools.chain(*results_per_category))
            headers = ['category', 'AP'] * (num_columns // 2)
            results_2d = itertools.zip_longest(
                *[results_flatten[i::num_columns] for i in range(num_columns)])
            table_data = [headers]
            table_data += [result for result in results_2d]
            table = AsciiTable(table_data)
            logger.info('Per-category of VOC AP: \n{}'.format(table.table))
            logger.info(
                "per-category PR curve has output to voc_pr_curve folder.")
        return self.mAP
    def _get_tp_fp_accum(self, score_pos_list):
        """
        Calculate accumulating true/false positive results from
        [score, pos] records
        """
        sorted_list = sorted(score_pos_list, key=lambda s: s[0], reverse=True)
        accum_tp = 0
        accum_fp = 0
        accum_tp_list = []
        accum_fp_list = []
        for (score, pos) in sorted_list:
            accum_tp += int(pos)
            accum_tp_list.append(accum_tp)
            accum_fp += 1 - int(pos)
            accum_fp_list.append(accum_fp)
        return accum_tp_list, accum_fp_list
 def ap_per_class(tp, conf, pred_cls, target_cls):
    """
    Computes the average precision, given the recall and precision curves.
    Method originally from https://github.com/rafaelpadilla/Object-Detection-Metrics.
    Args:
        tp (list): True positives.
        conf (list): Objectness value from 0-1.
        pred_cls (list): Predicted object classes.
        target_cls (list): Target object classes.
    """
    tp, conf, pred_cls, target_cls = np.array(tp), np.array(conf), np.array(
        pred_cls), np.array(target_cls)
    # Sort by objectness
    i = np.argsort(-conf)
    tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
    # Find unique classes
    unique_classes = np.unique(np.concatenate((pred_cls, target_cls), 0))
    # Create Precision-Recall curve and compute AP for each class
    ap, p, r = [], [], []
    for c in unique_classes:
        i = pred_cls == c
        n_gt = sum(target_cls == c)  # Number of ground truth objects
        n_p = sum(i)  # Number of predicted objects
        if (n_p == 0) and (n_gt == 0):
            continue
        elif (n_p == 0) or (n_gt == 0):
            ap.append(0)
            r.append(0)
            p.append(0)
        else:
            # Accumulate FPs and TPs
            fpc = np.cumsum(1 - tp[i])
            tpc = np.cumsum(tp[i])
            # Recall
            recall_curve = tpc / (n_gt + 1e-16)
            r.append(tpc[-1] / (n_gt + 1e-16))
            # Precision
            precision_curve = tpc / (tpc + fpc)
            p.append(tpc[-1] / (tpc[-1] + fpc[-1]))
            # AP from recall-precision curve
            ap.append(compute_ap(recall_curve, precision_curve))
    return np.array(ap), unique_classes.astype('int32'), np.array(r), np.array(
        p)
 def compute_ap(recall, precision):
    """
    Computes the average precision, given the recall and precision curves.
    Code originally from https://github.com/rbgirshick/py-faster-rcnn.
    Args:
        recall (list): The recall curve.
        precision (list): The precision curve.
    Returns:
        The average precision as computed in py-faster-rcnn.
    """
    # correct AP calculation
    # first append sentinel values at the end
    mrec = np.concatenate(([0.], recall, [1.]))
    mpre = np.concatenate(([0.], precision, [0.]))
    # compute the precision envelope
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
    # to calculate area under PR curve, look for points
    # where X axis (recall) changes value
    i = np.where(mrec[1:] != mrec[:-1])[0]
    # and sum (\Delta recall) * prec
    ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap
--- a/build/lib/ppdet/metrics/metrics.py
+++ b/build/lib/ppdet/metrics/metrics.py
@ -0,0 +1,301 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import os
 import sys
 import json
 import paddle
 import numpy as np
 from .map_utils import prune_zero_padding, DetectionMAP
 from .coco_utils import get_infer_results, cocoapi_eval
 from .widerface_utils import face_eval_run
 from ppdet.data.source.category import get_categories
 from ppdet.utils.logger import setup_logger
 logger = setup_logger(__name__)
 __all__ = [
    'Metric', 'COCOMetric', 'VOCMetric', 'WiderFaceMetric', 'get_infer_results'
 ]
 COCO_SIGMAS = np.array([
    .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87,
    .89, .89
 ]) / 10.0
 CROWD_SIGMAS = np.array(
    [.79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89, .79,
     .79]) / 10.0
 class Metric(paddle.metric.Metric):
    def name(self):
        return self.__class__.__name__
    def reset(self):
        pass
    def accumulate(self):
        pass
    # paddle.metric.Metric defined :metch:`update`, :meth:`accumulate`
    # :metch:`reset`, in ppdet, we also need following 2 methods:
    # abstract method for logging metric results
    def log(self):
        pass
    # abstract method for getting metric results
    def get_results(self):
        pass
 class COCOMetric(Metric):
    def __init__(self, anno_file, **kwargs):
        assert os.path.isfile(anno_file), \
                "anno_file {} not a file".format(anno_file)
        self.anno_file = anno_file
        self.clsid2catid = kwargs.get('clsid2catid', None)
        if self.clsid2catid is None:
            self.clsid2catid, _ = get_categories('COCO', anno_file)
        self.classwise = kwargs.get('classwise', False)
        self.output_eval = kwargs.get('output_eval', None)
        # TODO: bias should be unified
        self.bias = kwargs.get('bias', 0)
        self.save_prediction_only = kwargs.get('save_prediction_only', False)
        self.iou_type = kwargs.get('IouType', 'bbox')
        self.reset()
    def reset(self):
        # only bbox and mask evaluation support currently
        self.results = {'bbox': [], 'mask': [], 'segm': [], 'keypoint': []}
        self.eval_results = {}
    def update(self, inputs, outputs):
        outs = {}
        # outputs Tensor -> numpy.ndarray
        for k, v in outputs.items():
            outs[k] = v.numpy() if isinstance(v, paddle.Tensor) else v
        im_id = inputs['im_id']
        outs['im_id'] = im_id.numpy() if isinstance(im_id,
                                                    paddle.Tensor) else im_id
        infer_results = get_infer_results(
            outs, self.clsid2catid, bias=self.bias)
        self.results['bbox'] += infer_results[
            'bbox'] if 'bbox' in infer_results else []
        self.results['mask'] += infer_results[
            'mask'] if 'mask' in infer_results else []
        self.results['segm'] += infer_results[
            'segm'] if 'segm' in infer_results else []
        self.results['keypoint'] += infer_results[
            'keypoint'] if 'keypoint' in infer_results else []
    def accumulate(self):
        if len(self.results['bbox']) > 0:
            output = "bbox.json"
            if self.output_eval:
                output = os.path.join(self.output_eval, output)
            with open(output, 'w') as f:
                json.dump(self.results['bbox'], f)
                logger.info('The bbox result is saved to bbox.json.')
            if self.save_prediction_only:
                logger.info('The bbox result is saved to {} and do not '
                            'evaluate the mAP.'.format(output))
            else:
                bbox_stats = cocoapi_eval(
                    output,
                    'bbox',
                    anno_file=self.anno_file,
                    classwise=self.classwise)
                self.eval_results['bbox'] = bbox_stats
                sys.stdout.flush()
        if len(self.results['mask']) > 0:
            output = "mask.json"
            if self.output_eval:
                output = os.path.join(self.output_eval, output)
            with open(output, 'w') as f:
                json.dump(self.results['mask'], f)
                logger.info('The mask result is saved to mask.json.')
            if self.save_prediction_only:
                logger.info('The mask result is saved to {} and do not '
                            'evaluate the mAP.'.format(output))
            else:
                seg_stats = cocoapi_eval(
                    output,
                    'segm',
                    anno_file=self.anno_file,
                    classwise=self.classwise)
                self.eval_results['mask'] = seg_stats
                sys.stdout.flush()
        if len(self.results['segm']) > 0:
            output = "segm.json"
            if self.output_eval:
                output = os.path.join(self.output_eval, output)
            with open(output, 'w') as f:
                json.dump(self.results['segm'], f)
                logger.info('The segm result is saved to segm.json.')
            if self.save_prediction_only:
                logger.info('The segm result is saved to {} and do not '
                            'evaluate the mAP.'.format(output))
            else:
                seg_stats = cocoapi_eval(
                    output,
                    'segm',
                    anno_file=self.anno_file,
                    classwise=self.classwise)
                self.eval_results['mask'] = seg_stats
                sys.stdout.flush()
        if len(self.results['keypoint']) > 0:
            output = "keypoint.json"
            if self.output_eval:
                output = os.path.join(self.output_eval, output)
            with open(output, 'w') as f:
                json.dump(self.results['keypoint'], f)
                logger.info('The keypoint result is saved to keypoint.json.')
            if self.save_prediction_only:
                logger.info('The keypoint result is saved to {} and do not '
                            'evaluate the mAP.'.format(output))
            else:
                style = 'keypoints'
                use_area = True
                sigmas = COCO_SIGMAS
                if self.iou_type == 'keypoints_crowd':
                    style = 'keypoints_crowd'
                    use_area = False
                    sigmas = CROWD_SIGMAS
                keypoint_stats = cocoapi_eval(
                    output,
                    style,
                    anno_file=self.anno_file,
                    classwise=self.classwise,
                    sigmas=sigmas,
                    use_area=use_area)
                self.eval_results['keypoint'] = keypoint_stats
                sys.stdout.flush()
    def log(self):
        pass
    def get_results(self):
        return self.eval_results
 class VOCMetric(Metric):
    def __init__(self,
                 label_list,
                 class_num=20,
                 overlap_thresh=0.5,
                 map_type='11point',
                 is_bbox_normalized=False,
                 evaluate_difficult=False,
                 classwise=False):
        assert os.path.isfile(label_list), \
                "label_list {} not a file".format(label_list)
        self.clsid2catid, self.catid2name = get_categories('VOC', label_list)
        self.overlap_thresh = overlap_thresh
        self.map_type = map_type
        self.evaluate_difficult = evaluate_difficult
        self.detection_map = DetectionMAP(
            class_num=class_num,
            overlap_thresh=overlap_thresh,
            map_type=map_type,
            is_bbox_normalized=is_bbox_normalized,
            evaluate_difficult=evaluate_difficult,
            catid2name=self.catid2name,
            classwise=classwise)
        self.reset()
    def reset(self):
        self.detection_map.reset()
    def update(self, inputs, outputs):
        bbox_np = outputs['bbox'].numpy()
        bboxes = bbox_np[:, 2:]
        scores = bbox_np[:, 1]
        labels = bbox_np[:, 0]
        bbox_lengths = outputs['bbox_num'].numpy()
        if bboxes.shape == (1, 1) or bboxes is None:
            return
        gt_boxes = inputs['gt_bbox']
        gt_labels = inputs['gt_class']
        difficults = inputs['difficult'] if not self.evaluate_difficult \
                            else None
        scale_factor = inputs['scale_factor'].numpy(
        ) if 'scale_factor' in inputs else np.ones(
            (gt_boxes.shape[0], 2)).astype('float32')
        bbox_idx = 0
        for i in range(len(gt_boxes)):
            gt_box = gt_boxes[i].numpy()
            h, w = scale_factor[i]
            gt_box = gt_box / np.array([w, h, w, h])
            gt_label = gt_labels[i].numpy()
            difficult = None if difficults is None \
                            else difficults[i].numpy()
            bbox_num = bbox_lengths[i]
            bbox = bboxes[bbox_idx:bbox_idx + bbox_num]
            score = scores[bbox_idx:bbox_idx + bbox_num]
            label = labels[bbox_idx:bbox_idx + bbox_num]
            gt_box, gt_label, difficult = prune_zero_padding(gt_box, gt_label,
                                                             difficult)
            self.detection_map.update(bbox, score, label, gt_box, gt_label,
                                      difficult)
            bbox_idx += bbox_num
    def accumulate(self):
        logger.info("Accumulating evaluatation results...")
        self.detection_map.accumulate()
    def log(self):
        map_stat = 100. * self.detection_map.get_map()
        logger.info("mAP({:.2f}, {}) = {:.2f}%".format(self.overlap_thresh,
                                                       self.map_type, map_stat))
    def get_results(self):
        return {'bbox': [self.detection_map.get_map()]}
 class WiderFaceMetric(Metric):
    def __init__(self, image_dir, anno_file, multi_scale=True):
        self.image_dir = image_dir
        self.anno_file = anno_file
        self.multi_scale = multi_scale
        self.clsid2catid, self.catid2name = get_categories('widerface')
    def update(self, model):
        face_eval_run(
            model,
            self.image_dir,
            self.anno_file,
            pred_dir='output/pred',
            eval_mode='widerface',
            multi_scale=self.multi_scale)
--- a/build/lib/ppdet/metrics/mot_eval_utils.py
+++ b/build/lib/ppdet/metrics/mot_eval_utils.py
@ -0,0 +1,191 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
 import numpy as np
 import copy
 import motmetrics as mm
 mm.lap.default_solver = 'lap'
 __all__ = [
    'read_mot_results',
    'unzip_objs',
    'MOTEvaluator',
 ]
 def read_mot_results(filename, is_gt=False, is_ignore=False):
    valid_labels = {1}
    ignore_labels = {2, 7, 8, 12}
    results_dict = dict()
    if os.path.isfile(filename):
        with open(filename, 'r') as f:
            for line in f.readlines():
                linelist = line.split(',')
                if len(linelist) < 7:
                    continue
                fid = int(linelist[0])
                if fid < 1:
                    continue
                results_dict.setdefault(fid, list())
                box_size = float(linelist[4]) * float(linelist[5])
                if is_gt:
                    if 'MOT16-' in filename or 'MOT17-' in filename or 'MOT15-' in filename or 'MOT20-' in filename:
                        label = int(float(linelist[7]))
                        mark = int(float(linelist[6]))
                        if mark == 0 or label not in valid_labels:
                            continue
                    score = 1
                elif is_ignore:
                    if 'MOT16-' in filename or 'MOT17-' in filename or 'MOT15-' in filename or 'MOT20-' in filename:
                        label = int(float(linelist[7]))
                        vis_ratio = float(linelist[8])
                        if label not in ignore_labels and vis_ratio >= 0:
                            continue
                    else:
                        continue
                    score = 1
                else:
                    score = float(linelist[6])
                tlwh = tuple(map(float, linelist[2:6]))
                target_id = int(linelist[1])
                results_dict[fid].append((tlwh, target_id, score))
    return results_dict
 """
 labels={'ped', ...			    % 1
        'person_on_vhcl', ...	% 2
        'car', ...				% 3
        'bicycle', ...			% 4
        'mbike', ...			% 5
        'non_mot_vhcl', ...		% 6
        'static_person', ...	% 7
        'distractor', ...		% 8
        'occluder', ...			% 9
        'occluder_on_grnd', ...	% 10
        'occluder_full', ...	% 11
        'reflection', ...		% 12
        'crowd' ...			    % 13
 };
 """
 def unzip_objs(objs):
    if len(objs) > 0:
        tlwhs, ids, scores = zip(*objs)
    else:
        tlwhs, ids, scores = [], [], []
    tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4)
    return tlwhs, ids, scores
 class MOTEvaluator(object):
    def __init__(self, data_root, seq_name, data_type):
        self.data_root = data_root
        self.seq_name = seq_name
        self.data_type = data_type
        self.load_annotations()
        self.reset_accumulator()
    def load_annotations(self):
        assert self.data_type == 'mot'
        gt_filename = os.path.join(self.data_root, self.seq_name, 'gt',
                                   'gt.txt')
        self.gt_frame_dict = read_mot_results(gt_filename, is_gt=True)
        self.gt_ignore_frame_dict = read_mot_results(
            gt_filename, is_ignore=True)
    def reset_accumulator(self):
        self.acc = mm.MOTAccumulator(auto_id=True)
    def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False):
        # results
        trk_tlwhs = np.copy(trk_tlwhs)
        trk_ids = np.copy(trk_ids)
        # gts
        gt_objs = self.gt_frame_dict.get(frame_id, [])
        gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2]
        # ignore boxes
        ignore_objs = self.gt_ignore_frame_dict.get(frame_id, [])
        ignore_tlwhs = unzip_objs(ignore_objs)[0]
        # remove ignored results
        keep = np.ones(len(trk_tlwhs), dtype=bool)
        iou_distance = mm.distances.iou_matrix(
            ignore_tlwhs, trk_tlwhs, max_iou=0.5)
        if len(iou_distance) > 0:
            match_is, match_js = mm.lap.linear_sum_assignment(iou_distance)
            match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js])
            match_ious = iou_distance[match_is, match_js]
            match_js = np.asarray(match_js, dtype=int)
            match_js = match_js[np.logical_not(np.isnan(match_ious))]
            keep[match_js] = False
            trk_tlwhs = trk_tlwhs[keep]
            trk_ids = trk_ids[keep]
        # get distance matrix
        iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5)
        # acc
        self.acc.update(gt_ids, trk_ids, iou_distance)
        if rtn_events and iou_distance.size > 0 and hasattr(self.acc,
                                                            'last_mot_events'):
            events = self.acc.last_mot_events  # only supported by https://github.com/longcw/py-motmetrics
        else:
            events = None
        return events
    def eval_file(self, filename):
        self.reset_accumulator()
        result_frame_dict = read_mot_results(filename, is_gt=False)
        frames = sorted(list(set(result_frame_dict.keys())))
        for frame_id in frames:
            trk_objs = result_frame_dict.get(frame_id, [])
            trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2]
            self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False)
        return self.acc
    @staticmethod
    def get_summary(accs,
                    names,
                    metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1',
                             'precision', 'recall')):
        names = copy.deepcopy(names)
        if metrics is None:
            metrics = mm.metrics.motchallenge_metrics
        metrics = copy.deepcopy(metrics)
        mh = mm.metrics.create()
        summary = mh.compute_many(
            accs, metrics=metrics, names=names, generate_overall=True)
        return summary
    @staticmethod
    def save_summary(summary, filename):
        import pandas as pd
        writer = pd.ExcelWriter(filename)
        summary.to_excel(writer)
        writer.save()
--- a/build/lib/ppdet/metrics/mot_metrics.py
+++ b/build/lib/ppdet/metrics/mot_metrics.py
@ -0,0 +1,183 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import os
 import paddle
 import numpy as np
 from scipy import interpolate
 import paddle.nn.functional as F
 from .map_utils import ap_per_class
 from ppdet.modeling.bbox_utils import bbox_iou_np_expand
 from .mot_eval_utils import MOTEvaluator
 from .metrics import Metric
 from ppdet.utils.logger import setup_logger
 logger = setup_logger(__name__)
 __all__ = ['JDEDetMetric', 'JDEReIDMetric', 'MOTMetric']
 class JDEDetMetric(Metric):
    def __init__(self, overlap_thresh=0.5):
        self.overlap_thresh = overlap_thresh
        self.reset()
    def reset(self):
        self.AP_accum = np.zeros(1)
        self.AP_accum_count = np.zeros(1)
    def update(self, inputs, outputs):
        bboxes = outputs['bbox'][:, 2:].numpy()
        scores = outputs['bbox'][:, 1].numpy()
        labels = outputs['bbox'][:, 0].numpy()
        bbox_lengths = outputs['bbox_num'].numpy()
        if bboxes.shape[0] == 1 and bboxes.sum() == 0.0:
            return
        gt_boxes = inputs['gt_bbox'].numpy()[0]
        gt_labels = inputs['gt_class'].numpy()[0]
        if gt_labels.shape[0] == 0:
            return
        correct = []
        detected = []
        for i in range(bboxes.shape[0]):
            obj_pred = 0
            pred_bbox = bboxes[i].reshape(1, 4)
            # Compute iou with target boxes
            iou = bbox_iou_np_expand(pred_bbox, gt_boxes, x1y1x2y2=True)[0]
            # Extract index of largest overlap
            best_i = np.argmax(iou)
            # If overlap exceeds threshold and classification is correct mark as correct
            if iou[best_i] > self.overlap_thresh and obj_pred == gt_labels[
                    best_i] and best_i not in detected:
                correct.append(1)
                detected.append(best_i)
            else:
                correct.append(0)
        # Compute Average Precision (AP) per class
        target_cls = list(gt_labels.T[0])
        AP, AP_class, R, P = ap_per_class(
            tp=correct,
            conf=scores,
            pred_cls=np.zeros_like(scores),
            target_cls=target_cls)
        self.AP_accum_count += np.bincount(AP_class, minlength=1)
        self.AP_accum += np.bincount(AP_class, minlength=1, weights=AP)
    def accumulate(self):
        logger.info("Accumulating evaluatation results...")
        self.map_stat = self.AP_accum[0] / (self.AP_accum_count[0] + 1E-16)
    def log(self):
        map_stat = 100. * self.map_stat
        logger.info("mAP({:.2f}) = {:.2f}%".format(self.overlap_thresh,
                                                   map_stat))
    def get_results(self):
        return self.map_stat
 class JDEReIDMetric(Metric):
    def __init__(self, far_levels=[1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]):
        self.far_levels = far_levels
        self.reset()
    def reset(self):
        self.embedding = []
        self.id_labels = []
        self.eval_results = {}
    def update(self, inputs, outputs):
        for out in outputs:
            feat, label = out[:-1].clone().detach(), int(out[-1])
            if label != -1:
                self.embedding.append(feat)
                self.id_labels.append(label)
    def accumulate(self):
        logger.info("Computing pairwise similairity...")
        assert len(self.embedding) == len(self.id_labels)
        if len(self.embedding) < 1:
            return None
        embedding = paddle.stack(self.embedding, axis=0)
        emb = F.normalize(embedding, axis=1).numpy()
        pdist = np.matmul(emb, emb.T)
        id_labels = np.array(self.id_labels, dtype='int32').reshape(-1, 1)
        n = len(id_labels)
        id_lbl = np.tile(id_labels, n).T
        gt = id_lbl == id_lbl.T
        up_triangle = np.where(np.triu(pdist) - np.eye(n) * pdist != 0)
        pdist = pdist[up_triangle]
        gt = gt[up_triangle]
        # lazy import metrics here
        from sklearn import metrics
        far, tar, threshold = metrics.roc_curve(gt, pdist)
        interp = interpolate.interp1d(far, tar)
        tar_at_far = [interp(x) for x in self.far_levels]
        for f, fa in enumerate(self.far_levels):
            self.eval_results['TPR@FAR={:.7f}'.format(fa)] = ' {:.4f}'.format(
                tar_at_far[f])
    def log(self):
        for k, v in self.eval_results.items():
            logger.info('{}: {}'.format(k, v))
    def get_results(self):
        return self.eval_results
 class MOTMetric(Metric):
    def __init__(self, save_summary=False):
        self.save_summary = save_summary
        self.MOTEvaluator = MOTEvaluator
        self.result_root = None
        self.reset()
    def reset(self):
        self.accs = []
        self.seqs = []
    def update(self, data_root, seq, data_type, result_root, result_filename):
        evaluator = self.MOTEvaluator(data_root, seq, data_type)
        self.accs.append(evaluator.eval_file(result_filename))
        self.seqs.append(seq)
        self.result_root = result_root
    def accumulate(self):
        import motmetrics as mm
        metrics = mm.metrics.motchallenge_metrics
        mh = mm.metrics.create()
        summary = self.MOTEvaluator.get_summary(self.accs, self.seqs, metrics)
        self.strsummary = mm.io.render_summary(
            summary,
            formatters=mh.formatters,
            namemap=mm.io.motchallenge_metric_names)
        if self.save_summary:
            self.MOTEvaluator.save_summary(
                summary, os.path.join(self.result_root, 'summary.xlsx'))
    def log(self):
        print(self.strsummary)
    def get_results(self):
        return self.strsummary
--- a/build/lib/ppdet/metrics/widerface_utils.py
+++ b/build/lib/ppdet/metrics/widerface_utils.py
@ -0,0 +1,391 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import os
 import cv2
 import numpy as np
 from collections import OrderedDict
 import paddle
 from ppdet.utils.logger import setup_logger
 logger = setup_logger(__name__)
 __all__ = ['face_eval_run', 'lmk2out']
 def face_eval_run(model,
                  image_dir,
                  gt_file,
                  pred_dir='output/pred',
                  eval_mode='widerface',
                  multi_scale=False):
    # load ground truth files
    with open(gt_file, 'r') as f:
        gt_lines = f.readlines()
    imid2path = []
    pos_gt = 0
    while pos_gt < len(gt_lines):
        name_gt = gt_lines[pos_gt].strip('\n\t').split()[0]
        imid2path.append(name_gt)
        pos_gt += 1
        n_gt = int(gt_lines[pos_gt].strip('\n\t').split()[0])
        pos_gt += 1 + n_gt
    logger.info('The ground truth file load {} images'.format(len(imid2path)))
    dets_dist = OrderedDict()
    for iter_id, im_path in enumerate(imid2path):
        image_path = os.path.join(image_dir, im_path)
        if eval_mode == 'fddb':
            image_path += '.jpg'
        assert os.path.exists(image_path)
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if multi_scale:
            shrink, max_shrink = get_shrink(image.shape[0], image.shape[1])
            det0 = detect_face(model, image, shrink)
            det1 = flip_test(model, image, shrink)
            [det2, det3] = multi_scale_test(model, image, max_shrink)
            det4 = multi_scale_test_pyramid(model, image, max_shrink)
            det = np.row_stack((det0, det1, det2, det3, det4))
            dets = bbox_vote(det)
        else:
            dets = detect_face(model, image, 1)
        if eval_mode == 'widerface':
            save_widerface_bboxes(image_path, dets, pred_dir)
        else:
            dets_dist[im_path] = dets
        if iter_id % 100 == 0:
            logger.info('Test iter {}'.format(iter_id))
    if eval_mode == 'fddb':
        save_fddb_bboxes(dets_dist, pred_dir)
    logger.info("Finish evaluation.")
 def detect_face(model, image, shrink):
    image_shape = [image.shape[0], image.shape[1]]
    if shrink != 1:
        h, w = int(image_shape[0] * shrink), int(image_shape[1] * shrink)
        image = cv2.resize(image, (w, h))
        image_shape = [h, w]
    img = face_img_process(image)
    image_shape = np.asarray([image_shape])
    scale_factor = np.asarray([[shrink, shrink]])
    data = {
        "image": paddle.to_tensor(
            img, dtype='float32'),
        "im_shape": paddle.to_tensor(
            image_shape, dtype='float32'),
        "scale_factor": paddle.to_tensor(
            scale_factor, dtype='float32')
    }
    model.eval()
    detection = model(data)
    detection = detection['bbox'].numpy()
    # layout: xmin, ymin, xmax. ymax, score
    if np.prod(detection.shape) == 1:
        logger.info("No face detected")
        return np.array([[0, 0, 0, 0, 0]])
    det_conf = detection[:, 1]
    det_xmin = detection[:, 2]
    det_ymin = detection[:, 3]
    det_xmax = detection[:, 4]
    det_ymax = detection[:, 5]
    det = np.column_stack((det_xmin, det_ymin, det_xmax, det_ymax, det_conf))
    return det
 def flip_test(model, image, shrink):
    img = cv2.flip(image, 1)
    det_f = detect_face(model, img, shrink)
    det_t = np.zeros(det_f.shape)
    img_width = image.shape[1]
    det_t[:, 0] = img_width - det_f[:, 2]
    det_t[:, 1] = det_f[:, 1]
    det_t[:, 2] = img_width - det_f[:, 0]
    det_t[:, 3] = det_f[:, 3]
    det_t[:, 4] = det_f[:, 4]
    return det_t
 def multi_scale_test(model, image, max_shrink):
    # Shrink detecting is only used to detect big faces
    st = 0.5 if max_shrink >= 0.75 else 0.5 * max_shrink
    det_s = detect_face(model, image, st)
    index = np.where(
        np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1)
        > 30)[0]
    det_s = det_s[index, :]
    # Enlarge one times
    bt = min(2, max_shrink) if max_shrink > 1 else (st + max_shrink) / 2
    det_b = detect_face(model, image, bt)
    # Enlarge small image x times for small faces
    if max_shrink > 2:
        bt *= 2
        while bt < max_shrink:
            det_b = np.row_stack((det_b, detect_face(model, image, bt)))
            bt *= 2
        det_b = np.row_stack((det_b, detect_face(model, image, max_shrink)))
    # Enlarged images are only used to detect small faces.
    if bt > 1:
        index = np.where(
            np.minimum(det_b[:, 2] - det_b[:, 0] + 1,
                       det_b[:, 3] - det_b[:, 1] + 1) < 100)[0]
        det_b = det_b[index, :]
    # Shrinked images are only used to detect big faces.
    else:
        index = np.where(
            np.maximum(det_b[:, 2] - det_b[:, 0] + 1,
                       det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]
        det_b = det_b[index, :]
    return det_s, det_b
 def multi_scale_test_pyramid(model, image, max_shrink):
    # Use image pyramids to detect faces
    det_b = detect_face(model, image, 0.25)
    index = np.where(
        np.maximum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1)
        > 30)[0]
    det_b = det_b[index, :]
    st = [0.75, 1.25, 1.5, 1.75]
    for i in range(len(st)):
        if st[i] <= max_shrink:
            det_temp = detect_face(model, image, st[i])
            # Enlarged images are only used to detect small faces.
            if st[i] > 1:
                index = np.where(
                    np.minimum(det_temp[:, 2] - det_temp[:, 0] + 1,
                               det_temp[:, 3] - det_temp[:, 1] + 1) < 100)[0]
                det_temp = det_temp[index, :]
            # Shrinked images are only used to detect big faces.
            else:
                index = np.where(
                    np.maximum(det_temp[:, 2] - det_temp[:, 0] + 1,
                               det_temp[:, 3] - det_temp[:, 1] + 1) > 30)[0]
                det_temp = det_temp[index, :]
            det_b = np.row_stack((det_b, det_temp))
    return det_b
 def to_chw(image):
    """
    Transpose image from HWC to CHW.
    Args:
        image (np.array): an image with HWC layout.
    """
    # HWC to CHW
    if len(image.shape) == 3:
        image = np.swapaxes(image, 1, 2)
        image = np.swapaxes(image, 1, 0)
    return image
 def face_img_process(image,
                     mean=[104., 117., 123.],
                     std=[127.502231, 127.502231, 127.502231]):
    img = np.array(image)
    img = to_chw(img)
    img = img.astype('float32')
    img -= np.array(mean)[:, np.newaxis, np.newaxis].astype('float32')
    img /= np.array(std)[:, np.newaxis, np.newaxis].astype('float32')
    img = [img]
    img = np.array(img)
    return img
 def get_shrink(height, width):
    """
    Args:
        height (int): image height.
        width (int): image width.
    """
    # avoid out of memory
    max_shrink_v1 = (0x7fffffff / 577.0 / (height * width))**0.5
    max_shrink_v2 = ((678 * 1024 * 2.0 * 2.0) / (height * width))**0.5
    def get_round(x, loc):
        str_x = str(x)
        if '.' in str_x:
            str_before, str_after = str_x.split('.')
            len_after = len(str_after)
            if len_after >= 3:
                str_final = str_before + '.' + str_after[0:loc]
                return float(str_final)
            else:
                return x
    max_shrink = get_round(min(max_shrink_v1, max_shrink_v2), 2) - 0.3
    if max_shrink >= 1.5 and max_shrink < 2:
        max_shrink = max_shrink - 0.1
    elif max_shrink >= 2 and max_shrink < 3:
        max_shrink = max_shrink - 0.2
    elif max_shrink >= 3 and max_shrink < 4:
        max_shrink = max_shrink - 0.3
    elif max_shrink >= 4 and max_shrink < 5:
        max_shrink = max_shrink - 0.4
    elif max_shrink >= 5:
        max_shrink = max_shrink - 0.5
    elif max_shrink <= 0.1:
        max_shrink = 0.1
    shrink = max_shrink if max_shrink < 1 else 1
    return shrink, max_shrink
 def bbox_vote(det):
    order = det[:, 4].ravel().argsort()[::-1]
    det = det[order, :]
    if det.shape[0] == 0:
        dets = np.array([[10, 10, 20, 20, 0.002]])
        det = np.empty(shape=[0, 5])
    while det.shape[0] > 0:
        # IOU
        area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1)
        xx1 = np.maximum(det[0, 0], det[:, 0])
        yy1 = np.maximum(det[0, 1], det[:, 1])
        xx2 = np.minimum(det[0, 2], det[:, 2])
        yy2 = np.minimum(det[0, 3], det[:, 3])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        o = inter / (area[0] + area[:] - inter)
        # nms
        merge_index = np.where(o >= 0.3)[0]
        det_accu = det[merge_index, :]
        det = np.delete(det, merge_index, 0)
        if merge_index.shape[0] <= 1:
            if det.shape[0] == 0:
                try:
                    dets = np.row_stack((dets, det_accu))
                except:
                    dets = det_accu
            continue
        det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4))
        max_score = np.max(det_accu[:, 4])
        det_accu_sum = np.zeros((1, 5))
        det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4],
                                      axis=0) / np.sum(det_accu[:, -1:])
        det_accu_sum[:, 4] = max_score
        try:
            dets = np.row_stack((dets, det_accu_sum))
        except:
            dets = det_accu_sum
    dets = dets[0:750, :]
    keep_index = np.where(dets[:, 4] >= 0.01)[0]
    dets = dets[keep_index, :]
    return dets
 def save_widerface_bboxes(image_path, bboxes_scores, output_dir):
    image_name = image_path.split('/')[-1]
    image_class = image_path.split('/')[-2]
    odir = os.path.join(output_dir, image_class)
    if not os.path.exists(odir):
        os.makedirs(odir)
    ofname = os.path.join(odir, '%s.txt' % (image_name[:-4]))
    f = open(ofname, 'w')
    f.write('{:s}\n'.format(image_class + '/' + image_name))
    f.write('{:d}\n'.format(bboxes_scores.shape[0]))
    for box_score in bboxes_scores:
        xmin, ymin, xmax, ymax, score = box_score
        f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'.format(xmin, ymin, (
            xmax - xmin + 1), (ymax - ymin + 1), score))
    f.close()
    logger.info("The predicted result is saved as {}".format(ofname))
 def save_fddb_bboxes(bboxes_scores,
                     output_dir,
                     output_fname='pred_fddb_res.txt'):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    predict_file = os.path.join(output_dir, output_fname)
    f = open(predict_file, 'w')
    for image_path, dets in bboxes_scores.iteritems():
        f.write('{:s}\n'.format(image_path))
        f.write('{:d}\n'.format(dets.shape[0]))
        for box_score in dets:
            xmin, ymin, xmax, ymax, score = box_score
            width, height = xmax - xmin, ymax - ymin
            f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'
                    .format(xmin, ymin, width, height, score))
    logger.info("The predicted result is saved as {}".format(predict_file))
    return predict_file
 def lmk2out(results, is_bbox_normalized=False):
    """
    Args:
        results: request a dict, should include: `landmark`, `im_id`,
                 if is_bbox_normalized=True, also need `im_shape`.
        is_bbox_normalized: whether or not landmark is normalized.
    """
    xywh_res = []
    for t in results:
        bboxes = t['bbox'][0]
        lengths = t['bbox'][1][0]
        im_ids = np.array(t['im_id'][0]).flatten()
        if bboxes.shape == (1, 1) or bboxes is None:
            continue
        face_index = t['face_index'][0]
        prior_box = t['prior_boxes'][0]
        predict_lmk = t['landmark'][0]
        prior = np.reshape(prior_box, (-1, 4))
        predictlmk = np.reshape(predict_lmk, (-1, 10))
        k = 0
        for a in range(len(lengths)):
            num = lengths[a]
            im_id = int(im_ids[a])
            for i in range(num):
                score = bboxes[k][1]
                theindex = face_index[i][0]
                me_prior = prior[theindex, :]
                lmk_pred = predictlmk[theindex, :]
                prior_w = me_prior[2] - me_prior[0]
                prior_h = me_prior[3] - me_prior[1]
                prior_w_center = (me_prior[2] + me_prior[0]) / 2
                prior_h_center = (me_prior[3] + me_prior[1]) / 2
                lmk_decode = np.zeros((10))
                for j in [0, 2, 4, 6, 8]:
                    lmk_decode[j] = lmk_pred[j] * 0.1 * prior_w + prior_w_center
                for j in [1, 3, 5, 7, 9]:
                    lmk_decode[j] = lmk_pred[j] * 0.1 * prior_h + prior_h_center
                im_shape = t['im_shape'][0][a].tolist()
                image_h, image_w = int(im_shape[0]), int(im_shape[1])
                if is_bbox_normalized:
                    lmk_decode = lmk_decode * np.array([
                        image_w, image_h, image_w, image_h, image_w, image_h,
                        image_w, image_h, image_w, image_h
                    ])
                lmk_res = {
                    'image_id': im_id,
                    'landmark': lmk_decode,
                    'score': score,
                }
                xywh_res.append(lmk_res)
                k += 1
    return xywh_res
--- a/build/lib/ppdet/model_zoo/MODEL_ZOO
+++ b/build/lib/ppdet/model_zoo/MODEL_ZOO
@ -0,0 +1,100 @@
 cascade_rcnn\cascade_mask_rcnn_r50_fpn_1x_coco
 cascade_rcnn\cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco
 cascade_rcnn\cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco
 cascade_rcnn\cascade_rcnn_r50_fpn_1x_coco
 cascade_rcnn\cascade_rcnn_r50_vd_fpn_ssld_1x_coco
 cascade_rcnn\cascade_rcnn_r50_vd_fpn_ssld_2x_coco
 dcn\cascade_rcnn_dcn_r50_fpn_1x_coco
 dcn\cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco
 dcn\faster_rcnn_dcn_r101_vd_fpn_1x_coco
 dcn\faster_rcnn_dcn_r50_fpn_1x_coco
 dcn\faster_rcnn_dcn_r50_vd_fpn_1x_coco
 dcn\faster_rcnn_dcn_r50_vd_fpn_2x_coco
 dcn\faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco
 dcn\mask_rcnn_dcn_r101_vd_fpn_1x_coco
 dcn\mask_rcnn_dcn_r50_fpn_1x_coco
 dcn\mask_rcnn_dcn_r50_vd_fpn_2x_coco
 dcn\mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco
 dota\s2anet_1x_dota
 dota\s2anet_conv_1x_dota
 face_detection\blazeface_1000e
 face_detection\blazeface_fpn_ssh_1000e
 faster_rcnn\faster_rcnn_r101_1x_coco
 faster_rcnn\faster_rcnn_r101_fpn_1x_coco
 faster_rcnn\faster_rcnn_r101_fpn_2x_coco
 faster_rcnn\faster_rcnn_r101_vd_fpn_1x_coco
 faster_rcnn\faster_rcnn_r101_vd_fpn_2x_coco
 faster_rcnn\faster_rcnn_r34_fpn_1x_coco
 faster_rcnn\faster_rcnn_r34_vd_fpn_1x_coco
 faster_rcnn\faster_rcnn_r50_1x_coco
 faster_rcnn\faster_rcnn_r50_fpn_1x_coco
 faster_rcnn\faster_rcnn_r50_fpn_2x_coco
 faster_rcnn\faster_rcnn_r50_vd_1x_coco
 faster_rcnn\faster_rcnn_r50_vd_fpn_1x_coco
 faster_rcnn\faster_rcnn_r50_vd_fpn_2x_coco
 faster_rcnn\faster_rcnn_r50_vd_fpn_ssld_1x_coco
 faster_rcnn\faster_rcnn_r50_vd_fpn_ssld_2x_coco
 faster_rcnn\faster_rcnn_x101_vd_64x4d_fpn_1x_coco
 faster_rcnn\faster_rcnn_x101_vd_64x4d_fpn_2x_coco
 fcos\fcos_dcn_r50_fpn_1x_coco
 fcos\fcos_r50_fpn_1x_coco
 fcos\fcos_r50_fpn_multiscale_2x_coco
 gn\cascade_mask_rcnn_r50_fpn_gn_2x_coco
 gn\cascade_rcnn_r50_fpn_gn_2x_coco
 gn\faster_rcnn_r50_fpn_gn_2x_coco
 gn\mask_rcnn_r50_fpn_gn_2x_coco
 hrnet\faster_rcnn_hrnetv2p_w18_1x_coco
 hrnet\faster_rcnn_hrnetv2p_w18_2x_coco
 mask_rcnn\mask_rcnn_r101_fpn_1x_coco
 mask_rcnn\mask_rcnn_r101_vd_fpn_1x_coco
 mask_rcnn\mask_rcnn_r50_1x_coco
 mask_rcnn\mask_rcnn_r50_2x_coco
 mask_rcnn\mask_rcnn_r50_fpn_1x_coco
 mask_rcnn\mask_rcnn_r50_fpn_2x_coco
 mask_rcnn\mask_rcnn_r50_vd_fpn_1x_coco
 mask_rcnn\mask_rcnn_r50_vd_fpn_2x_coco
 mask_rcnn\mask_rcnn_r50_vd_fpn_ssld_1x_coco
 mask_rcnn\mask_rcnn_r50_vd_fpn_ssld_2x_coco
 mask_rcnn\mask_rcnn_x101_vd_64x4d_fpn_1x_coco
 mask_rcnn\mask_rcnn_x101_vd_64x4d_fpn_2x_coco
 pedestrian\pedestrian_yolov3_darknet
 ppyolo\ppyolov2_r101vd_dcn_365e_coco
 ppyolo\ppyolov2_r50vd_dcn_365e_coco
 ppyolo\ppyolov2_r50vd_dcn_voc
 ppyolo\ppyolo_mbv3_large_coco
 ppyolo\ppyolo_mbv3_small_coco
 ppyolo\ppyolo_r18vd_coco
 ppyolo\ppyolo_r50vd_dcn_1x_coco
 ppyolo\ppyolo_r50vd_dcn_1x_minicoco
 ppyolo\ppyolo_r50vd_dcn_2x_coco
 ppyolo\ppyolo_r50vd_dcn_voc
 ppyolo\ppyolo_test
 ppyolo\ppyolo_tiny_650e_coco
 rcnn_enhance\faster_rcnn_enhance_3x_coco
 res2net\faster_rcnn_res2net50_vb_26w_4s_fpn_1x_coco
 res2net\mask_rcnn_res2net50_vb_26w_4s_fpn_2x_coco
 res2net\mask_rcnn_res2net50_vd_26w_4s_fpn_2x_coco
 solov2\solov2_r50_fpn_1x_coco
 solov2\solov2_r50_fpn_3x_coco
 ssd\ssdlite_ghostnet_320_coco
 ssd\ssdlite_mobilenet_v1_300_coco
 ssd\ssdlite_mobilenet_v3_large_320_coco
 ssd\ssdlite_mobilenet_v3_small_320_coco
 ssd\ssd_mobilenet_v1_300_120e_voc
 ssd\ssd_vgg16_300_240e_voc
 ttfnet\pafnet_10x_coco
 ttfnet\pafnet_lite_mobilenet_v3_20x_coco
 ttfnet\ttfnet_darknet53_1x_coco
 vehicle\vehicle_yolov3_darknet
 yolov3\yolov3_darknet53_270e_coco
 yolov3\yolov3_darknet53_270e_voc
 yolov3\yolov3_mobilenet_v1_270e_coco
 yolov3\yolov3_mobilenet_v1_270e_voc
 yolov3\yolov3_mobilenet_v1_roadsign
 yolov3\yolov3_mobilenet_v1_ssld_270e_coco
 yolov3\yolov3_mobilenet_v1_ssld_270e_voc
 yolov3\yolov3_mobilenet_v3_large_270e_coco
 yolov3\yolov3_mobilenet_v3_large_270e_voc
 yolov3\yolov3_mobilenet_v3_large_ssld_270e_voc
 yolov3\yolov3_r34_270e_coco
 yolov3\yolov3_r50vd_dcn_270e_coco
--- a/build/lib/ppdet/model_zoo/init.py
+++ b/build/lib/ppdet/model_zoo/init.py
@ -0,0 +1,18 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 from . import model_zoo
 from .model_zoo import *
 __all__ = model_zoo.__all__
--- a/build/lib/ppdet/model_zoo/model_zoo.py
+++ b/build/lib/ppdet/model_zoo/model_zoo.py
@ -0,0 +1,84 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 import os.path as osp
 import pkg_resources
 try:
    from collections.abc import Sequence
 except:
    from collections import Sequence
 from ppdet.core.workspace import load_config, create
 from ppdet.utils.checkpoint import load_weight
 from ppdet.utils.download import get_config_path
 from ppdet.utils.logger import setup_logger
 logger = setup_logger(__name__)
 __all__ = [
    'list_model', 'get_config_file', 'get_weights_url', 'get_model',
    'MODEL_ZOO_FILENAME'
 ]
 MODEL_ZOO_FILENAME = 'MODEL_ZOO'
 def list_model(filters=[]):
    model_zoo_file = pkg_resources.resource_filename('ppdet.model_zoo',
                                                     MODEL_ZOO_FILENAME)
    with open(model_zoo_file) as f:
        model_names = f.read().splitlines()
    # filter model_name
    def filt(name):
        for f in filters:
            if name.find(f) < 0:
                return False
        return True
    if isinstance(filters, str) or not isinstance(filters, Sequence):
        filters = [filters]
    model_names = [name for name in model_names if filt(name)]
    if len(model_names) == 0 and len(filters) > 0:
        raise ValueError("no model found, please check filters seeting, "
                         "filters can be set as following kinds:\n"
                         "\tDataset: coco, voc ...\n"
                         "\tArchitecture: yolo, rcnn, ssd ...\n"
                         "\tBackbone: resnet, vgg, darknet ...\n")
    model_str = "Available Models:\n"
    for model_name in model_names:
        model_str += "\t{}\n".format(model_name)
    logger.info(model_str)
 # models and configs save on bcebos under dygraph directory
 def get_config_file(model_name):
    return get_config_path("ppdet://configs/{}.yml".format(model_name))
 def get_weights_url(model_name):
    return "ppdet://models/{}.pdparams".format(osp.split(model_name)[-1])
 def get_model(model_name, pretrained=True):
    cfg_file = get_config_file(model_name)
    cfg = load_config(cfg_file)
    model = create(cfg.architecture)
    if pretrained:
        load_weight(model, get_weights_url(model_name))
    return model
--- a/build/lib/ppdet/model_zoo/tests/init.py
+++ b/build/lib/ppdet/model_zoo/tests/init.py
@ -0,0 +1,13 @@
 #   Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
--- a/build/lib/ppdet/model_zoo/tests/test_get_model.py
+++ b/build/lib/ppdet/model_zoo/tests/test_get_model.py
@ -0,0 +1,48 @@
 #   Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import os
 import paddle
 import ppdet
 import unittest
 # NOTE: weights downloading costs time, we choose
 #       a small model for unittesting
 MODEL_NAME = 'ppyolo/ppyolo_tiny_650e_coco'
 class TestGetConfigFile(unittest.TestCase):
    def test_main(self):
        try:
            cfg_file = ppdet.model_zoo.get_config_file(MODEL_NAME)
            assert os.path.isfile(cfg_file)
        except:
            self.assertTrue(False)
 class TestGetModel(unittest.TestCase):
    def test_main(self):
        try:
            model = ppdet.model_zoo.get_model(MODEL_NAME)
            assert isinstance(model, paddle.nn.Layer)
        except:
            self.assertTrue(False)
 if __name__ == '__main__':
    unittest.main()
--- a/build/lib/ppdet/model_zoo/tests/test_list_model.py
+++ b/build/lib/ppdet/model_zoo/tests/test_list_model.py
@ -0,0 +1,68 @@
 #   Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import unittest
 import ppdet
 class TestListModel(unittest.TestCase):
    def setUp(self):
        self._filter = []
    def test_main(self):
        try:
            ppdet.model_zoo.list_model(self._filter)
            self.assertTrue(True)
        except:
            self.assertTrue(False)
 class TestListModelYOLO(TestListModel):
    def setUp(self):
        self._filter = ['yolo']
 class TestListModelRCNN(TestListModel):
    def setUp(self):
        self._filter = ['rcnn']
 class TestListModelSSD(TestListModel):
    def setUp(self):
        self._filter = ['ssd']
 class TestListModelMultiFilter(TestListModel):
    def setUp(self):
        self._filter = ['yolo', 'darknet']
 class TestListModelError(unittest.TestCase):
    def setUp(self):
        self._filter = ['xxx']
    def test_main(self):
        try:
            ppdet.model_zoo.list_model(self._filter)
            self.assertTrue(False)
        except ValueError:
            self.assertTrue(True)
 if __name__ == '__main__':
    unittest.main()
--- a/build/lib/ppdet/modeling/init.py
+++ b/build/lib/ppdet/modeling/init.py
@ -0,0 +1,41 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 import warnings
 warnings.filterwarnings(
    action='ignore', category=DeprecationWarning, module='ops')
 from . import ops
 from . import backbones
 from . import necks
 from . import proposal_generator
 from . import heads
 from . import losses
 from . import architectures
 from . import post_process
 from . import layers
 from . import reid
 from . import mot
 from .ops import *
 from .backbones import *
 from .necks import *
 from .proposal_generator import *
 from .heads import *
 from .losses import *
 from .architectures import *
 from .post_process import *
 from .layers import *
 from .reid import *
 from .mot import *
--- a/build/lib/ppdet/modeling/architectures/init.py
+++ b/build/lib/ppdet/modeling/architectures/init.py
@ -0,0 +1,41 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 from . import meta_arch
 from . import faster_rcnn
 from . import mask_rcnn
 from . import yolo
 from . import cascade_rcnn
 from . import ssd
 from . import fcos
 from . import solov2
 from . import ttfnet
 from . import s2anet
 from . import keypoint_hrhrnet
 from . import keypoint_hrnet
 from . import jde
 from . import deepsort
 from . import fairmot
 from . import centernet
 from .meta_arch import *
 from .faster_rcnn import *
 from .mask_rcnn import *
 from .yolo import *
 from .cascade_rcnn import *
 from .ssd import *
 from .fcos import *
 from .solov2 import *
 from .ttfnet import *
 from .s2anet import *
 from .keypoint_hrhrnet import *
 from .keypoint_hrnet import *
 from .jde import *
 from .deepsort import *
 from .fairmot import *
 from .centernet import *
 from .blazeface import *
--- a/build/lib/ppdet/modeling/architectures/blazeface.py
+++ b/build/lib/ppdet/modeling/architectures/blazeface.py
@ -0,0 +1,91 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 from ppdet.core.workspace import register, create
 from .meta_arch import BaseArch
 __all__ = ['BlazeFace']
@register
 class BlazeFace(BaseArch):
    """
    BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs,
               see https://arxiv.org/abs/1907.05047
    Args:
        backbone (nn.Layer): backbone instance
        neck (nn.Layer): neck instance
        blaze_head (nn.Layer): `blazeHead` instance
        post_process (object): `BBoxPostProcess` instance
    """
    __category__ = 'architecture'
    __inject__ = ['post_process']
    def __init__(self, backbone, blaze_head, neck, post_process):
        super(BlazeFace, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.blaze_head = blaze_head
        self.post_process = post_process
    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # backbone
        backbone = create(cfg['backbone'])
        # fpn
        kwargs = {'input_shape': backbone.out_shape}
        neck = create(cfg['neck'], **kwargs)
        # head
        kwargs = {'input_shape': neck.out_shape}
        blaze_head = create(cfg['blaze_head'], **kwargs)
        return {
            'backbone': backbone,
            'neck': neck,
            'blaze_head': blaze_head,
        }
    def _forward(self):
        # Backbone
        body_feats = self.backbone(self.inputs)
        # neck
        neck_feats = self.neck(body_feats)
        # blaze Head
        if self.training:
            return self.blaze_head(neck_feats, self.inputs['image'],
                                   self.inputs['gt_bbox'],
                                   self.inputs['gt_class'])
        else:
            preds, anchors = self.blaze_head(neck_feats, self.inputs['image'])
            bbox, bbox_num = self.post_process(preds, anchors,
                                               self.inputs['im_shape'],
                                               self.inputs['scale_factor'])
            return bbox, bbox_num
    def get_loss(self, ):
        return {"loss": self._forward()}
    def get_pred(self):
        bbox_pred, bbox_num = self._forward()
        output = {
            "bbox": bbox_pred,
            "bbox_num": bbox_num,
        }
        return output
--- a/build/lib/ppdet/modeling/architectures/cascade_rcnn.py
+++ b/build/lib/ppdet/modeling/architectures/cascade_rcnn.py
@ -0,0 +1,143 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import paddle
 from ppdet.core.workspace import register, create
 from .meta_arch import BaseArch
 __all__ = ['CascadeRCNN']
@register
 class CascadeRCNN(BaseArch):
    """
    Cascade R-CNN network, see https://arxiv.org/abs/1712.00726
    Args:
        backbone (object): backbone instance
        rpn_head (object): `RPNHead` instance
        bbox_head (object): `BBoxHead` instance
        bbox_post_process (object): `BBoxPostProcess` instance
        neck (object): 'FPN' instance
        mask_head (object): `MaskHead` instance
        mask_post_process (object): `MaskPostProcess` instance
    """
    __category__ = 'architecture'
    __inject__ = [
        'bbox_post_process',
        'mask_post_process',
    ]
    def __init__(self,
                 backbone,
                 rpn_head,
                 bbox_head,
                 bbox_post_process,
                 neck=None,
                 mask_head=None,
                 mask_post_process=None):
        super(CascadeRCNN, self).__init__()
        self.backbone = backbone
        self.rpn_head = rpn_head
        self.bbox_head = bbox_head
        self.bbox_post_process = bbox_post_process
        self.neck = neck
        self.mask_head = mask_head
        self.mask_post_process = mask_post_process
        self.with_mask = mask_head is not None
    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        backbone = create(cfg['backbone'])
        kwargs = {'input_shape': backbone.out_shape}
        neck = cfg['neck'] and create(cfg['neck'], **kwargs)
        out_shape = neck and neck.out_shape or backbone.out_shape
        kwargs = {'input_shape': out_shape}
        rpn_head = create(cfg['rpn_head'], **kwargs)
        bbox_head = create(cfg['bbox_head'], **kwargs)
        out_shape = neck and out_shape or bbox_head.get_head().out_shape
        kwargs = {'input_shape': out_shape}
        mask_head = cfg['mask_head'] and create(cfg['mask_head'], **kwargs)
        return {
            'backbone': backbone,
            'neck': neck,
            "rpn_head": rpn_head,
            "bbox_head": bbox_head,
            "mask_head": mask_head,
        }
    def _forward(self):
        body_feats = self.backbone(self.inputs)
        if self.neck is not None:
            body_feats = self.neck(body_feats)
        if self.training:
            rois, rois_num, rpn_loss = self.rpn_head(body_feats, self.inputs)
            bbox_loss, bbox_feat = self.bbox_head(body_feats, rois, rois_num,
                                                  self.inputs)
            rois, rois_num = self.bbox_head.get_assigned_rois()
            bbox_targets = self.bbox_head.get_assigned_targets()
            if self.with_mask:
                mask_loss = self.mask_head(body_feats, rois, rois_num,
                                           self.inputs, bbox_targets, bbox_feat)
                return rpn_loss, bbox_loss, mask_loss
            else:
                return rpn_loss, bbox_loss, {}
        else:
            rois, rois_num, _ = self.rpn_head(body_feats, self.inputs)
            preds, _ = self.bbox_head(body_feats, rois, rois_num, self.inputs)
            refined_rois = self.bbox_head.get_refined_rois()
            im_shape = self.inputs['im_shape']
            scale_factor = self.inputs['scale_factor']
            bbox, bbox_num = self.bbox_post_process(
                preds, (refined_rois, rois_num), im_shape, scale_factor)
            # rescale the prediction back to origin image
            bbox_pred = self.bbox_post_process.get_pred(bbox, bbox_num,
                                                        im_shape, scale_factor)
            if not self.with_mask:
                return bbox_pred, bbox_num, None
            mask_out = self.mask_head(body_feats, bbox, bbox_num, self.inputs)
            origin_shape = self.bbox_post_process.get_origin_shape()
            mask_pred = self.mask_post_process(mask_out[:, 0, :, :], bbox_pred,
                                               bbox_num, origin_shape)
            return bbox_pred, bbox_num, mask_pred
    def get_loss(self, ):
        rpn_loss, bbox_loss, mask_loss = self._forward()
        loss = {}
        loss.update(rpn_loss)
        loss.update(bbox_loss)
        if self.with_mask:
            loss.update(mask_loss)
        total_loss = paddle.add_n(list(loss.values()))
        loss.update({'loss': total_loss})
        return loss
    def get_pred(self):
        bbox_pred, bbox_num, mask_pred = self._forward()
        output = {
            'bbox': bbox_pred,
            'bbox_num': bbox_num,
        }
        if self.with_mask:
            output.update({'mask': mask_pred})
        return output
--- a/build/lib/ppdet/modeling/architectures/centernet.py
+++ b/build/lib/ppdet/modeling/architectures/centernet.py
@ -0,0 +1,102 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 from ppdet.core.workspace import register, create
 from .meta_arch import BaseArch
 __all__ = ['CenterNet']
@register
 class CenterNet(BaseArch):
    """
    CenterNet network, see http://arxiv.org/abs/1904.07850
    Args:
        backbone (object): backbone instance
        neck (object): 'CenterDLAFPN' instance
        head (object): 'CenterHead' instance
        post_process (object): 'CenterNetPostProcess' instance
        for_mot (bool): whether return other features used in tracking model
    """
    __category__ = 'architecture'
    __inject__ = ['post_process']
    def __init__(self,
                 backbone='DLA',
                 neck='CenterDLAFPN',
                 head='CenterHead',
                 post_process='CenterNetPostProcess',
                 for_mot=False):
        super(CenterNet, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.head = head
        self.post_process = post_process
        self.for_mot = for_mot
    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        backbone = create(cfg['backbone'])
        kwargs = {'input_shape': backbone.out_shape}
        neck = create(cfg['neck'], **kwargs)
        kwargs = {'input_shape': neck.out_shape}
        head = create(cfg['head'], **kwargs)
        return {'backbone': backbone, 'neck': neck, "head": head}
    def _forward(self):
        body_feats = self.backbone(self.inputs)
        neck_feat = self.neck(body_feats)
        head_out = self.head(neck_feat, self.inputs)
        if self.for_mot:
            head_out.update({'neck_feat': neck_feat})
        return head_out
    def get_pred(self):
        head_out = self._forward()
        if self.for_mot:
            bbox, bbox_inds = self.post_process(
                head_out['heatmap'],
                head_out['size'],
                head_out['offset'],
                im_shape=self.inputs['im_shape'],
                scale_factor=self.inputs['scale_factor'])
            output = {
                "bbox": bbox,
                "bbox_inds": bbox_inds,
                "neck_feat": head_out['neck_feat']
            }
        else:
            bbox, bbox_num = self.post_process(
                head_out['heatmap'],
                head_out['size'],
                head_out['offset'],
                im_shape=self.inputs['im_shape'],
                scale_factor=self.inputs['scale_factor'])
            output = {
                "bbox": bbox,
                "bbox_num": bbox_num,
            }
        return output
    def get_loss(self):
        return self._forward()
--- a/build/lib/ppdet/modeling/architectures/deepsort.py
+++ b/build/lib/ppdet/modeling/architectures/deepsort.py
@ -0,0 +1,111 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 # 
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import paddle
 from ppdet.core.workspace import register, create
 from .meta_arch import BaseArch
 from ppdet.modeling.mot.utils import Detection, get_crops, scale_coords, clip_box
 __all__ = ['DeepSORT']
@register
 class DeepSORT(BaseArch):
    """
    DeepSORT network, see https://arxiv.org/abs/1703.07402
    Args:
        detector (object): detector model instance
        reid (object): reid model instance
        tracker (object): tracker instance
    """
    __category__ = 'architecture'
    def __init__(self,
                 detector='YOLOv3',
                 reid='PCBPyramid',
                 tracker='DeepSORTTracker'):
        super(DeepSORT, self).__init__()
        self.detector = detector
        self.reid = reid
        self.tracker = tracker
    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        if cfg['detector'] != 'None':
            detector = create(cfg['detector'])
        else:
            detector = None
        reid = create(cfg['reid'])
        tracker = create(cfg['tracker'])
        return {
            "detector": detector,
            "reid": reid,
            "tracker": tracker,
        }
    def _forward(self):
        assert 'ori_image' in self.inputs
        load_dets = 'pred_bboxes' in self.inputs and 'pred_scores' in self.inputs
        ori_image = self.inputs['ori_image']
        input_shape = self.inputs['image'].shape[2:]
        im_shape = self.inputs['im_shape']
        scale_factor = self.inputs['scale_factor']
        if self.detector and not load_dets:
            outs = self.detector(self.inputs)
            if outs['bbox_num'] > 0:
                pred_bboxes = scale_coords(outs['bbox'][:, 2:], input_shape,
                                           im_shape, scale_factor)
                pred_scores = outs['bbox'][:, 1:2]
            else:
                pred_bboxes = []
                pred_scores = []
        else:
            pred_bboxes = self.inputs['pred_bboxes']
            pred_scores = self.inputs['pred_scores']
        if len(pred_bboxes) > 0:
            pred_bboxes = clip_box(pred_bboxes, input_shape, im_shape,
                                   scale_factor)
            bbox_tlwh = paddle.concat(
                (pred_bboxes[:, 0:2],
                 pred_bboxes[:, 2:4] - pred_bboxes[:, 0:2] + 1),
                axis=1)
            crops, pred_scores = get_crops(
                pred_bboxes, ori_image, pred_scores, w=64, h=192)
            if len(crops) > 0:
                features = self.reid(paddle.to_tensor(crops))
                detections = [Detection(bbox_tlwh[i], conf, features[i])\
                                        for i, conf in enumerate(pred_scores)]
            else:
                detections = []
        else:
            detections = []
        self.tracker.predict()
        online_targets = self.tracker.update(detections)
        return online_targets
    def get_pred(self):
        return self._forward()
--- a/build/lib/ppdet/modeling/architectures/fairmot.py
+++ b/build/lib/ppdet/modeling/architectures/fairmot.py
@ -0,0 +1,107 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import paddle
 from ppdet.core.workspace import register, create
 from .meta_arch import BaseArch
 __all__ = ['FairMOT']
@register
 class FairMOT(BaseArch):
    """
    FairMOT network, see http://arxiv.org/abs/2004.01888
    Args:
        detector (object): 'CenterNet' instance
        reid (object): 'FairMOTEmbeddingHead' instance
        tracker (object): 'JDETracker' instance
        loss (object): 'FairMOTLoss' instance
    """
    __category__ = 'architecture'
    __inject__ = ['loss']
    def __init__(self,
                 detector='CenterNet',
                 reid='FairMOTEmbeddingHead',
                 tracker='JDETracker',
                 loss='FairMOTLoss'):
        super(FairMOT, self).__init__()
        self.detector = detector
        self.reid = reid
        self.tracker = tracker
        self.loss = loss
    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        detector = create(cfg['detector'])
        kwargs = {'input_shape': detector.neck.out_shape}
        reid = create(cfg['reid'], **kwargs)
        loss = create(cfg['loss'])
        tracker = create(cfg['tracker'])
        return {
            'detector': detector,
            'reid': reid,
            'loss': loss,
            'tracker': tracker
        }
    def _forward(self):
        loss = dict()
        # det_outs keys:
        # train: det_loss, heatmap_loss, size_loss, offset_loss, neck_feat
        # eval/infer: bbox, bbox_inds, neck_feat
        det_outs = self.detector(self.inputs)
        neck_feat = det_outs['neck_feat']
        if self.training:
            reid_loss = self.reid(neck_feat, self.inputs)
            det_loss = det_outs['det_loss']
            loss = self.loss(det_loss, reid_loss)
            loss.update({
                'heatmap_loss': det_outs['heatmap_loss'],
                'size_loss': det_outs['size_loss'],
                'offset_loss': det_outs['offset_loss'],
                'reid_loss': reid_loss
            })
            return loss
        else:
            embedding = self.reid(neck_feat, self.inputs)
            bbox_inds = det_outs['bbox_inds']
            embedding = paddle.transpose(embedding, [0, 2, 3, 1])
            embedding = paddle.reshape(embedding,
                                       [-1, paddle.shape(embedding)[-1]])
            id_feature = paddle.gather(embedding, bbox_inds)
            dets = det_outs['bbox']
            id_feature = id_feature
            # Note: the tracker only considers batch_size=1 and num_classses=1
            online_targets = self.tracker.update(dets, id_feature)
            return online_targets
    def get_pred(self):
        output = self._forward()
        return output
    def get_loss(self):
        loss = self._forward()
        return loss
--- a/build/lib/ppdet/modeling/architectures/faster_rcnn.py
+++ b/build/lib/ppdet/modeling/architectures/faster_rcnn.py
@ -0,0 +1,106 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import paddle
 from ppdet.core.workspace import register, create
 from .meta_arch import BaseArch
 __all__ = ['FasterRCNN']
@register
 class FasterRCNN(BaseArch):
    """
    Faster R-CNN network, see https://arxiv.org/abs/1506.01497
    Args:
        backbone (object): backbone instance
        rpn_head (object): `RPNHead` instance
        bbox_head (object): `BBoxHead` instance
        bbox_post_process (object): `BBoxPostProcess` instance
        neck (object): 'FPN' instance
    """
    __category__ = 'architecture'
    __inject__ = ['bbox_post_process']
    def __init__(self,
                 backbone,
                 rpn_head,
                 bbox_head,
                 bbox_post_process,
                 neck=None):
        super(FasterRCNN, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.rpn_head = rpn_head
        self.bbox_head = bbox_head
        self.bbox_post_process = bbox_post_process
    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        backbone = create(cfg['backbone'])
        kwargs = {'input_shape': backbone.out_shape}
        neck = cfg['neck'] and create(cfg['neck'], **kwargs)
        out_shape = neck and neck.out_shape or backbone.out_shape
        kwargs = {'input_shape': out_shape}
        rpn_head = create(cfg['rpn_head'], **kwargs)
        bbox_head = create(cfg['bbox_head'], **kwargs)
        return {
            'backbone': backbone,
            'neck': neck,
            "rpn_head": rpn_head,
            "bbox_head": bbox_head,
        }
    def _forward(self):
        body_feats = self.backbone(self.inputs)
        if self.neck is not None:
            body_feats = self.neck(body_feats)
        if self.training:
            rois, rois_num, rpn_loss = self.rpn_head(body_feats, self.inputs)
            bbox_loss, _ = self.bbox_head(body_feats, rois, rois_num,
                                          self.inputs)
            return rpn_loss, bbox_loss
        else:
            rois, rois_num, _ = self.rpn_head(body_feats, self.inputs)
            preds, _ = self.bbox_head(body_feats, rois, rois_num, None)
            im_shape = self.inputs['im_shape']
            scale_factor = self.inputs['scale_factor']
            bbox, bbox_num = self.bbox_post_process(preds, (rois, rois_num),
                                                    im_shape, scale_factor)
            # rescale the prediction back to origin image
            bbox_pred = self.bbox_post_process.get_pred(bbox, bbox_num,
                                                        im_shape, scale_factor)
            return bbox_pred, bbox_num
    def get_loss(self, ):
        rpn_loss, bbox_loss = self._forward()
        loss = {}
        loss.update(rpn_loss)
        loss.update(bbox_loss)
        total_loss = paddle.add_n(list(loss.values()))
        loss.update({'loss': total_loss})
        return loss
    def get_pred(self):
        bbox_pred, bbox_num = self._forward()
        output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
        return output
--- a/build/lib/ppdet/modeling/architectures/fcos.py
+++ b/build/lib/ppdet/modeling/architectures/fcos.py
@ -0,0 +1,105 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import paddle
 from ppdet.core.workspace import register, create
 from .meta_arch import BaseArch
 __all__ = ['FCOS']
@register
 class FCOS(BaseArch):
    """
    FCOS network, see https://arxiv.org/abs/1904.01355
    Args:
        backbone (object): backbone instance
        neck (object): 'FPN' instance
        fcos_head (object): 'FCOSHead' instance
        post_process (object): 'FCOSPostProcess' instance
    """
    __category__ = 'architecture'
    __inject__ = ['fcos_post_process']
    def __init__(self,
                 backbone,
                 neck,
                 fcos_head='FCOSHead',
                 fcos_post_process='FCOSPostProcess'):
        super(FCOS, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.fcos_head = fcos_head
        self.fcos_post_process = fcos_post_process
    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        backbone = create(cfg['backbone'])
        kwargs = {'input_shape': backbone.out_shape}
        neck = create(cfg['neck'], **kwargs)
        kwargs = {'input_shape': neck.out_shape}
        fcos_head = create(cfg['fcos_head'], **kwargs)
        return {
            'backbone': backbone,
            'neck': neck,
            "fcos_head": fcos_head,
        }
    def _forward(self):
        body_feats = self.backbone(self.inputs)
        fpn_feats = self.neck(body_feats)
        fcos_head_outs = self.fcos_head(fpn_feats, self.training)
        if not self.training:
            scale_factor = self.inputs['scale_factor']
            bboxes = self.fcos_post_process(fcos_head_outs, scale_factor)
            return bboxes
        else:
            return fcos_head_outs
    def get_loss(self, ):
        loss = {}
        tag_labels, tag_bboxes, tag_centerness = [], [], []
        for i in range(len(self.fcos_head.fpn_stride)):
            # labels, reg_target, centerness
            k_lbl = 'labels{}'.format(i)
            if k_lbl in self.inputs:
                tag_labels.append(self.inputs[k_lbl])
            k_box = 'reg_target{}'.format(i)
            if k_box in self.inputs:
                tag_bboxes.append(self.inputs[k_box])
            k_ctn = 'centerness{}'.format(i)
            if k_ctn in self.inputs:
                tag_centerness.append(self.inputs[k_ctn])
        fcos_head_outs = self._forward()
        loss_fcos = self.fcos_head.get_loss(fcos_head_outs, tag_labels,
                                            tag_bboxes, tag_centerness)
        loss.update(loss_fcos)
        total_loss = paddle.add_n(list(loss.values()))
        loss.update({'loss': total_loss})
        return loss
    def get_pred(self):
        bbox_pred, bbox_num = self._forward()
        output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
        return output
--- a/build/lib/ppdet/modeling/architectures/jde.py
+++ b/build/lib/ppdet/modeling/architectures/jde.py
@ -0,0 +1,124 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 # 
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import paddle
 from ppdet.modeling.mot.utils import scale_coords
 from ppdet.core.workspace import register, create
 from .meta_arch import BaseArch
 __all__ = ['JDE']
@register
 class JDE(BaseArch):
    __category__ = 'architecture'
    __shared__ = ['metric']
    """
    JDE network, see https://arxiv.org/abs/1909.12605v1
    Args:
        detector (object): detector model instance
        reid (object): reid model instance
        tracker (object): tracker instance
        metric (str): 'MOTDet' for training and detection evaluation, 'ReID'
            for ReID embedding evaluation, or 'MOT' for multi object tracking
            evaluation。
    """
    def __init__(self,
                 detector='YOLOv3',
                 reid='JDEEmbeddingHead',
                 tracker='JDETracker',
                 metric='MOT'):
        super(JDE, self).__init__()
        self.detector = detector
        self.reid = reid
        self.tracker = tracker
        self.metric = metric
    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        detector = create(cfg['detector'])
        kwargs = {'input_shape': detector.neck.out_shape}
        reid = create(cfg['reid'], **kwargs)
        tracker = create(cfg['tracker'])
        return {
            "detector": detector,
            "reid": reid,
            "tracker": tracker,
        }
    def _forward(self):
        det_outs = self.detector(self.inputs)
        if self.training:
            emb_feats = det_outs['emb_feats']
            loss_confs = det_outs['det_losses']['loss_confs']
            loss_boxes = det_outs['det_losses']['loss_boxes']
            jde_losses = self.reid(emb_feats, self.inputs, loss_confs,
                                   loss_boxes)
            return jde_losses
        else:
            if self.metric == 'MOTDet':
                det_results = {
                    'bbox': det_outs['bbox'],
                    'bbox_num': det_outs['bbox_num'],
                }
                return det_results
            elif self.metric == 'ReID':
                emb_feats = det_outs['emb_feats']
                embs_and_gts = self.reid(emb_feats, self.inputs, test_emb=True)
                return embs_and_gts
            elif self.metric == 'MOT':
                emb_feats = det_outs['emb_feats']
                emb_outs = self.reid(emb_feats, self.inputs)
                boxes_idx = det_outs['boxes_idx']
                bbox = det_outs['bbox']
                input_shape = self.inputs['image'].shape[2:]
                im_shape = self.inputs['im_shape']
                scale_factor = self.inputs['scale_factor']
                bbox[:, 2:] = scale_coords(bbox[:, 2:], input_shape, im_shape,
                                           scale_factor)
                nms_keep_idx = det_outs['nms_keep_idx']
                pred_dets = paddle.concat((bbox[:, 2:], bbox[:, 1:2]), axis=1)
                emb_valid = paddle.gather_nd(emb_outs, boxes_idx)
                pred_embs = paddle.gather_nd(emb_valid, nms_keep_idx)
                online_targets = self.tracker.update(pred_dets, pred_embs)
                return online_targets
            else:
                raise ValueError("Unknown metric {} for multi object tracking.".
                                 format(self.metric))
    def get_loss(self):
        return self._forward()
    def get_pred(self):
        return self._forward()
--- a/build/lib/ppdet/modeling/architectures/keypoint_hrhrnet.py
+++ b/build/lib/ppdet/modeling/architectures/keypoint_hrhrnet.py
@ -0,0 +1,286 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 from scipy.optimize import linear_sum_assignment
 from collections import abc, defaultdict
 import numpy as np
 import paddle
 from ppdet.core.workspace import register, create, serializable
 from .meta_arch import BaseArch
 from .. import layers as L
 from ..keypoint_utils import transpred
 __all__ = ['HigherHRNet']
@register
 class HigherHRNet(BaseArch):
    __category__ = 'architecture'
    def __init__(self,
                 backbone='HRNet',
                 hrhrnet_head='HigherHRNetHead',
                 post_process='HrHRNetPostProcess',
                 eval_flip=True,
                 flip_perm=None,
                 max_num_people=30):
        """
        HigherHRNet network, see https://arxiv.org/abs/1908.10357；
        HigherHRNet+swahr, see https://arxiv.org/abs/2012.15175
        Args:
            backbone (nn.Layer): backbone instance
            hrhrnet_head (nn.Layer): keypoint_head instance
            bbox_post_process (object): `BBoxPostProcess` instance
        """
        super(HigherHRNet, self).__init__()
        self.backbone = backbone
        self.hrhrnet_head = hrhrnet_head
        self.post_process = post_process
        self.flip = eval_flip
        self.flip_perm = paddle.to_tensor(flip_perm)
        self.deploy = False
        self.interpolate = L.Upsample(2, mode='bilinear')
        self.pool = L.MaxPool(5, 1, 2)
        self.max_num_people = max_num_people
    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # backbone
        backbone = create(cfg['backbone'])
        # head
        kwargs = {'input_shape': backbone.out_shape}
        hrhrnet_head = create(cfg['hrhrnet_head'], **kwargs)
        post_process = create(cfg['post_process'])
        return {
            'backbone': backbone,
            "hrhrnet_head": hrhrnet_head,
            "post_process": post_process,
        }
    def _forward(self):
        if self.flip and not self.training and not self.deploy:
            self.inputs['image'] = paddle.concat(
                (self.inputs['image'], paddle.flip(self.inputs['image'], [3])))
        body_feats = self.backbone(self.inputs)
        if self.training:
            return self.hrhrnet_head(body_feats, self.inputs)
        else:
            outputs = self.hrhrnet_head(body_feats)
            if self.flip and not self.deploy:
                outputs = [paddle.split(o, 2) for o in outputs]
                output_rflip = [
                    paddle.flip(paddle.gather(o[1], self.flip_perm, 1), [3])
                    for o in outputs
                ]
                output1 = [o[0] for o in outputs]
                heatmap = (output1[0] + output_rflip[0]) / 2.
                tagmaps = [output1[1], output_rflip[1]]
                outputs = [heatmap] + tagmaps
            outputs = self.get_topk(outputs)
            if self.deploy:
                return outputs
            res_lst = []
            h = self.inputs['im_shape'][0, 0].numpy().item()
            w = self.inputs['im_shape'][0, 1].numpy().item()
            kpts, scores = self.post_process(*outputs, h, w)
            res_lst.append([kpts, scores])
            return res_lst
    def get_loss(self):
        return self._forward()
    def get_pred(self):
        outputs = {}
        res_lst = self._forward()
        outputs['keypoint'] = res_lst
        return outputs
    def get_topk(self, outputs):
        # resize to image size
        outputs = [self.interpolate(x) for x in outputs]
        if len(outputs) == 3:
            tagmap = paddle.concat(
                (outputs[1].unsqueeze(4), outputs[2].unsqueeze(4)), axis=4)
        else:
            tagmap = outputs[1].unsqueeze(4)
        heatmap = outputs[0]
        N, J = 1, self.hrhrnet_head.num_joints
        heatmap_maxpool = self.pool(heatmap)
        # topk
        maxmap = heatmap * (heatmap == heatmap_maxpool)
        maxmap = maxmap.reshape([N, J, -1])
        heat_k, inds_k = maxmap.topk(self.max_num_people, axis=2)
        outputs = [heatmap, tagmap, heat_k, inds_k]
        return outputs
@register
@serializable
 class HrHRNetPostProcess(object):
    '''
    HrHRNet postprocess contain:
        1) get topk keypoints in the output heatmap
        2) sample the tagmap's value corresponding to each of the topk coordinate
        3) match different joints to combine to some people with Hungary algorithm
        4) adjust the coordinate by +-0.25 to decrease error std
        5) salvage missing joints by check positivity of heatmap - tagdiff_norm
    Args:
        max_num_people (int): max number of people support in postprocess
        heat_thresh (float): value of topk below this threshhold will be ignored
        tag_thresh (float): coord's value sampled in tagmap below this threshold belong to same people for init
        inputs(list[heatmap]): the output list of modle, [heatmap, heatmap_maxpool, tagmap], heatmap_maxpool used to get topk
        original_height, original_width (float): the original image size
    '''
    def __init__(self, max_num_people=30, heat_thresh=0.1, tag_thresh=1.):
        self.max_num_people = max_num_people
        self.heat_thresh = heat_thresh
        self.tag_thresh = tag_thresh
    def lerp(self, j, y, x, heatmap):
        H, W = heatmap.shape[-2:]
        left = np.clip(x - 1, 0, W - 1)
        right = np.clip(x + 1, 0, W - 1)
        up = np.clip(y - 1, 0, H - 1)
        down = np.clip(y + 1, 0, H - 1)
        offset_y = np.where(heatmap[j, down, x] > heatmap[j, up, x], 0.25,
                            -0.25)
        offset_x = np.where(heatmap[j, y, right] > heatmap[j, y, left], 0.25,
                            -0.25)
        return offset_y + 0.5, offset_x + 0.5
    def __call__(self, heatmap, tagmap, heat_k, inds_k, original_height,
                 original_width):
        N, J, H, W = heatmap.shape
        assert N == 1, "only support batch size 1"
        heatmap = heatmap[0].cpu().detach().numpy()
        tagmap = tagmap[0].cpu().detach().numpy()
        heats = heat_k[0].cpu().detach().numpy()
        inds_np = inds_k[0].cpu().detach().numpy()
        y = inds_np // W
        x = inds_np % W
        tags = tagmap[np.arange(J)[None, :].repeat(self.max_num_people),
                      y.flatten(), x.flatten()].reshape(J, -1, tagmap.shape[-1])
        coords = np.stack((y, x), axis=2)
        # threshold
        mask = heats > self.heat_thresh
        # cluster
        cluster = defaultdict(lambda: {
            'coords': np.zeros((J, 2), dtype=np.float32),
            'scores': np.zeros(J, dtype=np.float32),
            'tags': []
        })
        for jid, m in enumerate(mask):
            num_valid = m.sum()
            if num_valid == 0:
                continue
            valid_inds = np.where(m)[0]
            valid_tags = tags[jid, m, :]
            if len(cluster) == 0:  # initialize
                for i in valid_inds:
                    tag = tags[jid, i]
                    key = tag[0]
                    cluster[key]['tags'].append(tag)
                    cluster[key]['scores'][jid] = heats[jid, i]
                    cluster[key]['coords'][jid] = coords[jid, i]
                continue
            candidates = list(cluster.keys())[:self.max_num_people]
            centroids = [
                np.mean(
                    cluster[k]['tags'], axis=0) for k in candidates
            ]
            num_clusters = len(centroids)
            # shape is (num_valid, num_clusters, tag_dim)
            dist = valid_tags[:, None, :] - np.array(centroids)[None, ...]
            l2_dist = np.linalg.norm(dist, ord=2, axis=2)
            # modulate dist with heat value, see `use_detection_val`
            cost = np.round(l2_dist) * 100 - heats[jid, m, None]
            # pad the cost matrix, otherwise new pose are ignored
            if num_valid > num_clusters:
                cost = np.pad(cost, ((0, 0), (0, num_valid - num_clusters)),
                              constant_values=((0, 0), (0, 1e-10)))
            rows, cols = linear_sum_assignment(cost)
            for y, x in zip(rows, cols):
                tag = tags[jid, y]
                if y < num_valid and x < num_clusters and \
                   l2_dist[y, x] < self.tag_thresh:
                    key = candidates[x]  # merge to cluster
                else:
                    key = tag[0]  # initialize new cluster
                cluster[key]['tags'].append(tag)
                cluster[key]['scores'][jid] = heats[jid, y]
                cluster[key]['coords'][jid] = coords[jid, y]
        # shape is [k, J, 2] and [k, J]
        pose_tags = np.array([cluster[k]['tags'] for k in cluster])
        pose_coords = np.array([cluster[k]['coords'] for k in cluster])
        pose_scores = np.array([cluster[k]['scores'] for k in cluster])
        valid = pose_scores > 0
        pose_kpts = np.zeros((pose_scores.shape[0], J, 3), dtype=np.float32)
        if valid.sum() == 0:
            return pose_kpts, pose_kpts
        # refine coords
        valid_coords = pose_coords[valid].astype(np.int32)
        y = valid_coords[..., 0].flatten()
        x = valid_coords[..., 1].flatten()
        _, j = np.nonzero(valid)
        offsets = self.lerp(j, y, x, heatmap)
        pose_coords[valid, 0] += offsets[0]
        pose_coords[valid, 1] += offsets[1]
        # mean score before salvage
        mean_score = pose_scores.mean(axis=1)
        pose_kpts[valid, 2] = pose_scores[valid]
        # salvage missing joints
        if True:
            for pid, coords in enumerate(pose_coords):
                tag_mean = np.array(pose_tags[pid]).mean(axis=0)
                norm = np.sum((tagmap - tag_mean)**2, axis=3)**0.5
                score = heatmap - np.round(norm)  # (J, H, W)
                flat_score = score.reshape(J, -1)
                max_inds = np.argmax(flat_score, axis=1)
                max_scores = np.max(flat_score, axis=1)
                salvage_joints = (pose_scores[pid] == 0) & (max_scores > 0)
                if salvage_joints.sum() == 0:
                    continue
                y = max_inds[salvage_joints] // W
                x = max_inds[salvage_joints] % W
                offsets = self.lerp(salvage_joints.nonzero()[0], y, x, heatmap)
                y = y.astype(np.float32) + offsets[0]
                x = x.astype(np.float32) + offsets[1]
                pose_coords[pid][salvage_joints, 0] = y
                pose_coords[pid][salvage_joints, 1] = x
                pose_kpts[pid][salvage_joints, 2] = max_scores[salvage_joints]
        pose_kpts[..., :2] = transpred(pose_coords[..., :2][..., ::-1],
                                       original_height, original_width,
                                       min(H, W))
        return pose_kpts, mean_score
--- a/build/lib/ppdet/modeling/architectures/keypoint_hrnet.py
+++ b/build/lib/ppdet/modeling/architectures/keypoint_hrnet.py
@ -0,0 +1,203 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License. 
 # You may obtain a copy of the License at 
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and 
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import paddle
 import numpy as np
 import math
 from ppdet.core.workspace import register, create
 from .meta_arch import BaseArch
 from ..keypoint_utils import transform_preds
 from .. import layers as L
 __all__ = ['TopDownHRNet']
@register
 class TopDownHRNet(BaseArch):
    __category__ = 'architecture'
    __inject__ = ['loss']
    def __init__(self,
                 width,
                 num_joints,
                 backbone='HRNet',
                 loss='KeyPointMSELoss',
                 post_process='HRNetPostProcess',
                 flip_perm=None,
                 flip=True,
                 shift_heatmap=True):
        """
        HRNnet network, see https://arxiv.org/abs/1902.09212
        Args:
            backbone (nn.Layer): backbone instance
            post_process (object): `HRNetPostProcess` instance
            flip_perm (list): The left-right joints exchange order list
        """
        super(TopDownHRNet, self).__init__()
        self.backbone = backbone
        self.post_process = HRNetPostProcess()
        self.loss = loss
        self.flip_perm = flip_perm
        self.flip = flip
        self.final_conv = L.Conv2d(width, num_joints, 1, 1, 0, bias=True)
        self.shift_heatmap = shift_heatmap
        self.deploy = False
    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # backbone
        backbone = create(cfg['backbone'])
        return {'backbone': backbone, }
    def _forward(self):
        feats = self.backbone(self.inputs)
        hrnet_outputs = self.final_conv(feats[0])
        if self.training:
            return self.loss(hrnet_outputs, self.inputs)
        elif self.deploy:
            return hrnet_outputs
        else:
            if self.flip:
                self.inputs['image'] = self.inputs['image'].flip([3])
                feats = self.backbone(self.inputs)
                output_flipped = self.final_conv(feats[0])
                output_flipped = self.flip_back(output_flipped.numpy(),
                                                self.flip_perm)
                output_flipped = paddle.to_tensor(output_flipped.copy())
                if self.shift_heatmap:
                    output_flipped[:, :, :, 1:] = output_flipped.clone(
                    )[:, :, :, 0:-1]
                hrnet_outputs = (hrnet_outputs + output_flipped) * 0.5
            imshape = (self.inputs['im_shape'].numpy()
                       )[:, ::-1] if 'im_shape' in self.inputs else None
            center = self.inputs['center'].numpy(
            ) if 'center' in self.inputs else np.round(imshape / 2.)
            scale = self.inputs['scale'].numpy(
            ) if 'scale' in self.inputs else imshape / 200.
            outputs = self.post_process(hrnet_outputs, center, scale)
            return outputs
    def get_loss(self):
        return self._forward()
    def get_pred(self):
        res_lst = self._forward()
        outputs = {'keypoint': res_lst}
        return outputs
    def flip_back(self, output_flipped, matched_parts):
        assert output_flipped.ndim == 4,\
                'output_flipped should be [batch_size, num_joints, height, width]'
        output_flipped = output_flipped[:, :, :, ::-1]
        for pair in matched_parts:
            tmp = output_flipped[:, pair[0], :, :].copy()
            output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :]
            output_flipped[:, pair[1], :, :] = tmp
        return output_flipped
 class HRNetPostProcess(object):
    def get_max_preds(self, heatmaps):
        '''get predictions from score maps
        Args:
            heatmaps: numpy.ndarray([batch_size, num_joints, height, width])
        Returns:
            preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
            maxvals: numpy.ndarray([batch_size, num_joints, 2]), the maximum confidence of the keypoints
        '''
        assert isinstance(heatmaps,
                          np.ndarray), 'heatmaps should be numpy.ndarray'
        assert heatmaps.ndim == 4, 'batch_images should be 4-ndim'
        batch_size = heatmaps.shape[0]
        num_joints = heatmaps.shape[1]
        width = heatmaps.shape[3]
        heatmaps_reshaped = heatmaps.reshape((batch_size, num_joints, -1))
        idx = np.argmax(heatmaps_reshaped, 2)
        maxvals = np.amax(heatmaps_reshaped, 2)
        maxvals = maxvals.reshape((batch_size, num_joints, 1))
        idx = idx.reshape((batch_size, num_joints, 1))
        preds = np.tile(idx, (1, 1, 2)).astype(np.float32)
        preds[:, :, 0] = (preds[:, :, 0]) % width
        preds[:, :, 1] = np.floor((preds[:, :, 1]) / width)
        pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2))
        pred_mask = pred_mask.astype(np.float32)
        preds *= pred_mask
        return preds, maxvals
    def get_final_preds(self, heatmaps, center, scale):
        """the highest heatvalue location with a quarter offset in the
        direction from the highest response to the second highest response.
        Args:
            heatmaps (numpy.ndarray): The predicted heatmaps
            center (numpy.ndarray): The boxes center
            scale (numpy.ndarray): The scale factor
        Returns:
            preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
            maxvals: numpy.ndarray([batch_size, num_joints, 1]), the maximum confidence of the keypoints
        """
        coords, maxvals = self.get_max_preds(heatmaps)
        heatmap_height = heatmaps.shape[2]
        heatmap_width = heatmaps.shape[3]
        for n in range(coords.shape[0]):
            for p in range(coords.shape[1]):
                hm = heatmaps[n][p]
                px = int(math.floor(coords[n][p][0] + 0.5))
                py = int(math.floor(coords[n][p][1] + 0.5))
                if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1:
                    diff = np.array([
                        hm[py][px + 1] - hm[py][px - 1],
                        hm[py + 1][px] - hm[py - 1][px]
                    ])
                    coords[n][p] += np.sign(diff) * .25
        preds = coords.copy()
        # Transform back
        for i in range(coords.shape[0]):
            preds[i] = transform_preds(coords[i], center[i], scale[i],
                                       [heatmap_width, heatmap_height])
        return preds, maxvals
    def __call__(self, output, center, scale):
        preds, maxvals = self.get_final_preds(output.numpy(), center, scale)
        outputs = [[
            np.concatenate(
                (preds, maxvals), axis=-1), np.mean(
                    maxvals, axis=1)
        ]]
        return outputs
--- a/build/lib/ppdet/modeling/architectures/mask_rcnn.py
+++ b/build/lib/ppdet/modeling/architectures/mask_rcnn.py
@ -0,0 +1,135 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import paddle
 from ppdet.core.workspace import register, create
 from .meta_arch import BaseArch
 __all__ = ['MaskRCNN']
@register
 class MaskRCNN(BaseArch):
    """
    Mask R-CNN network, see https://arxiv.org/abs/1703.06870
    Args:
        backbone (object): backbone instance
        rpn_head (object): `RPNHead` instance
        bbox_head (object): `BBoxHead` instance
        mask_head (object): `MaskHead` instance
        bbox_post_process (object): `BBoxPostProcess` instance
        mask_post_process (object): `MaskPostProcess` instance
        neck (object): 'FPN' instance
    """
    __category__ = 'architecture'
    __inject__ = [
        'bbox_post_process',
        'mask_post_process',
    ]
    def __init__(self,
                 backbone,
                 rpn_head,
                 bbox_head,
                 mask_head,
                 bbox_post_process,
                 mask_post_process,
                 neck=None):
        super(MaskRCNN, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.rpn_head = rpn_head
        self.bbox_head = bbox_head
        self.mask_head = mask_head
        self.bbox_post_process = bbox_post_process
        self.mask_post_process = mask_post_process
    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        backbone = create(cfg['backbone'])
        kwargs = {'input_shape': backbone.out_shape}
        neck = cfg['neck'] and create(cfg['neck'], **kwargs)
        out_shape = neck and neck.out_shape or backbone.out_shape
        kwargs = {'input_shape': out_shape}
        rpn_head = create(cfg['rpn_head'], **kwargs)
        bbox_head = create(cfg['bbox_head'], **kwargs)
        out_shape = neck and out_shape or bbox_head.get_head().out_shape
        kwargs = {'input_shape': out_shape}
        mask_head = create(cfg['mask_head'], **kwargs)
        return {
            'backbone': backbone,
            'neck': neck,
            "rpn_head": rpn_head,
            "bbox_head": bbox_head,
            "mask_head": mask_head,
        }
    def _forward(self):
        body_feats = self.backbone(self.inputs)
        if self.neck is not None:
            body_feats = self.neck(body_feats)
        if self.training:
            rois, rois_num, rpn_loss = self.rpn_head(body_feats, self.inputs)
            bbox_loss, bbox_feat = self.bbox_head(body_feats, rois, rois_num,
                                                  self.inputs)
            rois, rois_num = self.bbox_head.get_assigned_rois()
            bbox_targets = self.bbox_head.get_assigned_targets()
            # Mask Head needs bbox_feat in Mask RCNN
            mask_loss = self.mask_head(body_feats, rois, rois_num, self.inputs,
                                       bbox_targets, bbox_feat)
            return rpn_loss, bbox_loss, mask_loss
        else:
            rois, rois_num, _ = self.rpn_head(body_feats, self.inputs)
            preds, feat_func = self.bbox_head(body_feats, rois, rois_num, None)
            im_shape = self.inputs['im_shape']
            scale_factor = self.inputs['scale_factor']
            bbox, bbox_num = self.bbox_post_process(preds, (rois, rois_num),
                                                    im_shape, scale_factor)
            mask_out = self.mask_head(
                body_feats, bbox, bbox_num, self.inputs, feat_func=feat_func)
            # rescale the prediction back to origin image
            bbox_pred = self.bbox_post_process.get_pred(bbox, bbox_num,
                                                        im_shape, scale_factor)
            origin_shape = self.bbox_post_process.get_origin_shape()
            mask_pred = self.mask_post_process(mask_out[:, 0, :, :], bbox_pred,
                                               bbox_num, origin_shape)
            return bbox_pred, bbox_num, mask_pred
    def get_loss(self, ):
        bbox_loss, mask_loss, rpn_loss = self._forward()
        loss = {}
        loss.update(rpn_loss)
        loss.update(bbox_loss)
        loss.update(mask_loss)
        total_loss = paddle.add_n(list(loss.values()))
        loss.update({'loss': total_loss})
        return loss
    def get_pred(self):
        bbox_pred, bbox_num, mask_pred = self._forward()
        output = {'bbox': bbox_pred, 'bbox_num': bbox_num, 'mask': mask_pred}
        return output
--- a/build/lib/ppdet/modeling/architectures/meta_arch.py
+++ b/build/lib/ppdet/modeling/architectures/meta_arch.py
@ -0,0 +1,44 @@
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import paddle
 import paddle.nn as nn
 from ppdet.core.workspace import register
 __all__ = ['BaseArch']
@register
 class BaseArch(nn.Layer):
    def __init__(self, data_format='NCHW'):
        super(BaseArch, self).__init__()
        self.data_format = data_format
    def forward(self, inputs):
        if self.data_format == 'NHWC':
            image = inputs['image']
            inputs['image'] = paddle.transpose(image, [0, 2, 3, 1])
        self.inputs = inputs
        self.model_arch()
        if self.training:
            out = self.get_loss()
        else:
            out = self.get_pred()
        return out
    def build_inputs(self, data, input_def):
        inputs = {}
        for i, k in enumerate(input_def):
            inputs[k] = data[i]
        return inputs
    def model_arch(self, ):
        pass
    def get_loss(self, ):
        raise NotImplementedError("Should implement get_loss method!")
    def get_pred(self, ):
        raise NotImplementedError("Should implement get_pred method!")
--- a/build/lib/ppdet/modeling/architectures/s2anet.py
+++ b/build/lib/ppdet/modeling/architectures/s2anet.py
@ -0,0 +1,102 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import paddle
 from ppdet.core.workspace import register, create
 from .meta_arch import BaseArch
 __all__ = ['S2ANet']
@register
 class S2ANet(BaseArch):
    __category__ = 'architecture'
    __inject__ = [
        's2anet_head',
        's2anet_bbox_post_process',
    ]
    def __init__(self, backbone, neck, s2anet_head, s2anet_bbox_post_process):
        """
        S2ANet, see https://arxiv.org/pdf/2008.09397.pdf
        Args:
            backbone (object): backbone instance
            neck (object): `FPN` instance
            s2anet_head (object): `S2ANetHead` instance
            s2anet_bbox_post_process (object): `S2ANetBBoxPostProcess` instance
        """
        super(S2ANet, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.s2anet_head = s2anet_head
        self.s2anet_bbox_post_process = s2anet_bbox_post_process
    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        backbone = create(cfg['backbone'])
        kwargs = {'input_shape': backbone.out_shape}
        neck = cfg['neck'] and create(cfg['neck'], **kwargs)
        out_shape = neck and neck.out_shape or backbone.out_shape
        kwargs = {'input_shape': out_shape}
        s2anet_head = create(cfg['s2anet_head'], **kwargs)
        s2anet_bbox_post_process = create(cfg['s2anet_bbox_post_process'],
                                          **kwargs)
        return {
            'backbone': backbone,
            'neck': neck,
            "s2anet_head": s2anet_head,
            "s2anet_bbox_post_process": s2anet_bbox_post_process,
        }
    def _forward(self):
        body_feats = self.backbone(self.inputs)
        if self.neck is not None:
            body_feats = self.neck(body_feats)
        self.s2anet_head(body_feats)
        if self.training:
            loss = self.s2anet_head.get_loss(self.inputs)
            total_loss = paddle.add_n(list(loss.values()))
            loss.update({'loss': total_loss})
            return loss
        else:
            im_shape = self.inputs['im_shape']
            scale_factor = self.inputs['scale_factor']
            nms_pre = self.s2anet_bbox_post_process.nms_pre
            pred_scores, pred_bboxes = self.s2anet_head.get_prediction(nms_pre)
            # post_process
            pred_bboxes, bbox_num = self.s2anet_bbox_post_process(pred_scores,
                                                                  pred_bboxes)
            # rescale the prediction back to origin image
            pred_bboxes = self.s2anet_bbox_post_process.get_pred(
                pred_bboxes, bbox_num, im_shape, scale_factor)
            # output
            output = {'bbox': pred_bboxes, 'bbox_num': bbox_num}
            return output
    def get_loss(self, ):
        loss = self._forward()
        return loss
    def get_pred(self):
        output = self._forward()
        return output
--- a/build/lib/ppdet/modeling/architectures/solov2.py
+++ b/build/lib/ppdet/modeling/architectures/solov2.py
@ -0,0 +1,110 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import paddle
 from ppdet.core.workspace import register, create
 from .meta_arch import BaseArch
 __all__ = ['SOLOv2']
@register
 class SOLOv2(BaseArch):
    """
    SOLOv2 network, see https://arxiv.org/abs/2003.10152
    Args:
        backbone (object): an backbone instance
        solov2_head (object): an `SOLOv2Head` instance
        mask_head (object): an `SOLOv2MaskHead` instance
        neck (object): neck of network, such as feature pyramid network instance
    """
    __category__ = 'architecture'
    def __init__(self, backbone, solov2_head, mask_head, neck=None):
        super(SOLOv2, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.solov2_head = solov2_head
        self.mask_head = mask_head
    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        backbone = create(cfg['backbone'])
        kwargs = {'input_shape': backbone.out_shape}
        neck = create(cfg['neck'], **kwargs)
        kwargs = {'input_shape': neck.out_shape}
        solov2_head = create(cfg['solov2_head'], **kwargs)
        mask_head = create(cfg['mask_head'], **kwargs)
        return {
            'backbone': backbone,
            'neck': neck,
            'solov2_head': solov2_head,
            'mask_head': mask_head,
        }
    def model_arch(self):
        body_feats = self.backbone(self.inputs)
        body_feats = self.neck(body_feats)
        self.seg_pred = self.mask_head(body_feats)
        self.cate_pred_list, self.kernel_pred_list = self.solov2_head(
            body_feats)
    def get_loss(self, ):
        loss = {}
        # get gt_ins_labels, gt_cate_labels, etc.
        gt_ins_labels, gt_cate_labels, gt_grid_orders = [], [], []
        fg_num = self.inputs['fg_num']
        for i in range(len(self.solov2_head.seg_num_grids)):
            ins_label = 'ins_label{}'.format(i)
            if ins_label in self.inputs:
                gt_ins_labels.append(self.inputs[ins_label])
            cate_label = 'cate_label{}'.format(i)
            if cate_label in self.inputs:
                gt_cate_labels.append(self.inputs[cate_label])
            grid_order = 'grid_order{}'.format(i)
            if grid_order in self.inputs:
                gt_grid_orders.append(self.inputs[grid_order])
        loss_solov2 = self.solov2_head.get_loss(
            self.cate_pred_list, self.kernel_pred_list, self.seg_pred,
            gt_ins_labels, gt_cate_labels, gt_grid_orders, fg_num)
        loss.update(loss_solov2)
        total_loss = paddle.add_n(list(loss.values()))
        loss.update({'loss': total_loss})
        return loss
    def get_pred(self):
        seg_masks, cate_labels, cate_scores, bbox_num = self.solov2_head.get_prediction(
            self.cate_pred_list, self.kernel_pred_list, self.seg_pred,
            self.inputs['im_shape'], self.inputs['scale_factor'])
        outs = {
            "segm": seg_masks,
            "bbox_num": bbox_num,
            'cate_label': cate_labels,
            'cate_score': cate_scores
        }
        return outs
--- a/build/lib/ppdet/modeling/architectures/ssd.py
+++ b/build/lib/ppdet/modeling/architectures/ssd.py
@ -0,0 +1,84 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 from ppdet.core.workspace import register, create
 from .meta_arch import BaseArch
 __all__ = ['SSD']
@register
 class SSD(BaseArch):
    """
    Single Shot MultiBox Detector, see https://arxiv.org/abs/1512.02325
    Args:
        backbone (nn.Layer): backbone instance
        ssd_head (nn.Layer): `SSDHead` instance
        post_process (object): `BBoxPostProcess` instance
    """
    __category__ = 'architecture'
    __inject__ = ['post_process']
    def __init__(self, backbone, ssd_head, post_process):
        super(SSD, self).__init__()
        self.backbone = backbone
        self.ssd_head = ssd_head
        self.post_process = post_process
    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # backbone
        backbone = create(cfg['backbone'])
        # head
        kwargs = {'input_shape': backbone.out_shape}
        ssd_head = create(cfg['ssd_head'], **kwargs)
        return {
            'backbone': backbone,
            "ssd_head": ssd_head,
        }
    def _forward(self):
        # Backbone
        body_feats = self.backbone(self.inputs)
        # SSD Head
        if self.training:
            return self.ssd_head(body_feats, self.inputs['image'],
                                 self.inputs['gt_bbox'],
                                 self.inputs['gt_class'])
        else:
            preds, anchors = self.ssd_head(body_feats, self.inputs['image'])
            bbox, bbox_num = self.post_process(preds, anchors,
                                               self.inputs['im_shape'],
                                               self.inputs['scale_factor'])
            return bbox, bbox_num
    def get_loss(self, ):
        return {"loss": self._forward()}
    def get_pred(self):
        bbox_pred, bbox_num = self._forward()
        output = {
            "bbox": bbox_pred,
            "bbox_num": bbox_num,
        }
        return output
--- a/build/lib/ppdet/modeling/architectures/ttfnet.py
+++ b/build/lib/ppdet/modeling/architectures/ttfnet.py
@ -0,0 +1,98 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import paddle
 from ppdet.core.workspace import register, create
 from .meta_arch import BaseArch
 __all__ = ['TTFNet']
@register
 class TTFNet(BaseArch):
    """
    TTFNet network, see https://arxiv.org/abs/1909.00700
    Args:
        backbone (object): backbone instance
        neck (object): 'TTFFPN' instance
        ttf_head (object): 'TTFHead' instance
        post_process (object): 'BBoxPostProcess' instance
    """
    __category__ = 'architecture'
    __inject__ = ['post_process']
    def __init__(self,
                 backbone='DarkNet',
                 neck='TTFFPN',
                 ttf_head='TTFHead',
                 post_process='BBoxPostProcess'):
        super(TTFNet, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.ttf_head = ttf_head
        self.post_process = post_process
    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        backbone = create(cfg['backbone'])
        kwargs = {'input_shape': backbone.out_shape}
        neck = create(cfg['neck'], **kwargs)
        kwargs = {'input_shape': neck.out_shape}
        ttf_head = create(cfg['ttf_head'], **kwargs)
        return {
            'backbone': backbone,
            'neck': neck,
            "ttf_head": ttf_head,
        }
    def _forward(self):
        body_feats = self.backbone(self.inputs)
        body_feats = self.neck(body_feats)
        hm, wh = self.ttf_head(body_feats)
        if self.training:
            return hm, wh
        else:
            bbox, bbox_num = self.post_process(hm, wh, self.inputs['im_shape'],
                                               self.inputs['scale_factor'])
            return bbox, bbox_num
    def get_loss(self, ):
        loss = {}
        heatmap = self.inputs['ttf_heatmap']
        box_target = self.inputs['ttf_box_target']
        reg_weight = self.inputs['ttf_reg_weight']
        hm, wh = self._forward()
        head_loss = self.ttf_head.get_loss(hm, wh, heatmap, box_target,
                                           reg_weight)
        loss.update(head_loss)
        total_loss = paddle.add_n(list(loss.values()))
        loss.update({'loss': total_loss})
        return loss
    def get_pred(self):
        bbox_pred, bbox_num = self._forward()
        output = {
            "bbox": bbox_pred,
            "bbox_num": bbox_num,
        }
        return output
--- a/build/lib/ppdet/modeling/architectures/yolo.py
+++ b/build/lib/ppdet/modeling/architectures/yolo.py
@ -0,0 +1,104 @@
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 from ppdet.core.workspace import register, create
 from .meta_arch import BaseArch
 __all__ = ['YOLOv3']
@register
 class YOLOv3(BaseArch):
    __category__ = 'architecture'
    __shared__ = ['data_format']
    __inject__ = ['post_process']
    def __init__(self,
                 backbone='DarkNet',
                 neck='YOLOv3FPN',
                 yolo_head='YOLOv3Head',
                 post_process='BBoxPostProcess',
                 data_format='NCHW',
                 for_mot=False):
        """
        YOLOv3 network, see https://arxiv.org/abs/1804.02767
        Args:
            backbone (nn.Layer): backbone instance
            neck (nn.Layer): neck instance
            yolo_head (nn.Layer): anchor_head instance
            bbox_post_process (object): `BBoxPostProcess` instance
            data_format (str): data format, NCHW or NHWC
            for_mot (bool): whether return other features for multi-object tracking
                models, default False in pure object detection models.
        """
        super(YOLOv3, self).__init__(data_format=data_format)
        self.backbone = backbone
        self.neck = neck
        self.yolo_head = yolo_head
        self.post_process = post_process
        self.for_mot = for_mot
    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # backbone
        backbone = create(cfg['backbone'])
        # fpn
        kwargs = {'input_shape': backbone.out_shape}
        neck = create(cfg['neck'], **kwargs)
        # head
        kwargs = {'input_shape': neck.out_shape}
        yolo_head = create(cfg['yolo_head'], **kwargs)
        return {
            'backbone': backbone,
            'neck': neck,
            "yolo_head": yolo_head,
        }
    def _forward(self):
        body_feats = self.backbone(self.inputs)
        neck_feats = self.neck(body_feats, self.for_mot)
        if isinstance(neck_feats, dict):
            assert self.for_mot == True
            emb_feats = neck_feats['emb_feats']
            neck_feats = neck_feats['yolo_feats']
        if self.training:
            yolo_losses = self.yolo_head(neck_feats, self.inputs)
            if self.for_mot:
                return {'det_losses': yolo_losses, 'emb_feats': emb_feats}
            else:
                return yolo_losses
        else:
            yolo_head_outs = self.yolo_head(neck_feats)
            if self.for_mot:
                boxes_idx, bbox, bbox_num, nms_keep_idx = self.post_process(
                    yolo_head_outs, self.yolo_head.mask_anchors)
                output = {
                    'bbox': bbox,
                    'bbox_num': bbox_num,
                    'boxes_idx': boxes_idx,
                    'nms_keep_idx': nms_keep_idx,
                    'emb_feats': emb_feats,
                }
            else:
                bbox, bbox_num = self.post_process(
                    yolo_head_outs, self.yolo_head.mask_anchors,
                    self.inputs['im_shape'], self.inputs['scale_factor'])
                output = {'bbox': bbox, 'bbox_num': bbox_num}
            return output
    def get_loss(self):
        return self._forward()
    def get_pred(self):
        return self._forward()
--- a/build/lib/ppdet/modeling/backbones/init.py
+++ b/build/lib/ppdet/modeling/backbones/init.py
@ -0,0 +1,37 @@
 # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from . import vgg
 from . import resnet
 from . import darknet
 from . import mobilenet_v1
 from . import mobilenet_v3
 from . import hrnet
 from . import blazenet
 from . import ghostnet
 from . import senet
 from . import res2net
 from . import dla
 from .vgg import *
 from .resnet import *
 from .darknet import *
 from .mobilenet_v1 import *
 from .mobilenet_v3 import *
 from .hrnet import *
 from .blazenet import *
 from .ghostnet import *
 from .senet import *
 from .res2net import *
 from .dla import *
--- a/build/lib/ppdet/modeling/backbones/blazenet.py
+++ b/build/lib/ppdet/modeling/backbones/blazenet.py
@ -0,0 +1,333 @@
 # copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
 from paddle import ParamAttr
 from paddle.nn.initializer import KaimingNormal
 from ppdet.core.workspace import register, serializable
 from ..shape_spec import ShapeSpec
 __all__ = ['BlazeNet']
 def hard_swish(x):
    return x * F.relu6(x + 3) / 6.
 class ConvBNLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 padding,
                 num_groups=1,
                 act='relu',
                 conv_lr=0.1,
                 conv_decay=0.,
                 norm_decay=0.,
                 norm_type='bn',
                 name=None):
        super(ConvBNLayer, self).__init__()
        self.act = act
        self._conv = nn.Conv2D(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            weight_attr=ParamAttr(
                learning_rate=conv_lr,
                initializer=KaimingNormal(),
                name=name + "_weights"),
            bias_attr=False)
        param_attr = ParamAttr(name=name + "_bn_scale")
        bias_attr = ParamAttr(name=name + "_bn_offset")
        if norm_type == 'sync_bn':
            self._batch_norm = nn.SyncBatchNorm(
                out_channels, weight_attr=param_attr, bias_attr=bias_attr)
        else:
            self._batch_norm = nn.BatchNorm(
                out_channels,
                act=None,
                param_attr=param_attr,
                bias_attr=bias_attr,
                use_global_stats=False,
                moving_mean_name=name + '_bn_mean',
                moving_variance_name=name + '_bn_variance')
    def forward(self, x):
        x = self._conv(x)
        x = self._batch_norm(x)
        if self.act == "relu":
            x = F.relu(x)
        elif self.act == "relu6":
            x = F.relu6(x)
        elif self.act == 'leaky':
            x = F.leaky_relu(x)
        elif self.act == 'hard_swish':
            x = hard_swish(x)
        return x
 class BlazeBlock(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels1,
                 out_channels2,
                 double_channels=None,
                 stride=1,
                 use_5x5kernel=True,
                 act='relu',
                 name=None):
        super(BlazeBlock, self).__init__()
        assert stride in [1, 2]
        self.use_pool = not stride == 1
        self.use_double_block = double_channels is not None
        self.conv_dw = []
        if use_5x5kernel:
            self.conv_dw.append(
                self.add_sublayer(
                    name + "1_dw",
                    ConvBNLayer(
                        in_channels=in_channels,
                        out_channels=out_channels1,
                        kernel_size=5,
                        stride=stride,
                        padding=2,
                        num_groups=out_channels1,
                        name=name + "1_dw")))
        else:
            self.conv_dw.append(
                self.add_sublayer(
                    name + "1_dw_1",
                    ConvBNLayer(
                        in_channels=in_channels,
                        out_channels=out_channels1,
                        kernel_size=3,
                        stride=1,
                        padding=1,
                        num_groups=out_channels1,
                        name=name + "1_dw_1")))
            self.conv_dw.append(
                self.add_sublayer(
                    name + "1_dw_2",
                    ConvBNLayer(
                        in_channels=out_channels1,
                        out_channels=out_channels1,
                        kernel_size=3,
                        stride=stride,
                        padding=1,
                        num_groups=out_channels1,
                        name=name + "1_dw_2")))
        self.act = act if self.use_double_block else None
        self.conv_pw = ConvBNLayer(
            in_channels=out_channels1,
            out_channels=out_channels2,
            kernel_size=1,
            stride=1,
            padding=0,
            act=self.act,
            name=name + "1_sep")
        if self.use_double_block:
            self.conv_dw2 = []
            if use_5x5kernel:
                self.conv_dw2.append(
                    self.add_sublayer(
                        name + "2_dw",
                        ConvBNLayer(
                            in_channels=out_channels2,
                            out_channels=out_channels2,
                            kernel_size=5,
                            stride=1,
                            padding=2,
                            num_groups=out_channels2,
                            name=name + "2_dw")))
            else:
                self.conv_dw2.append(
                    self.add_sublayer(
                        name + "2_dw_1",
                        ConvBNLayer(
                            in_channels=out_channels2,
                            out_channels=out_channels2,
                            kernel_size=3,
                            stride=1,
                            padding=1,
                            num_groups=out_channels2,
                            name=name + "1_dw_1")))
                self.conv_dw2.append(
                    self.add_sublayer(
                        name + "2_dw_2",
                        ConvBNLayer(
                            in_channels=out_channels2,
                            out_channels=out_channels2,
                            kernel_size=3,
                            stride=1,
                            padding=1,
                            num_groups=out_channels2,
                            name=name + "2_dw_2")))
            self.conv_pw2 = ConvBNLayer(
                in_channels=out_channels2,
                out_channels=double_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                name=name + "2_sep")
        # shortcut
        if self.use_pool:
            shortcut_channel = double_channels or out_channels2
            self._shortcut = []
            self._shortcut.append(
                self.add_sublayer(
                    name + '_shortcut_pool',
                    nn.MaxPool2D(
                        kernel_size=stride, stride=stride, ceil_mode=True)))
            self._shortcut.append(
                self.add_sublayer(
                    name + '_shortcut_conv',
                    ConvBNLayer(
                        in_channels=in_channels,
                        out_channels=shortcut_channel,
                        kernel_size=1,
                        stride=1,
                        padding=0,
                        name="shortcut" + name)))
    def forward(self, x):
        y = x
        for conv_dw_block in self.conv_dw:
            y = conv_dw_block(y)
        y = self.conv_pw(y)
        if self.use_double_block:
            for conv_dw2_block in self.conv_dw2:
                y = conv_dw2_block(y)
            y = self.conv_pw2(y)
        if self.use_pool:
            for shortcut in self._shortcut:
                x = shortcut(x)
        return F.relu(paddle.add(x, y))
@register
@serializable
 class BlazeNet(nn.Layer):
    """
    BlazeFace, see https://arxiv.org/abs/1907.05047
    Args:
        blaze_filters (list): number of filter for each blaze block.
        double_blaze_filters (list): number of filter for each double_blaze block.
        use_5x5kernel (bool): whether or not filter size is 5x5 in depth-wise conv.
    """
    def __init__(
            self,
            blaze_filters=[[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]],
            double_blaze_filters=[[48, 24, 96, 2], [96, 24, 96], [96, 24, 96],
                                  [96, 24, 96, 2], [96, 24, 96], [96, 24, 96]],
            use_5x5kernel=True,
            act=None):
        super(BlazeNet, self).__init__()
        conv1_num_filters = blaze_filters[0][0]
        self.conv1 = ConvBNLayer(
            in_channels=3,
            out_channels=conv1_num_filters,
            kernel_size=3,
            stride=2,
            padding=1,
            name="conv1")
        in_channels = conv1_num_filters
        self.blaze_block = []
        self._out_channels = []
        for k, v in enumerate(blaze_filters):
            assert len(v) in [2, 3], \
                "blaze_filters {} not in [2, 3]"
            if len(v) == 2:
                self.blaze_block.append(
                    self.add_sublayer(
                        'blaze_{}'.format(k),
                        BlazeBlock(
                            in_channels,
                            v[0],
                            v[1],
                            use_5x5kernel=use_5x5kernel,
                            act=act,
                            name='blaze_{}'.format(k))))
            elif len(v) == 3:
                self.blaze_block.append(
                    self.add_sublayer(
                        'blaze_{}'.format(k),
                        BlazeBlock(
                            in_channels,
                            v[0],
                            v[1],
                            stride=v[2],
                            use_5x5kernel=use_5x5kernel,
                            act=act,
                            name='blaze_{}'.format(k))))
            in_channels = v[1]
        for k, v in enumerate(double_blaze_filters):
            assert len(v) in [3, 4], \
                "blaze_filters {} not in [3, 4]"
            if len(v) == 3:
                self.blaze_block.append(
                    self.add_sublayer(
                        'double_blaze_{}'.format(k),
                        BlazeBlock(
                            in_channels,
                            v[0],
                            v[1],
                            double_channels=v[2],
                            use_5x5kernel=use_5x5kernel,
                            act=act,
                            name='double_blaze_{}'.format(k))))
            elif len(v) == 4:
                self.blaze_block.append(
                    self.add_sublayer(
                        'double_blaze_{}'.format(k),
                        BlazeBlock(
                            in_channels,
                            v[0],
                            v[1],
                            double_channels=v[2],
                            stride=v[3],
                            use_5x5kernel=use_5x5kernel,
                            act=act,
                            name='double_blaze_{}'.format(k))))
            in_channels = v[2]
            self._out_channels.append(in_channels)
    def forward(self, inputs):
        outs = []
        y = self.conv1(inputs['image'])
        for block in self.blaze_block:
            y = block(y)
            outs.append(y)
        return [outs[-4], outs[-1]]
    @property
    def out_shape(self):
        return [
            ShapeSpec(channels=c)
            for c in [self._out_channels[-4], self._out_channels[-1]]
        ]
--- a/build/lib/ppdet/modeling/backbones/darknet.py
+++ b/build/lib/ppdet/modeling/backbones/darknet.py
@ -0,0 +1,340 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
 from ppdet.core.workspace import register, serializable
 from ppdet.modeling.ops import batch_norm, mish
 from ..shape_spec import ShapeSpec
 __all__ = ['DarkNet', 'ConvBNLayer']
 class ConvBNLayer(nn.Layer):
    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size=3,
                 stride=1,
                 groups=1,
                 padding=0,
                 norm_type='bn',
                 norm_decay=0.,
                 act="leaky",
                 freeze_norm=False,
                 data_format='NCHW',
                 name=''):
        """
        conv + bn + activation layer
        Args:
            ch_in (int): input channel
            ch_out (int): output channel
            filter_size (int): filter size, default 3
            stride (int): stride, default 1
            groups (int): number of groups of conv layer, default 1
            padding (int): padding size, default 0
            norm_type (str): batch norm type, default bn
            norm_decay (str): decay for weight and bias of batch norm layer, default 0.
            act (str): activation function type, default 'leaky', which means leaky_relu
            freeze_norm (bool): whether to freeze norm, default False
            data_format (str): data format, NCHW or NHWC
        """
        super(ConvBNLayer, self).__init__()
        self.conv = nn.Conv2D(
            in_channels=ch_in,
            out_channels=ch_out,
            kernel_size=filter_size,
            stride=stride,
            padding=padding,
            groups=groups,
            data_format=data_format,
            bias_attr=False)
        self.batch_norm = batch_norm(
            ch_out,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            data_format=data_format)
        self.act = act
    def forward(self, inputs):
        out = self.conv(inputs)
        out = self.batch_norm(out)
        if self.act == 'leaky':
            out = F.leaky_relu(out, 0.1)
        elif self.act == 'mish':
            out = mish(out)
        return out
 class DownSample(nn.Layer):
    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size=3,
                 stride=2,
                 padding=1,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 data_format='NCHW'):
        """
        downsample layer
        Args:
            ch_in (int): input channel
            ch_out (int): output channel
            filter_size (int): filter size, default 3
            stride (int): stride, default 2
            padding (int): padding size, default 1
            norm_type (str): batch norm type, default bn
            norm_decay (str): decay for weight and bias of batch norm layer, default 0.
            freeze_norm (bool): whether to freeze norm, default False
            data_format (str): data format, NCHW or NHWC
        """
        super(DownSample, self).__init__()
        self.conv_bn_layer = ConvBNLayer(
            ch_in=ch_in,
            ch_out=ch_out,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            data_format=data_format)
        self.ch_out = ch_out
    def forward(self, inputs):
        out = self.conv_bn_layer(inputs)
        return out
 class BasicBlock(nn.Layer):
    def __init__(self,
                 ch_in,
                 ch_out,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 data_format='NCHW'):
        """
        BasicBlock layer of DarkNet
        Args:
            ch_in (int): input channel
            ch_out (int): output channel
            norm_type (str): batch norm type, default bn
            norm_decay (str): decay for weight and bias of batch norm layer, default 0.
            freeze_norm (bool): whether to freeze norm, default False
            data_format (str): data format, NCHW or NHWC
        """
        super(BasicBlock, self).__init__()
        self.conv1 = ConvBNLayer(
            ch_in=ch_in,
            ch_out=ch_out,
            filter_size=1,
            stride=1,
            padding=0,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            data_format=data_format)
        self.conv2 = ConvBNLayer(
            ch_in=ch_out,
            ch_out=ch_out * 2,
            filter_size=3,
            stride=1,
            padding=1,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            data_format=data_format)
    def forward(self, inputs):
        conv1 = self.conv1(inputs)
        conv2 = self.conv2(conv1)
        out = paddle.add(x=inputs, y=conv2)
        return out
 class Blocks(nn.Layer):
    def __init__(self,
                 ch_in,
                 ch_out,
                 count,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 name=None,
                 data_format='NCHW'):
        """
        Blocks layer, which consist of some BaickBlock layers
        Args:
            ch_in (int): input channel
            ch_out (int): output channel
            count (int): number of BasicBlock layer
            norm_type (str): batch norm type, default bn
            norm_decay (str): decay for weight and bias of batch norm layer, default 0.
            freeze_norm (bool): whether to freeze norm, default False
            name (str): layer name
            data_format (str): data format, NCHW or NHWC
        """
        super(Blocks, self).__init__()
        self.basicblock0 = BasicBlock(
            ch_in,
            ch_out,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            data_format=data_format)
        self.res_out_list = []
        for i in range(1, count):
            block_name = '{}.{}'.format(name, i)
            res_out = self.add_sublayer(
                block_name,
                BasicBlock(
                    ch_out * 2,
                    ch_out,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    data_format=data_format))
            self.res_out_list.append(res_out)
        self.ch_out = ch_out
    def forward(self, inputs):
        y = self.basicblock0(inputs)
        for basic_block_i in self.res_out_list:
            y = basic_block_i(y)
        return y
 DarkNet_cfg = {53: ([1, 2, 8, 8, 4])}
@register
@serializable
 class DarkNet(nn.Layer):
    __shared__ = ['norm_type', 'data_format']
    def __init__(self,
                 depth=53,
                 freeze_at=-1,
                 return_idx=[2, 3, 4],
                 num_stages=5,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 data_format='NCHW'):
        """
        Darknet, see https://pjreddie.com/darknet/yolo/
        Args:
            depth (int): depth of network
            freeze_at (int): freeze the backbone at which stage
            filter_size (int): filter size, default 3
            return_idx (list): index of stages whose feature maps are returned
            norm_type (str): batch norm type, default bn
            norm_decay (str): decay for weight and bias of batch norm layer, default 0.
            data_format (str): data format, NCHW or NHWC
        """
        super(DarkNet, self).__init__()
        self.depth = depth
        self.freeze_at = freeze_at
        self.return_idx = return_idx
        self.num_stages = num_stages
        self.stages = DarkNet_cfg[self.depth][0:num_stages]
        self.conv0 = ConvBNLayer(
            ch_in=3,
            ch_out=32,
            filter_size=3,
            stride=1,
            padding=1,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            data_format=data_format)
        self.downsample0 = DownSample(
            ch_in=32,
            ch_out=32 * 2,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            data_format=data_format)
        self._out_channels = []
        self.darknet_conv_block_list = []
        self.downsample_list = []
        ch_in = [64, 128, 256, 512, 1024]
        for i, stage in enumerate(self.stages):
            name = 'stage.{}'.format(i)
            conv_block = self.add_sublayer(
                name,
                Blocks(
                    int(ch_in[i]),
                    32 * (2**i),
                    stage,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    data_format=data_format,
                    name=name))
            self.darknet_conv_block_list.append(conv_block)
            if i in return_idx:
                self._out_channels.append(64 * (2**i))
        for i in range(num_stages - 1):
            down_name = 'stage.{}.downsample'.format(i)
            downsample = self.add_sublayer(
                down_name,
                DownSample(
                    ch_in=32 * (2**(i + 1)),
                    ch_out=32 * (2**(i + 2)),
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    data_format=data_format))
            self.downsample_list.append(downsample)
    def forward(self, inputs):
        x = inputs['image']
        out = self.conv0(x)
        out = self.downsample0(out)
        blocks = []
        for i, conv_block_i in enumerate(self.darknet_conv_block_list):
            out = conv_block_i(out)
            if i == self.freeze_at:
                out.stop_gradient = True
            if i in self.return_idx:
                blocks.append(out)
            if i < self.num_stages - 1:
                out = self.downsample_list[i](out)
        return blocks
    @property
    def out_shape(self):
        return [ShapeSpec(channels=c) for c in self._out_channels]
--- a/build/lib/ppdet/modeling/backbones/dla.py
+++ b/build/lib/ppdet/modeling/backbones/dla.py
@ -0,0 +1,243 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
 from ppdet.core.workspace import register, serializable
 from ppdet.modeling.layers import ConvNormLayer
 from ..shape_spec import ShapeSpec
 DLA_cfg = {34: ([1, 1, 1, 2, 2, 1], [16, 32, 64, 128, 256, 512])}
 class BasicBlock(nn.Layer):
    def __init__(self, ch_in, ch_out, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = ConvNormLayer(
            ch_in,
            ch_out,
            filter_size=3,
            stride=stride,
            bias_on=False,
            norm_decay=None)
        self.conv2 = ConvNormLayer(
            ch_out,
            ch_out,
            filter_size=3,
            stride=1,
            bias_on=False,
            norm_decay=None)
    def forward(self, inputs, residual=None):
        if residual is None:
            residual = inputs
        out = self.conv1(inputs)
        out = F.relu(out)
        out = self.conv2(out)
        out = paddle.add(x=out, y=residual)
        out = F.relu(out)
        return out
 class Root(nn.Layer):
    def __init__(self, ch_in, ch_out, kernel_size, residual):
        super(Root, self).__init__()
        self.conv = ConvNormLayer(
            ch_in,
            ch_out,
            filter_size=1,
            stride=1,
            bias_on=False,
            norm_decay=None)
        self.residual = residual
    def forward(self, inputs):
        children = inputs
        out = self.conv(paddle.concat(inputs, axis=1))
        if self.residual:
            out = paddle.add(x=out, y=children[0])
        out = F.relu(out)
        return out
 class Tree(nn.Layer):
    def __init__(self,
                 level,
                 block,
                 ch_in,
                 ch_out,
                 stride=1,
                 level_root=False,
                 root_dim=0,
                 root_kernel_size=1,
                 root_residual=False):
        super(Tree, self).__init__()
        if root_dim == 0:
            root_dim = 2 * ch_out
        if level_root:
            root_dim += ch_in
        if level == 1:
            self.tree1 = block(ch_in, ch_out, stride)
            self.tree2 = block(ch_out, ch_out, 1)
        else:
            self.tree1 = Tree(
                level - 1,
                block,
                ch_in,
                ch_out,
                stride,
                root_dim=0,
                root_kernel_size=root_kernel_size,
                root_residual=root_residual)
            self.tree2 = Tree(
                level - 1,
                block,
                ch_out,
                ch_out,
                1,
                root_dim=root_dim + ch_out,
                root_kernel_size=root_kernel_size,
                root_residual=root_residual)
        if level == 1:
            self.root = Root(root_dim, ch_out, root_kernel_size, root_residual)
        self.level_root = level_root
        self.root_dim = root_dim
        self.downsample = None
        self.project = None
        self.level = level
        if stride > 1:
            self.downsample = nn.MaxPool2D(stride, stride=stride)
        if ch_in != ch_out:
            self.project = ConvNormLayer(
                ch_in,
                ch_out,
                filter_size=1,
                stride=1,
                bias_on=False,
                norm_decay=None)
    def forward(self, x, residual=None, children=None):
        children = [] if children is None else children
        bottom = self.downsample(x) if self.downsample else x
        residual = self.project(bottom) if self.project else bottom
        if self.level_root:
            children.append(bottom)
        x1 = self.tree1(x, residual)
        if self.level == 1:
            x2 = self.tree2(x1)
            x = self.root([x2, x1] + children)
        else:
            children.append(x1)
            x = self.tree2(x1, children=children)
        return x
@register
@serializable
 class DLA(nn.Layer):
    """
    DLA, see https://arxiv.org/pdf/1707.06484.pdf
    Args:
        depth (int): DLA depth, should be 34.
        residual_root (bool): whether use a reidual layer in the root block
    """
    def __init__(self, depth=34, residual_root=False):
        super(DLA, self).__init__()
        levels, channels = DLA_cfg[depth]
        if depth == 34:
            block = BasicBlock
        self.channels = channels
        self.base_layer = nn.Sequential(
            ConvNormLayer(
                3,
                channels[0],
                filter_size=7,
                stride=1,
                bias_on=False,
                norm_decay=None),
            nn.ReLU())
        self.level0 = self._make_conv_level(channels[0], channels[0], levels[0])
        self.level1 = self._make_conv_level(
            channels[0], channels[1], levels[1], stride=2)
        self.level2 = Tree(
            levels[2],
            block,
            channels[1],
            channels[2],
            2,
            level_root=False,
            root_residual=residual_root)
        self.level3 = Tree(
            levels[3],
            block,
            channels[2],
            channels[3],
            2,
            level_root=True,
            root_residual=residual_root)
        self.level4 = Tree(
            levels[4],
            block,
            channels[3],
            channels[4],
            2,
            level_root=True,
            root_residual=residual_root)
        self.level5 = Tree(
            levels[5],
            block,
            channels[4],
            channels[5],
            2,
            level_root=True,
            root_residual=residual_root)
    def _make_conv_level(self, ch_in, ch_out, conv_num, stride=1):
        modules = []
        for i in range(conv_num):
            modules.extend([
                ConvNormLayer(
                    ch_in,
                    ch_out,
                    filter_size=3,
                    stride=stride if i == 0 else 1,
                    bias_on=False,
                    norm_decay=None), nn.ReLU()
            ])
            ch_in = ch_out
        return nn.Sequential(*modules)
    @property
    def out_shape(self):
        return [ShapeSpec(channels=self.channels[i]) for i in range(6)]
    def forward(self, inputs):
        outs = []
        im = inputs['image']
        feats = self.base_layer(im)
        for i in range(6):
            feats = getattr(self, 'level{}'.format(i))(feats)
            outs.append(feats)
        return outs
--- a/build/lib/ppdet/modeling/backbones/ghostnet.py
+++ b/build/lib/ppdet/modeling/backbones/ghostnet.py
@ -0,0 +1,476 @@
 # copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import math
 import paddle
 from paddle import ParamAttr
 import paddle.nn as nn
 import paddle.nn.functional as F
 from paddle.nn import AdaptiveAvgPool2D, Linear
 from paddle.nn.initializer import Uniform
 from ppdet.core.workspace import register, serializable
 from numbers import Integral
 from ..shape_spec import ShapeSpec
 from .mobilenet_v3 import make_divisible, ConvBNLayer
 __all__ = ['GhostNet']
 class ExtraBlockDW(nn.Layer):
    def __init__(self,
                 in_c,
                 ch_1,
                 ch_2,
                 stride,
                 lr_mult,
                 conv_decay=0.,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 name=None):
        super(ExtraBlockDW, self).__init__()
        self.pointwise_conv = ConvBNLayer(
            in_c=in_c,
            out_c=ch_1,
            filter_size=1,
            stride=1,
            padding=0,
            act='relu6',
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name + "_extra1")
        self.depthwise_conv = ConvBNLayer(
            in_c=ch_1,
            out_c=ch_2,
            filter_size=3,
            stride=stride,
            padding=1,  #
            num_groups=int(ch_1),
            act='relu6',
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name + "_extra2_dw")
        self.normal_conv = ConvBNLayer(
            in_c=ch_2,
            out_c=ch_2,
            filter_size=1,
            stride=1,
            padding=0,
            act='relu6',
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name + "_extra2_sep")
    def forward(self, inputs):
        x = self.pointwise_conv(inputs)
        x = self.depthwise_conv(x)
        x = self.normal_conv(x)
        return x
 class SEBlock(nn.Layer):
    def __init__(self, num_channels, lr_mult, reduction_ratio=4, name=None):
        super(SEBlock, self).__init__()
        self.pool2d_gap = AdaptiveAvgPool2D(1)
        self._num_channels = num_channels
        stdv = 1.0 / math.sqrt(num_channels * 1.0)
        med_ch = num_channels // reduction_ratio
        self.squeeze = Linear(
            num_channels,
            med_ch,
            weight_attr=ParamAttr(
                learning_rate=lr_mult,
                initializer=Uniform(-stdv, stdv),
                name=name + "_1_weights"),
            bias_attr=ParamAttr(
                learning_rate=lr_mult, name=name + "_1_offset"))
        stdv = 1.0 / math.sqrt(med_ch * 1.0)
        self.excitation = Linear(
            med_ch,
            num_channels,
            weight_attr=ParamAttr(
                learning_rate=lr_mult,
                initializer=Uniform(-stdv, stdv),
                name=name + "_2_weights"),
            bias_attr=ParamAttr(
                learning_rate=lr_mult, name=name + "_2_offset"))
    def forward(self, inputs):
        pool = self.pool2d_gap(inputs)
        pool = paddle.squeeze(pool, axis=[2, 3])
        squeeze = self.squeeze(pool)
        squeeze = F.relu(squeeze)
        excitation = self.excitation(squeeze)
        excitation = paddle.clip(x=excitation, min=0, max=1)
        excitation = paddle.unsqueeze(excitation, axis=[2, 3])
        out = paddle.multiply(inputs, excitation)
        return out
 class GhostModule(nn.Layer):
    def __init__(self,
                 in_channels,
                 output_channels,
                 kernel_size=1,
                 ratio=2,
                 dw_size=3,
                 stride=1,
                 relu=True,
                 lr_mult=1.,
                 conv_decay=0.,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 name=None):
        super(GhostModule, self).__init__()
        init_channels = int(math.ceil(output_channels / ratio))
        new_channels = int(init_channels * (ratio - 1))
        self.primary_conv = ConvBNLayer(
            in_c=in_channels,
            out_c=init_channels,
            filter_size=kernel_size,
            stride=stride,
            padding=int((kernel_size - 1) // 2),
            num_groups=1,
            act="relu" if relu else None,
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name + "_primary_conv")
        self.cheap_operation = ConvBNLayer(
            in_c=init_channels,
            out_c=new_channels,
            filter_size=dw_size,
            stride=1,
            padding=int((dw_size - 1) // 2),
            num_groups=init_channels,
            act="relu" if relu else None,
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name + "_cheap_operation")
    def forward(self, inputs):
        x = self.primary_conv(inputs)
        y = self.cheap_operation(x)
        out = paddle.concat([x, y], axis=1)
        return out
 class GhostBottleneck(nn.Layer):
    def __init__(self,
                 in_channels,
                 hidden_dim,
                 output_channels,
                 kernel_size,
                 stride,
                 use_se,
                 lr_mult,
                 conv_decay=0.,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 return_list=False,
                 name=None):
        super(GhostBottleneck, self).__init__()
        self._stride = stride
        self._use_se = use_se
        self._num_channels = in_channels
        self._output_channels = output_channels
        self.return_list = return_list
        self.ghost_module_1 = GhostModule(
            in_channels=in_channels,
            output_channels=hidden_dim,
            kernel_size=1,
            stride=1,
            relu=True,
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name + "_ghost_module_1")
        if stride == 2:
            self.depthwise_conv = ConvBNLayer(
                in_c=hidden_dim,
                out_c=hidden_dim,
                filter_size=kernel_size,
                stride=stride,
                padding=int((kernel_size - 1) // 2),
                num_groups=hidden_dim,
                act=None,
                lr_mult=lr_mult,
                conv_decay=conv_decay,
                norm_type=norm_type,
                norm_decay=norm_decay,
                freeze_norm=freeze_norm,
                name=name +
                "_depthwise_depthwise"  # looks strange due to an old typo, will be fixed later.
            )
        if use_se:
            self.se_block = SEBlock(hidden_dim, lr_mult, name=name + "_se")
        self.ghost_module_2 = GhostModule(
            in_channels=hidden_dim,
            output_channels=output_channels,
            kernel_size=1,
            relu=False,
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name + "_ghost_module_2")
        if stride != 1 or in_channels != output_channels:
            self.shortcut_depthwise = ConvBNLayer(
                in_c=in_channels,
                out_c=in_channels,
                filter_size=kernel_size,
                stride=stride,
                padding=int((kernel_size - 1) // 2),
                num_groups=in_channels,
                act=None,
                lr_mult=lr_mult,
                conv_decay=conv_decay,
                norm_type=norm_type,
                norm_decay=norm_decay,
                freeze_norm=freeze_norm,
                name=name +
                "_shortcut_depthwise_depthwise"  # looks strange due to an old typo, will be fixed later.
            )
            self.shortcut_conv = ConvBNLayer(
                in_c=in_channels,
                out_c=output_channels,
                filter_size=1,
                stride=1,
                padding=0,
                num_groups=1,
                act=None,
                lr_mult=lr_mult,
                conv_decay=conv_decay,
                norm_type=norm_type,
                norm_decay=norm_decay,
                freeze_norm=freeze_norm,
                name=name + "_shortcut_conv")
    def forward(self, inputs):
        y = self.ghost_module_1(inputs)
        x = y
        if self._stride == 2:
            x = self.depthwise_conv(x)
        if self._use_se:
            x = self.se_block(x)
        x = self.ghost_module_2(x)
        if self._stride == 1 and self._num_channels == self._output_channels:
            shortcut = inputs
        else:
            shortcut = self.shortcut_depthwise(inputs)
            shortcut = self.shortcut_conv(shortcut)
        x = paddle.add(x=x, y=shortcut)
        if self.return_list:
            return [y, x]
        else:
            return x
@register
@serializable
 class GhostNet(nn.Layer):
    __shared__ = ['norm_type']
    def __init__(
            self,
            scale=1.3,
            feature_maps=[6, 12, 15],
            with_extra_blocks=False,
            extra_block_filters=[[256, 512], [128, 256], [128, 256], [64, 128]],
            lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
            conv_decay=0.,
            norm_type='bn',
            norm_decay=0.0,
            freeze_norm=False):
        super(GhostNet, self).__init__()
        if isinstance(feature_maps, Integral):
            feature_maps = [feature_maps]
        if norm_type == 'sync_bn' and freeze_norm:
            raise ValueError(
                "The norm_type should not be sync_bn when freeze_norm is True")
        self.feature_maps = feature_maps
        self.with_extra_blocks = with_extra_blocks
        self.extra_block_filters = extra_block_filters
        inplanes = 16
        self.cfgs = [
            # k, t, c, SE, s
            [3, 16, 16, 0, 1],
            [3, 48, 24, 0, 2],
            [3, 72, 24, 0, 1],
            [5, 72, 40, 1, 2],
            [5, 120, 40, 1, 1],
            [3, 240, 80, 0, 2],
            [3, 200, 80, 0, 1],
            [3, 184, 80, 0, 1],
            [3, 184, 80, 0, 1],
            [3, 480, 112, 1, 1],
            [3, 672, 112, 1, 1],
            [5, 672, 160, 1, 2],  # SSDLite output
            [5, 960, 160, 0, 1],
            [5, 960, 160, 1, 1],
            [5, 960, 160, 0, 1],
            [5, 960, 160, 1, 1]
        ]
        self.scale = scale
        conv1_out_ch = int(make_divisible(inplanes * self.scale, 4))
        self.conv1 = ConvBNLayer(
            in_c=3,
            out_c=conv1_out_ch,
            filter_size=3,
            stride=2,
            padding=1,
            num_groups=1,
            act="relu",
            lr_mult=1.,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name="conv1")
        # build inverted residual blocks
        self._out_channels = []
        self.ghost_bottleneck_list = []
        idx = 0
        inplanes = conv1_out_ch
        for k, exp_size, c, use_se, s in self.cfgs:
            lr_idx = min(idx // 3, len(lr_mult_list) - 1)
            lr_mult = lr_mult_list[lr_idx]
            # for SSD/SSDLite, first head input is after ResidualUnit expand_conv
            return_list = self.with_extra_blocks and idx + 2 in self.feature_maps
            ghost_bottleneck = self.add_sublayer(
                "_ghostbottleneck_" + str(idx),
                sublayer=GhostBottleneck(
                    in_channels=inplanes,
                    hidden_dim=int(make_divisible(exp_size * self.scale, 4)),
                    output_channels=int(make_divisible(c * self.scale, 4)),
                    kernel_size=k,
                    stride=s,
                    use_se=use_se,
                    lr_mult=lr_mult,
                    conv_decay=conv_decay,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    return_list=return_list,
                    name="_ghostbottleneck_" + str(idx)))
            self.ghost_bottleneck_list.append(ghost_bottleneck)
            inplanes = int(make_divisible(c * self.scale, 4))
            idx += 1
            self._update_out_channels(
                int(make_divisible(exp_size * self.scale, 4))
                if return_list else inplanes, idx + 1, feature_maps)
        if self.with_extra_blocks:
            self.extra_block_list = []
            extra_out_c = int(make_divisible(self.scale * self.cfgs[-1][1], 4))
            lr_idx = min(idx // 3, len(lr_mult_list) - 1)
            lr_mult = lr_mult_list[lr_idx]
            conv_extra = self.add_sublayer(
                "conv" + str(idx + 2),
                sublayer=ConvBNLayer(
                    in_c=inplanes,
                    out_c=extra_out_c,
                    filter_size=1,
                    stride=1,
                    padding=0,
                    num_groups=1,
                    act="relu6",
                    lr_mult=lr_mult,
                    conv_decay=conv_decay,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    name="conv" + str(idx + 2)))
            self.extra_block_list.append(conv_extra)
            idx += 1
            self._update_out_channels(extra_out_c, idx + 1, feature_maps)
            for j, block_filter in enumerate(self.extra_block_filters):
                in_c = extra_out_c if j == 0 else self.extra_block_filters[j -
                                                                           1][1]
                conv_extra = self.add_sublayer(
                    "conv" + str(idx + 2),
                    sublayer=ExtraBlockDW(
                        in_c,
                        block_filter[0],
                        block_filter[1],
                        stride=2,
                        lr_mult=lr_mult,
                        conv_decay=conv_decay,
                        norm_type=norm_type,
                        norm_decay=norm_decay,
                        freeze_norm=freeze_norm,
                        name='conv' + str(idx + 2)))
                self.extra_block_list.append(conv_extra)
                idx += 1
                self._update_out_channels(block_filter[1], idx + 1,
                                          feature_maps)
    def _update_out_channels(self, channel, feature_idx, feature_maps):
        if feature_idx in feature_maps:
            self._out_channels.append(channel)
    def forward(self, inputs):
        x = self.conv1(inputs['image'])
        outs = []
        for idx, ghost_bottleneck in enumerate(self.ghost_bottleneck_list):
            x = ghost_bottleneck(x)
            if idx + 2 in self.feature_maps:
                if isinstance(x, list):
                    outs.append(x[0])
                    x = x[1]
                else:
                    outs.append(x)
        if not self.with_extra_blocks:
            return outs
        for i, block in enumerate(self.extra_block_list):
            idx = i + len(self.ghost_bottleneck_list)
            x = block(x)
            if idx + 2 in self.feature_maps:
                outs.append(x)
        return outs
    @property
    def out_shape(self):
        return [ShapeSpec(channels=c) for c in self._out_channels]
--- a/build/lib/ppdet/modeling/backbones/hrnet.py
+++ b/build/lib/ppdet/modeling/backbones/hrnet.py
@ -0,0 +1,724 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
 from paddle.regularizer import L2Decay
 from paddle import ParamAttr
 from paddle.nn.initializer import Normal
 from numbers import Integral
 import math
 from ppdet.core.workspace import register
 from ..shape_spec import ShapeSpec
 __all__ = ['HRNet']
 class ConvNormLayer(nn.Layer):
    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size,
                 stride=1,
                 norm_type='bn',
                 norm_groups=32,
                 use_dcn=False,
                 norm_decay=0.,
                 freeze_norm=False,
                 act=None,
                 name=None):
        super(ConvNormLayer, self).__init__()
        assert norm_type in ['bn', 'sync_bn', 'gn']
        self.act = act
        self.conv = nn.Conv2D(
            in_channels=ch_in,
            out_channels=ch_out,
            kernel_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=1,
            weight_attr=ParamAttr(
                name=name + "_weights", initializer=Normal(
                    mean=0., std=0.01)),
            bias_attr=False)
        norm_lr = 0. if freeze_norm else 1.
        norm_name = name + '_bn'
        param_attr = ParamAttr(
            name=norm_name + "_scale",
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay))
        bias_attr = ParamAttr(
            name=norm_name + "_offset",
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay))
        global_stats = True if freeze_norm else False
        if norm_type in ['bn', 'sync_bn']:
            self.norm = nn.BatchNorm(
                ch_out,
                param_attr=param_attr,
                bias_attr=bias_attr,
                use_global_stats=global_stats,
                moving_mean_name=norm_name + '_mean',
                moving_variance_name=norm_name + '_variance')
        elif norm_type == 'gn':
            self.norm = nn.GroupNorm(
                num_groups=norm_groups,
                num_channels=ch_out,
                weight_attr=param_attr,
                bias_attr=bias_attr)
        norm_params = self.norm.parameters()
        if freeze_norm:
            for param in norm_params:
                param.stop_gradient = True
    def forward(self, inputs):
        out = self.conv(inputs)
        out = self.norm(out)
        if self.act == 'relu':
            out = F.relu(out)
        return out
 class Layer1(nn.Layer):
    def __init__(self,
                 num_channels,
                 has_se=False,
                 norm_decay=0.,
                 freeze_norm=True,
                 name=None):
        super(Layer1, self).__init__()
        self.bottleneck_block_list = []
        for i in range(4):
            bottleneck_block = self.add_sublayer(
                "block_{}_{}".format(name, i + 1),
                BottleneckBlock(
                    num_channels=num_channels if i == 0 else 256,
                    num_filters=64,
                    has_se=has_se,
                    stride=1,
                    downsample=True if i == 0 else False,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    name=name + '_' + str(i + 1)))
            self.bottleneck_block_list.append(bottleneck_block)
    def forward(self, input):
        conv = input
        for block_func in self.bottleneck_block_list:
            conv = block_func(conv)
        return conv
 class TransitionLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 norm_decay=0.,
                 freeze_norm=True,
                 name=None):
        super(TransitionLayer, self).__init__()
        num_in = len(in_channels)
        num_out = len(out_channels)
        out = []
        self.conv_bn_func_list = []
        for i in range(num_out):
            residual = None
            if i < num_in:
                if in_channels[i] != out_channels[i]:
                    residual = self.add_sublayer(
                        "transition_{}_layer_{}".format(name, i + 1),
                        ConvNormLayer(
                            ch_in=in_channels[i],
                            ch_out=out_channels[i],
                            filter_size=3,
                            norm_decay=norm_decay,
                            freeze_norm=freeze_norm,
                            act='relu',
                            name=name + '_layer_' + str(i + 1)))
            else:
                residual = self.add_sublayer(
                    "transition_{}_layer_{}".format(name, i + 1),
                    ConvNormLayer(
                        ch_in=in_channels[-1],
                        ch_out=out_channels[i],
                        filter_size=3,
                        stride=2,
                        norm_decay=norm_decay,
                        freeze_norm=freeze_norm,
                        act='relu',
                        name=name + '_layer_' + str(i + 1)))
            self.conv_bn_func_list.append(residual)
    def forward(self, input):
        outs = []
        for idx, conv_bn_func in enumerate(self.conv_bn_func_list):
            if conv_bn_func is None:
                outs.append(input[idx])
            else:
                if idx < len(input):
                    outs.append(conv_bn_func(input[idx]))
                else:
                    outs.append(conv_bn_func(input[-1]))
        return outs
 class Branches(nn.Layer):
    def __init__(self,
                 block_num,
                 in_channels,
                 out_channels,
                 has_se=False,
                 norm_decay=0.,
                 freeze_norm=True,
                 name=None):
        super(Branches, self).__init__()
        self.basic_block_list = []
        for i in range(len(out_channels)):
            self.basic_block_list.append([])
            for j in range(block_num):
                in_ch = in_channels[i] if j == 0 else out_channels[i]
                basic_block_func = self.add_sublayer(
                    "bb_{}_branch_layer_{}_{}".format(name, i + 1, j + 1),
                    BasicBlock(
                        num_channels=in_ch,
                        num_filters=out_channels[i],
                        has_se=has_se,
                        norm_decay=norm_decay,
                        freeze_norm=freeze_norm,
                        name=name + '_branch_layer_' + str(i + 1) + '_' +
                        str(j + 1)))
                self.basic_block_list[i].append(basic_block_func)
    def forward(self, inputs):
        outs = []
        for idx, input in enumerate(inputs):
            conv = input
            basic_block_list = self.basic_block_list[idx]
            for basic_block_func in basic_block_list:
                conv = basic_block_func(conv)
            outs.append(conv)
        return outs
 class BottleneckBlock(nn.Layer):
    def __init__(self,
                 num_channels,
                 num_filters,
                 has_se,
                 stride=1,
                 downsample=False,
                 norm_decay=0.,
                 freeze_norm=True,
                 name=None):
        super(BottleneckBlock, self).__init__()
        self.has_se = has_se
        self.downsample = downsample
        self.conv1 = ConvNormLayer(
            ch_in=num_channels,
            ch_out=num_filters,
            filter_size=1,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            act="relu",
            name=name + "_conv1")
        self.conv2 = ConvNormLayer(
            ch_in=num_filters,
            ch_out=num_filters,
            filter_size=3,
            stride=stride,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            act="relu",
            name=name + "_conv2")
        self.conv3 = ConvNormLayer(
            ch_in=num_filters,
            ch_out=num_filters * 4,
            filter_size=1,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            act=None,
            name=name + "_conv3")
        if self.downsample:
            self.conv_down = ConvNormLayer(
                ch_in=num_channels,
                ch_out=num_filters * 4,
                filter_size=1,
                norm_decay=norm_decay,
                freeze_norm=freeze_norm,
                act=None,
                name=name + "_downsample")
        if self.has_se:
            self.se = SELayer(
                num_channels=num_filters * 4,
                num_filters=num_filters * 4,
                reduction_ratio=16,
                name='fc' + name)
    def forward(self, input):
        residual = input
        conv1 = self.conv1(input)
        conv2 = self.conv2(conv1)
        conv3 = self.conv3(conv2)
        if self.downsample:
            residual = self.conv_down(input)
        if self.has_se:
            conv3 = self.se(conv3)
        y = paddle.add(x=residual, y=conv3)
        y = F.relu(y)
        return y
 class BasicBlock(nn.Layer):
    def __init__(self,
                 num_channels,
                 num_filters,
                 stride=1,
                 has_se=False,
                 downsample=False,
                 norm_decay=0.,
                 freeze_norm=True,
                 name=None):
        super(BasicBlock, self).__init__()
        self.has_se = has_se
        self.downsample = downsample
        self.conv1 = ConvNormLayer(
            ch_in=num_channels,
            ch_out=num_filters,
            filter_size=3,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            stride=stride,
            act="relu",
            name=name + "_conv1")
        self.conv2 = ConvNormLayer(
            ch_in=num_filters,
            ch_out=num_filters,
            filter_size=3,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            stride=1,
            act=None,
            name=name + "_conv2")
        if self.downsample:
            self.conv_down = ConvNormLayer(
                ch_in=num_channels,
                ch_out=num_filters * 4,
                filter_size=1,
                norm_decay=norm_decay,
                freeze_norm=freeze_norm,
                act=None,
                name=name + "_downsample")
        if self.has_se:
            self.se = SELayer(
                num_channels=num_filters,
                num_filters=num_filters,
                reduction_ratio=16,
                name='fc' + name)
    def forward(self, input):
        residual = input
        conv1 = self.conv1(input)
        conv2 = self.conv2(conv1)
        if self.downsample:
            residual = self.conv_down(input)
        if self.has_se:
            conv2 = self.se(conv2)
        y = paddle.add(x=residual, y=conv2)
        y = F.relu(y)
        return y
 class SELayer(nn.Layer):
    def __init__(self, num_channels, num_filters, reduction_ratio, name=None):
        super(SELayer, self).__init__()
        self.pool2d_gap = AdaptiveAvgPool2D(1)
        self._num_channels = num_channels
        med_ch = int(num_channels / reduction_ratio)
        stdv = 1.0 / math.sqrt(num_channels * 1.0)
        self.squeeze = Linear(
            num_channels,
            med_ch,
            weight_attr=ParamAttr(
                initializer=Uniform(-stdv, stdv), name=name + "_sqz_weights"),
            bias_attr=ParamAttr(name=name + '_sqz_offset'))
        stdv = 1.0 / math.sqrt(med_ch * 1.0)
        self.excitation = Linear(
            med_ch,
            num_filters,
            weight_attr=ParamAttr(
                initializer=Uniform(-stdv, stdv), name=name + "_exc_weights"),
            bias_attr=ParamAttr(name=name + '_exc_offset'))
    def forward(self, input):
        pool = self.pool2d_gap(input)
        pool = paddle.squeeze(pool, axis=[2, 3])
        squeeze = self.squeeze(pool)
        squeeze = F.relu(squeeze)
        excitation = self.excitation(squeeze)
        excitation = F.sigmoid(excitation)
        excitation = paddle.unsqueeze(excitation, axis=[2, 3])
        out = input * excitation
        return out
 class Stage(nn.Layer):
    def __init__(self,
                 num_channels,
                 num_modules,
                 num_filters,
                 has_se=False,
                 norm_decay=0.,
                 freeze_norm=True,
                 multi_scale_output=True,
                 name=None):
        super(Stage, self).__init__()
        self._num_modules = num_modules
        self.stage_func_list = []
        for i in range(num_modules):
            if i == num_modules - 1 and not multi_scale_output:
                stage_func = self.add_sublayer(
                    "stage_{}_{}".format(name, i + 1),
                    HighResolutionModule(
                        num_channels=num_channels,
                        num_filters=num_filters,
                        has_se=has_se,
                        norm_decay=norm_decay,
                        freeze_norm=freeze_norm,
                        multi_scale_output=False,
                        name=name + '_' + str(i + 1)))
            else:
                stage_func = self.add_sublayer(
                    "stage_{}_{}".format(name, i + 1),
                    HighResolutionModule(
                        num_channels=num_channels,
                        num_filters=num_filters,
                        has_se=has_se,
                        norm_decay=norm_decay,
                        freeze_norm=freeze_norm,
                        name=name + '_' + str(i + 1)))
            self.stage_func_list.append(stage_func)
    def forward(self, input):
        out = input
        for idx in range(self._num_modules):
            out = self.stage_func_list[idx](out)
        return out
 class HighResolutionModule(nn.Layer):
    def __init__(self,
                 num_channels,
                 num_filters,
                 has_se=False,
                 multi_scale_output=True,
                 norm_decay=0.,
                 freeze_norm=True,
                 name=None):
        super(HighResolutionModule, self).__init__()
        self.branches_func = Branches(
            block_num=4,
            in_channels=num_channels,
            out_channels=num_filters,
            has_se=has_se,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name)
        self.fuse_func = FuseLayers(
            in_channels=num_filters,
            out_channels=num_filters,
            multi_scale_output=multi_scale_output,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name)
    def forward(self, input):
        out = self.branches_func(input)
        out = self.fuse_func(out)
        return out
 class FuseLayers(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 multi_scale_output=True,
                 norm_decay=0.,
                 freeze_norm=True,
                 name=None):
        super(FuseLayers, self).__init__()
        self._actual_ch = len(in_channels) if multi_scale_output else 1
        self._in_channels = in_channels
        self.residual_func_list = []
        for i in range(self._actual_ch):
            for j in range(len(in_channels)):
                residual_func = None
                if j > i:
                    residual_func = self.add_sublayer(
                        "residual_{}_layer_{}_{}".format(name, i + 1, j + 1),
                        ConvNormLayer(
                            ch_in=in_channels[j],
                            ch_out=out_channels[i],
                            filter_size=1,
                            stride=1,
                            act=None,
                            norm_decay=norm_decay,
                            freeze_norm=freeze_norm,
                            name=name + '_layer_' + str(i + 1) + '_' +
                            str(j + 1)))
                    self.residual_func_list.append(residual_func)
                elif j < i:
                    pre_num_filters = in_channels[j]
                    for k in range(i - j):
                        if k == i - j - 1:
                            residual_func = self.add_sublayer(
                                "residual_{}_layer_{}_{}_{}".format(
                                    name, i + 1, j + 1, k + 1),
                                ConvNormLayer(
                                    ch_in=pre_num_filters,
                                    ch_out=out_channels[i],
                                    filter_size=3,
                                    stride=2,
                                    norm_decay=norm_decay,
                                    freeze_norm=freeze_norm,
                                    act=None,
                                    name=name + '_layer_' + str(i + 1) + '_' +
                                    str(j + 1) + '_' + str(k + 1)))
                            pre_num_filters = out_channels[i]
                        else:
                            residual_func = self.add_sublayer(
                                "residual_{}_layer_{}_{}_{}".format(
                                    name, i + 1, j + 1, k + 1),
                                ConvNormLayer(
                                    ch_in=pre_num_filters,
                                    ch_out=out_channels[j],
                                    filter_size=3,
                                    stride=2,
                                    norm_decay=norm_decay,
                                    freeze_norm=freeze_norm,
                                    act="relu",
                                    name=name + '_layer_' + str(i + 1) + '_' +
                                    str(j + 1) + '_' + str(k + 1)))
                            pre_num_filters = out_channels[j]
                        self.residual_func_list.append(residual_func)
    def forward(self, input):
        outs = []
        residual_func_idx = 0
        for i in range(self._actual_ch):
            residual = input[i]
            for j in range(len(self._in_channels)):
                if j > i:
                    y = self.residual_func_list[residual_func_idx](input[j])
                    residual_func_idx += 1
                    y = F.interpolate(y, scale_factor=2**(j - i))
                    residual = paddle.add(x=residual, y=y)
                elif j < i:
                    y = input[j]
                    for k in range(i - j):
                        y = self.residual_func_list[residual_func_idx](y)
                        residual_func_idx += 1
                    residual = paddle.add(x=residual, y=y)
            residual = F.relu(residual)
            outs.append(residual)
        return outs
@register
 class HRNet(nn.Layer):
    """
    HRNet, see https://arxiv.org/abs/1908.07919
    Args:
        width (int): the width of HRNet
        has_se (bool): whether to add SE block for each stage
        freeze_at (int): the stage to freeze
        freeze_norm (bool): whether to freeze norm in HRNet
        norm_decay (float): weight decay for normalization layer weights
        return_idx (List): the stage to return
    """
    def __init__(self,
                 width=18,
                 has_se=False,
                 freeze_at=0,
                 freeze_norm=True,
                 norm_decay=0.,
                 return_idx=[0, 1, 2, 3]):
        super(HRNet, self).__init__()
        self.width = width
        self.has_se = has_se
        if isinstance(return_idx, Integral):
            return_idx = [return_idx]
        assert len(return_idx) > 0, "need one or more return index"
        self.freeze_at = freeze_at
        self.return_idx = return_idx
        self.channels = {
            18: [[18, 36], [18, 36, 72], [18, 36, 72, 144]],
            30: [[30, 60], [30, 60, 120], [30, 60, 120, 240]],
            32: [[32, 64], [32, 64, 128], [32, 64, 128, 256]],
            40: [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
            44: [[44, 88], [44, 88, 176], [44, 88, 176, 352]],
            48: [[48, 96], [48, 96, 192], [48, 96, 192, 384]],
            60: [[60, 120], [60, 120, 240], [60, 120, 240, 480]],
            64: [[64, 128], [64, 128, 256], [64, 128, 256, 512]]
        }
        channels_2, channels_3, channels_4 = self.channels[width]
        num_modules_2, num_modules_3, num_modules_4 = 1, 4, 3
        self._out_channels = channels_4
        self._out_strides = [4, 8, 16, 32]
        self.conv_layer1_1 = ConvNormLayer(
            ch_in=3,
            ch_out=64,
            filter_size=3,
            stride=2,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            act='relu',
            name="layer1_1")
        self.conv_layer1_2 = ConvNormLayer(
            ch_in=64,
            ch_out=64,
            filter_size=3,
            stride=2,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            act='relu',
            name="layer1_2")
        self.la1 = Layer1(
            num_channels=64,
            has_se=has_se,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name="layer2")
        self.tr1 = TransitionLayer(
            in_channels=[256],
            out_channels=channels_2,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name="tr1")
        self.st2 = Stage(
            num_channels=channels_2,
            num_modules=num_modules_2,
            num_filters=channels_2,
            has_se=self.has_se,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name="st2")
        self.tr2 = TransitionLayer(
            in_channels=channels_2,
            out_channels=channels_3,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name="tr2")
        self.st3 = Stage(
            num_channels=channels_3,
            num_modules=num_modules_3,
            num_filters=channels_3,
            has_se=self.has_se,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name="st3")
        self.tr3 = TransitionLayer(
            in_channels=channels_3,
            out_channels=channels_4,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name="tr3")
        self.st4 = Stage(
            num_channels=channels_4,
            num_modules=num_modules_4,
            num_filters=channels_4,
            has_se=self.has_se,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            multi_scale_output=len(return_idx) > 1,
            name="st4")
    def forward(self, inputs):
        x = inputs['image']
        conv1 = self.conv_layer1_1(x)
        conv2 = self.conv_layer1_2(conv1)
        la1 = self.la1(conv2)
        tr1 = self.tr1([la1])
        st2 = self.st2(tr1)
        tr2 = self.tr2(st2)
        st3 = self.st3(tr2)
        tr3 = self.tr3(st3)
        st4 = self.st4(tr3)
        res = []
        for i, layer in enumerate(st4):
            if i == self.freeze_at:
                layer.stop_gradient = True
            if i in self.return_idx:
                res.append(layer)
        return res
    @property
    def out_shape(self):
        return [
            ShapeSpec(
                channels=self._out_channels[i], stride=self._out_strides[i])
            for i in self.return_idx
        ]
--- a/build/lib/ppdet/modeling/backbones/mobilenet_v1.py
+++ b/build/lib/ppdet/modeling/backbones/mobilenet_v1.py
@ -0,0 +1,409 @@
 # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import paddle.nn as nn
 import paddle.nn.functional as F
 from paddle import ParamAttr
 from paddle.regularizer import L2Decay
 from paddle.nn.initializer import KaimingNormal
 from ppdet.core.workspace import register, serializable
 from numbers import Integral
 from ..shape_spec import ShapeSpec
 __all__ = ['MobileNet']
 class ConvBNLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 padding,
                 num_groups=1,
                 act='relu',
                 conv_lr=1.,
                 conv_decay=0.,
                 norm_decay=0.,
                 norm_type='bn',
                 name=None):
        super(ConvBNLayer, self).__init__()
        self.act = act
        self._conv = nn.Conv2D(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            weight_attr=ParamAttr(
                learning_rate=conv_lr,
                initializer=KaimingNormal(),
                regularizer=L2Decay(conv_decay)),
            bias_attr=False)
        param_attr = ParamAttr(regularizer=L2Decay(norm_decay))
        bias_attr = ParamAttr(regularizer=L2Decay(norm_decay))
        if norm_type == 'sync_bn':
            self._batch_norm = nn.SyncBatchNorm(
                out_channels, weight_attr=param_attr, bias_attr=bias_attr)
        else:
            self._batch_norm = nn.BatchNorm(
                out_channels,
                act=None,
                param_attr=param_attr,
                bias_attr=bias_attr,
                use_global_stats=False)
    def forward(self, x):
        x = self._conv(x)
        x = self._batch_norm(x)
        if self.act == "relu":
            x = F.relu(x)
        elif self.act == "relu6":
            x = F.relu6(x)
        return x
 class DepthwiseSeparable(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels1,
                 out_channels2,
                 num_groups,
                 stride,
                 scale,
                 conv_lr=1.,
                 conv_decay=0.,
                 norm_decay=0.,
                 norm_type='bn',
                 name=None):
        super(DepthwiseSeparable, self).__init__()
        self._depthwise_conv = ConvBNLayer(
            in_channels,
            int(out_channels1 * scale),
            kernel_size=3,
            stride=stride,
            padding=1,
            num_groups=int(num_groups * scale),
            conv_lr=conv_lr,
            conv_decay=conv_decay,
            norm_decay=norm_decay,
            norm_type=norm_type,
            name=name + "_dw")
        self._pointwise_conv = ConvBNLayer(
            int(out_channels1 * scale),
            int(out_channels2 * scale),
            kernel_size=1,
            stride=1,
            padding=0,
            conv_lr=conv_lr,
            conv_decay=conv_decay,
            norm_decay=norm_decay,
            norm_type=norm_type,
            name=name + "_sep")
    def forward(self, x):
        x = self._depthwise_conv(x)
        x = self._pointwise_conv(x)
        return x
 class ExtraBlock(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels1,
                 out_channels2,
                 num_groups=1,
                 stride=2,
                 conv_lr=1.,
                 conv_decay=0.,
                 norm_decay=0.,
                 norm_type='bn',
                 name=None):
        super(ExtraBlock, self).__init__()
        self.pointwise_conv = ConvBNLayer(
            in_channels,
            int(out_channels1),
            kernel_size=1,
            stride=1,
            padding=0,
            num_groups=int(num_groups),
            act='relu6',
            conv_lr=conv_lr,
            conv_decay=conv_decay,
            norm_decay=norm_decay,
            norm_type=norm_type,
            name=name + "_extra1")
        self.normal_conv = ConvBNLayer(
            int(out_channels1),
            int(out_channels2),
            kernel_size=3,
            stride=stride,
            padding=1,
            num_groups=int(num_groups),
            act='relu6',
            conv_lr=conv_lr,
            conv_decay=conv_decay,
            norm_decay=norm_decay,
            norm_type=norm_type,
            name=name + "_extra2")
    def forward(self, x):
        x = self.pointwise_conv(x)
        x = self.normal_conv(x)
        return x
@register
@serializable
 class MobileNet(nn.Layer):
    __shared__ = ['norm_type']
    def __init__(self,
                 norm_type='bn',
                 norm_decay=0.,
                 conv_decay=0.,
                 scale=1,
                 conv_learning_rate=1.0,
                 feature_maps=[4, 6, 13],
                 with_extra_blocks=False,
                 extra_block_filters=[[256, 512], [128, 256], [128, 256],
                                      [64, 128]]):
        super(MobileNet, self).__init__()
        if isinstance(feature_maps, Integral):
            feature_maps = [feature_maps]
        self.feature_maps = feature_maps
        self.with_extra_blocks = with_extra_blocks
        self.extra_block_filters = extra_block_filters
        self._out_channels = []
        self.conv1 = ConvBNLayer(
            in_channels=3,
            out_channels=int(32 * scale),
            kernel_size=3,
            stride=2,
            padding=1,
            conv_lr=conv_learning_rate,
            conv_decay=conv_decay,
            norm_decay=norm_decay,
            norm_type=norm_type,
            name="conv1")
        self.dwsl = []
        dws21 = self.add_sublayer(
            "conv2_1",
            sublayer=DepthwiseSeparable(
                in_channels=int(32 * scale),
                out_channels1=32,
                out_channels2=64,
                num_groups=32,
                stride=1,
                scale=scale,
                conv_lr=conv_learning_rate,
                conv_decay=conv_decay,
                norm_decay=norm_decay,
                norm_type=norm_type,
                name="conv2_1"))
        self.dwsl.append(dws21)
        self._update_out_channels(64, len(self.dwsl), feature_maps)
        dws22 = self.add_sublayer(
            "conv2_2",
            sublayer=DepthwiseSeparable(
                in_channels=int(64 * scale),
                out_channels1=64,
                out_channels2=128,
                num_groups=64,
                stride=2,
                scale=scale,
                conv_lr=conv_learning_rate,
                conv_decay=conv_decay,
                norm_decay=norm_decay,
                norm_type=norm_type,
                name="conv2_2"))
        self.dwsl.append(dws22)
        self._update_out_channels(128, len(self.dwsl), feature_maps)
        # 1/4
        dws31 = self.add_sublayer(
            "conv3_1",
            sublayer=DepthwiseSeparable(
                in_channels=int(128 * scale),
                out_channels1=128,
                out_channels2=128,
                num_groups=128,
                stride=1,
                scale=scale,
                conv_lr=conv_learning_rate,
                conv_decay=conv_decay,
                norm_decay=norm_decay,
                norm_type=norm_type,
                name="conv3_1"))
        self.dwsl.append(dws31)
        self._update_out_channels(128, len(self.dwsl), feature_maps)
        dws32 = self.add_sublayer(
            "conv3_2",
            sublayer=DepthwiseSeparable(
                in_channels=int(128 * scale),
                out_channels1=128,
                out_channels2=256,
                num_groups=128,
                stride=2,
                scale=scale,
                conv_lr=conv_learning_rate,
                conv_decay=conv_decay,
                norm_decay=norm_decay,
                norm_type=norm_type,
                name="conv3_2"))
        self.dwsl.append(dws32)
        self._update_out_channels(256, len(self.dwsl), feature_maps)
        # 1/8
        dws41 = self.add_sublayer(
            "conv4_1",
            sublayer=DepthwiseSeparable(
                in_channels=int(256 * scale),
                out_channels1=256,
                out_channels2=256,
                num_groups=256,
                stride=1,
                scale=scale,
                conv_lr=conv_learning_rate,
                conv_decay=conv_decay,
                norm_decay=norm_decay,
                norm_type=norm_type,
                name="conv4_1"))
        self.dwsl.append(dws41)
        self._update_out_channels(256, len(self.dwsl), feature_maps)
        dws42 = self.add_sublayer(
            "conv4_2",
            sublayer=DepthwiseSeparable(
                in_channels=int(256 * scale),
                out_channels1=256,
                out_channels2=512,
                num_groups=256,
                stride=2,
                scale=scale,
                conv_lr=conv_learning_rate,
                conv_decay=conv_decay,
                norm_decay=norm_decay,
                norm_type=norm_type,
                name="conv4_2"))
        self.dwsl.append(dws42)
        self._update_out_channels(512, len(self.dwsl), feature_maps)
        # 1/16
        for i in range(5):
            tmp = self.add_sublayer(
                "conv5_" + str(i + 1),
                sublayer=DepthwiseSeparable(
                    in_channels=512,
                    out_channels1=512,
                    out_channels2=512,
                    num_groups=512,
                    stride=1,
                    scale=scale,
                    conv_lr=conv_learning_rate,
                    conv_decay=conv_decay,
                    norm_decay=norm_decay,
                    norm_type=norm_type,
                    name="conv5_" + str(i + 1)))
            self.dwsl.append(tmp)
            self._update_out_channels(512, len(self.dwsl), feature_maps)
        dws56 = self.add_sublayer(
            "conv5_6",
            sublayer=DepthwiseSeparable(
                in_channels=int(512 * scale),
                out_channels1=512,
                out_channels2=1024,
                num_groups=512,
                stride=2,
                scale=scale,
                conv_lr=conv_learning_rate,
                conv_decay=conv_decay,
                norm_decay=norm_decay,
                norm_type=norm_type,
                name="conv5_6"))
        self.dwsl.append(dws56)
        self._update_out_channels(1024, len(self.dwsl), feature_maps)
        # 1/32
        dws6 = self.add_sublayer(
            "conv6",
            sublayer=DepthwiseSeparable(
                in_channels=int(1024 * scale),
                out_channels1=1024,
                out_channels2=1024,
                num_groups=1024,
                stride=1,
                scale=scale,
                conv_lr=conv_learning_rate,
                conv_decay=conv_decay,
                norm_decay=norm_decay,
                norm_type=norm_type,
                name="conv6"))
        self.dwsl.append(dws6)
        self._update_out_channels(1024, len(self.dwsl), feature_maps)
        if self.with_extra_blocks:
            self.extra_blocks = []
            for i, block_filter in enumerate(self.extra_block_filters):
                in_c = 1024 if i == 0 else self.extra_block_filters[i - 1][1]
                conv_extra = self.add_sublayer(
                    "conv7_" + str(i + 1),
                    sublayer=ExtraBlock(
                        in_c,
                        block_filter[0],
                        block_filter[1],
                        conv_lr=conv_learning_rate,
                        conv_decay=conv_decay,
                        norm_decay=norm_decay,
                        norm_type=norm_type,
                        name="conv7_" + str(i + 1)))
                self.extra_blocks.append(conv_extra)
                self._update_out_channels(
                    block_filter[1],
                    len(self.dwsl) + len(self.extra_blocks), feature_maps)
    def _update_out_channels(self, channel, feature_idx, feature_maps):
        if feature_idx in feature_maps:
            self._out_channels.append(channel)
    def forward(self, inputs):
        outs = []
        y = self.conv1(inputs['image'])
        for i, block in enumerate(self.dwsl):
            y = block(y)
            if i + 1 in self.feature_maps:
                outs.append(y)
        if not self.with_extra_blocks:
            return outs
        y = outs[-1]
        for i, block in enumerate(self.extra_blocks):
            idx = i + len(self.dwsl)
            y = block(y)
            if idx + 1 in self.feature_maps:
                outs.append(y)
        return outs
    @property
    def out_shape(self):
        return [ShapeSpec(channels=c) for c in self._out_channels]
--- a/build/lib/ppdet/modeling/backbones/mobilenet_v3.py
+++ b/build/lib/ppdet/modeling/backbones/mobilenet_v3.py
@ -0,0 +1,496 @@
 # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
 from paddle import ParamAttr
 from paddle.regularizer import L2Decay
 from ppdet.core.workspace import register, serializable
 from numbers import Integral
 from ..shape_spec import ShapeSpec
 __all__ = ['MobileNetV3']
 def make_divisible(v, divisor=8, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v
 class ConvBNLayer(nn.Layer):
    def __init__(self,
                 in_c,
                 out_c,
                 filter_size,
                 stride,
                 padding,
                 num_groups=1,
                 act=None,
                 lr_mult=1.,
                 conv_decay=0.,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 name=""):
        super(ConvBNLayer, self).__init__()
        self.act = act
        self.conv = nn.Conv2D(
            in_channels=in_c,
            out_channels=out_c,
            kernel_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            weight_attr=ParamAttr(
                learning_rate=lr_mult,
                regularizer=L2Decay(conv_decay),
                name=name + "_weights"),
            bias_attr=False)
        norm_lr = 0. if freeze_norm else lr_mult
        param_attr = ParamAttr(
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay),
            name=name + "_bn_scale",
            trainable=False if freeze_norm else True)
        bias_attr = ParamAttr(
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay),
            name=name + "_bn_offset",
            trainable=False if freeze_norm else True)
        global_stats = True if freeze_norm else False
        if norm_type == 'sync_bn':
            self.bn = nn.SyncBatchNorm(
                out_c, weight_attr=param_attr, bias_attr=bias_attr)
        else:
            self.bn = nn.BatchNorm(
                out_c,
                act=None,
                param_attr=param_attr,
                bias_attr=bias_attr,
                use_global_stats=global_stats,
                moving_mean_name=name + '_bn_mean',
                moving_variance_name=name + '_bn_variance')
        norm_params = self.bn.parameters()
        if freeze_norm:
            for param in norm_params:
                param.stop_gradient = True
    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        if self.act is not None:
            if self.act == "relu":
                x = F.relu(x)
            elif self.act == "relu6":
                x = F.relu6(x)
            elif self.act == "hard_swish":
                x = F.hardswish(x)
            else:
                raise NotImplementedError(
                    "The activation function is selected incorrectly.")
        return x
 class ResidualUnit(nn.Layer):
    def __init__(self,
                 in_c,
                 mid_c,
                 out_c,
                 filter_size,
                 stride,
                 use_se,
                 lr_mult,
                 conv_decay=0.,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 act=None,
                 return_list=False,
                 name=''):
        super(ResidualUnit, self).__init__()
        self.if_shortcut = stride == 1 and in_c == out_c
        self.use_se = use_se
        self.return_list = return_list
        self.expand_conv = ConvBNLayer(
            in_c=in_c,
            out_c=mid_c,
            filter_size=1,
            stride=1,
            padding=0,
            act=act,
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name + "_expand")
        self.bottleneck_conv = ConvBNLayer(
            in_c=mid_c,
            out_c=mid_c,
            filter_size=filter_size,
            stride=stride,
            padding=int((filter_size - 1) // 2),
            num_groups=mid_c,
            act=act,
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name + "_depthwise")
        if self.use_se:
            self.mid_se = SEModule(
                mid_c, lr_mult, conv_decay, name=name + "_se")
        self.linear_conv = ConvBNLayer(
            in_c=mid_c,
            out_c=out_c,
            filter_size=1,
            stride=1,
            padding=0,
            act=None,
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name + "_linear")
    def forward(self, inputs):
        y = self.expand_conv(inputs)
        x = self.bottleneck_conv(y)
        if self.use_se:
            x = self.mid_se(x)
        x = self.linear_conv(x)
        if self.if_shortcut:
            x = paddle.add(inputs, x)
        if self.return_list:
            return [y, x]
        else:
            return x
 class SEModule(nn.Layer):
    def __init__(self, channel, lr_mult, conv_decay, reduction=4, name=""):
        super(SEModule, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2D(1)
        mid_channels = int(channel // reduction)
        self.conv1 = nn.Conv2D(
            in_channels=channel,
            out_channels=mid_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(
                learning_rate=lr_mult,
                regularizer=L2Decay(conv_decay),
                name=name + "_1_weights"),
            bias_attr=ParamAttr(
                learning_rate=lr_mult,
                regularizer=L2Decay(conv_decay),
                name=name + "_1_offset"))
        self.conv2 = nn.Conv2D(
            in_channels=mid_channels,
            out_channels=channel,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(
                learning_rate=lr_mult,
                regularizer=L2Decay(conv_decay),
                name=name + "_2_weights"),
            bias_attr=ParamAttr(
                learning_rate=lr_mult,
                regularizer=L2Decay(conv_decay),
                name=name + "_2_offset"))
    def forward(self, inputs):
        outputs = self.avg_pool(inputs)
        outputs = self.conv1(outputs)
        outputs = F.relu(outputs)
        outputs = self.conv2(outputs)
        outputs = F.hardsigmoid(outputs, slope=0.2, offset=0.5)
        return paddle.multiply(x=inputs, y=outputs)
 class ExtraBlockDW(nn.Layer):
    def __init__(self,
                 in_c,
                 ch_1,
                 ch_2,
                 stride,
                 lr_mult,
                 conv_decay=0.,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 name=None):
        super(ExtraBlockDW, self).__init__()
        self.pointwise_conv = ConvBNLayer(
            in_c=in_c,
            out_c=ch_1,
            filter_size=1,
            stride=1,
            padding='SAME',
            act='relu6',
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name + "_extra1")
        self.depthwise_conv = ConvBNLayer(
            in_c=ch_1,
            out_c=ch_2,
            filter_size=3,
            stride=stride,
            padding='SAME',
            num_groups=int(ch_1),
            act='relu6',
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name + "_extra2_dw")
        self.normal_conv = ConvBNLayer(
            in_c=ch_2,
            out_c=ch_2,
            filter_size=1,
            stride=1,
            padding='SAME',
            act='relu6',
            lr_mult=lr_mult,
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name + "_extra2_sep")
    def forward(self, inputs):
        x = self.pointwise_conv(inputs)
        x = self.depthwise_conv(x)
        x = self.normal_conv(x)
        return x
@register
@serializable
 class MobileNetV3(nn.Layer):
    __shared__ = ['norm_type']
    def __init__(
            self,
            scale=1.0,
            model_name="large",
            feature_maps=[6, 12, 15],
            with_extra_blocks=False,
            extra_block_filters=[[256, 512], [128, 256], [128, 256], [64, 128]],
            lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
            conv_decay=0.0,
            multiplier=1.0,
            norm_type='bn',
            norm_decay=0.0,
            freeze_norm=False):
        super(MobileNetV3, self).__init__()
        if isinstance(feature_maps, Integral):
            feature_maps = [feature_maps]
        if norm_type == 'sync_bn' and freeze_norm:
            raise ValueError(
                "The norm_type should not be sync_bn when freeze_norm is True")
        self.feature_maps = feature_maps
        self.with_extra_blocks = with_extra_blocks
        self.extra_block_filters = extra_block_filters
        inplanes = 16
        if model_name == "large":
            self.cfg = [
                # k, exp, c,  se,     nl,  s,
                [3, 16, 16, False, "relu", 1],
                [3, 64, 24, False, "relu", 2],
                [3, 72, 24, False, "relu", 1],
                [5, 72, 40, True, "relu", 2],  # RCNN output
                [5, 120, 40, True, "relu", 1],
                [5, 120, 40, True, "relu", 1],  # YOLOv3 output
                [3, 240, 80, False, "hard_swish", 2],  # RCNN output
                [3, 200, 80, False, "hard_swish", 1],
                [3, 184, 80, False, "hard_swish", 1],
                [3, 184, 80, False, "hard_swish", 1],
                [3, 480, 112, True, "hard_swish", 1],
                [3, 672, 112, True, "hard_swish", 1],  # YOLOv3 output
                [5, 672, 160, True, "hard_swish", 2],  # SSD/SSDLite/RCNN output
                [5, 960, 160, True, "hard_swish", 1],
                [5, 960, 160, True, "hard_swish", 1],  # YOLOv3 output
            ]
        elif model_name == "small":
            self.cfg = [
                # k, exp, c,  se,     nl,  s,
                [3, 16, 16, True, "relu", 2],
                [3, 72, 24, False, "relu", 2],  # RCNN output
                [3, 88, 24, False, "relu", 1],  # YOLOv3 output
                [5, 96, 40, True, "hard_swish", 2],  # RCNN output
                [5, 240, 40, True, "hard_swish", 1],
                [5, 240, 40, True, "hard_swish", 1],
                [5, 120, 48, True, "hard_swish", 1],
                [5, 144, 48, True, "hard_swish", 1],  # YOLOv3 output
                [5, 288, 96, True, "hard_swish", 2],  # SSD/SSDLite/RCNN output
                [5, 576, 96, True, "hard_swish", 1],
                [5, 576, 96, True, "hard_swish", 1],  # YOLOv3 output
            ]
        else:
            raise NotImplementedError(
                "mode[{}_model] is not implemented!".format(model_name))
        if multiplier != 1.0:
            self.cfg[-3][2] = int(self.cfg[-3][2] * multiplier)
            self.cfg[-2][1] = int(self.cfg[-2][1] * multiplier)
            self.cfg[-2][2] = int(self.cfg[-2][2] * multiplier)
            self.cfg[-1][1] = int(self.cfg[-1][1] * multiplier)
            self.cfg[-1][2] = int(self.cfg[-1][2] * multiplier)
        self.conv1 = ConvBNLayer(
            in_c=3,
            out_c=make_divisible(inplanes * scale),
            filter_size=3,
            stride=2,
            padding=1,
            num_groups=1,
            act="hard_swish",
            lr_mult=lr_mult_list[0],
            conv_decay=conv_decay,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name="conv1")
        self._out_channels = []
        self.block_list = []
        i = 0
        inplanes = make_divisible(inplanes * scale)
        for (k, exp, c, se, nl, s) in self.cfg:
            lr_idx = min(i // 3, len(lr_mult_list) - 1)
            lr_mult = lr_mult_list[lr_idx]
            # for SSD/SSDLite, first head input is after ResidualUnit expand_conv
            return_list = self.with_extra_blocks and i + 2 in self.feature_maps
            block = self.add_sublayer(
                "conv" + str(i + 2),
                sublayer=ResidualUnit(
                    in_c=inplanes,
                    mid_c=make_divisible(scale * exp),
                    out_c=make_divisible(scale * c),
                    filter_size=k,
                    stride=s,
                    use_se=se,
                    act=nl,
                    lr_mult=lr_mult,
                    conv_decay=conv_decay,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    return_list=return_list,
                    name="conv" + str(i + 2)))
            self.block_list.append(block)
            inplanes = make_divisible(scale * c)
            i += 1
            self._update_out_channels(
                make_divisible(scale * exp)
                if return_list else inplanes, i + 1, feature_maps)
        if self.with_extra_blocks:
            self.extra_block_list = []
            extra_out_c = make_divisible(scale * self.cfg[-1][1])
            lr_idx = min(i // 3, len(lr_mult_list) - 1)
            lr_mult = lr_mult_list[lr_idx]
            conv_extra = self.add_sublayer(
                "conv" + str(i + 2),
                sublayer=ConvBNLayer(
                    in_c=inplanes,
                    out_c=extra_out_c,
                    filter_size=1,
                    stride=1,
                    padding=0,
                    num_groups=1,
                    act="hard_swish",
                    lr_mult=lr_mult,
                    conv_decay=conv_decay,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    name="conv" + str(i + 2)))
            self.extra_block_list.append(conv_extra)
            i += 1
            self._update_out_channels(extra_out_c, i + 1, feature_maps)
            for j, block_filter in enumerate(self.extra_block_filters):
                in_c = extra_out_c if j == 0 else self.extra_block_filters[j -
                                                                           1][1]
                conv_extra = self.add_sublayer(
                    "conv" + str(i + 2),
                    sublayer=ExtraBlockDW(
                        in_c,
                        block_filter[0],
                        block_filter[1],
                        stride=2,
                        lr_mult=lr_mult,
                        conv_decay=conv_decay,
                        norm_type=norm_type,
                        norm_decay=norm_decay,
                        freeze_norm=freeze_norm,
                        name='conv' + str(i + 2)))
                self.extra_block_list.append(conv_extra)
                i += 1
                self._update_out_channels(block_filter[1], i + 1, feature_maps)
    def _update_out_channels(self, channel, feature_idx, feature_maps):
        if feature_idx in feature_maps:
            self._out_channels.append(channel)
    def forward(self, inputs):
        x = self.conv1(inputs['image'])
        outs = []
        for idx, block in enumerate(self.block_list):
            x = block(x)
            if idx + 2 in self.feature_maps:
                if isinstance(x, list):
                    outs.append(x[0])
                    x = x[1]
                else:
                    outs.append(x)
        if not self.with_extra_blocks:
            return outs
        for i, block in enumerate(self.extra_block_list):
            idx = i + len(self.block_list)
            x = block(x)
            if idx + 2 in self.feature_maps:
                outs.append(x)
        return outs
    @property
    def out_shape(self):
        return [ShapeSpec(channels=c) for c in self._out_channels]
--- a/build/lib/ppdet/modeling/backbones/name_adapter.py
+++ b/build/lib/ppdet/modeling/backbones/name_adapter.py
@ -0,0 +1,69 @@
 class NameAdapter(object):
    """Fix the backbones variable names for pretrained weight"""
    def __init__(self, model):
        super(NameAdapter, self).__init__()
        self.model = model
    @property
    def model_type(self):
        return getattr(self.model, '_model_type', '')
    @property
    def variant(self):
        return getattr(self.model, 'variant', '')
    def fix_conv_norm_name(self, name):
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        # the naming rule is same as pretrained weight
        if self.model_type == 'SEResNeXt':
            bn_name = name + "_bn"
        return bn_name
    def fix_shortcut_name(self, name):
        if self.model_type == 'SEResNeXt':
            name = 'conv' + name + '_prj'
        return name
    def fix_bottleneck_name(self, name):
        if self.model_type == 'SEResNeXt':
            conv_name1 = 'conv' + name + '_x1'
            conv_name2 = 'conv' + name + '_x2'
            conv_name3 = 'conv' + name + '_x3'
            shortcut_name = name
        else:
            conv_name1 = name + "_branch2a"
            conv_name2 = name + "_branch2b"
            conv_name3 = name + "_branch2c"
            shortcut_name = name + "_branch1"
        return conv_name1, conv_name2, conv_name3, shortcut_name
    def fix_basicblock_name(self, name):
        if self.model_type == 'SEResNeXt':
            conv_name1 = 'conv' + name + '_x1'
            conv_name2 = 'conv' + name + '_x2'
            shortcut_name = name
        else:
            conv_name1 = name + "_branch2a"
            conv_name2 = name + "_branch2b"
            shortcut_name = name + "_branch1"
        return conv_name1, conv_name2, shortcut_name
    def fix_layer_warp_name(self, stage_num, count, i):
        name = 'res' + str(stage_num)
        if count > 10 and stage_num == 4:
            if i == 0:
                conv_name = name + "a"
            else:
                conv_name = name + "b" + str(i)
        else:
            conv_name = name + chr(ord("a") + i)
        if self.model_type == 'SEResNeXt':
            conv_name = str(stage_num + 2) + '_' + str(i + 1)
        return conv_name
    def fix_c1_stage_name(self):
        return "res_conv1" if self.model_type == 'ResNeXt' else "conv1"
--- a/build/lib/ppdet/modeling/backbones/res2net.py
+++ b/build/lib/ppdet/modeling/backbones/res2net.py
@ -0,0 +1,357 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from numbers import Integral
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
 from ppdet.core.workspace import register, serializable
 from ..shape_spec import ShapeSpec
 from .resnet import ConvNormLayer
 __all__ = ['Res2Net', 'Res2NetC5']
 Res2Net_cfg = {
    50: [3, 4, 6, 3],
    101: [3, 4, 23, 3],
    152: [3, 8, 36, 3],
    200: [3, 12, 48, 3]
 }
 class BottleNeck(nn.Layer):
    def __init__(self,
                 ch_in,
                 ch_out,
                 stride,
                 shortcut,
                 width,
                 scales=4,
                 variant='b',
                 groups=1,
                 lr=1.0,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=True,
                 dcn_v2=False):
        super(BottleNeck, self).__init__()
        self.shortcut = shortcut
        self.scales = scales
        self.stride = stride
        if not shortcut:
            if variant == 'd' and stride == 2:
                self.branch1 = nn.Sequential()
                self.branch1.add_sublayer(
                    'pool',
                    nn.AvgPool2D(
                        kernel_size=2, stride=2, padding=0, ceil_mode=True))
                self.branch1.add_sublayer(
                    'conv',
                    ConvNormLayer(
                        ch_in=ch_in,
                        ch_out=ch_out,
                        filter_size=1,
                        stride=1,
                        norm_type=norm_type,
                        norm_decay=norm_decay,
                        freeze_norm=freeze_norm,
                        lr=lr))
            else:
                self.branch1 = ConvNormLayer(
                    ch_in=ch_in,
                    ch_out=ch_out,
                    filter_size=1,
                    stride=stride,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    lr=lr)
        self.branch2a = ConvNormLayer(
            ch_in=ch_in,
            ch_out=width * scales,
            filter_size=1,
            stride=stride if variant == 'a' else 1,
            groups=1,
            act='relu',
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            lr=lr)
        self.branch2b = nn.LayerList([
            ConvNormLayer(
                ch_in=width,
                ch_out=width,
                filter_size=3,
                stride=1 if variant == 'a' else stride,
                groups=groups,
                act='relu',
                norm_type=norm_type,
                norm_decay=norm_decay,
                freeze_norm=freeze_norm,
                lr=lr,
                dcn_v2=dcn_v2) for _ in range(self.scales - 1)
        ])
        self.branch2c = ConvNormLayer(
            ch_in=width * scales,
            ch_out=ch_out,
            filter_size=1,
            stride=1,
            groups=1,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            lr=lr)
    def forward(self, inputs):
        out = self.branch2a(inputs)
        feature_split = paddle.split(out, self.scales, 1)
        out_split = []
        for i in range(self.scales - 1):
            if i == 0 or self.stride == 2:
                out_split.append(self.branch2b[i](feature_split[i]))
            else:
                out_split.append(self.branch2b[i](paddle.add(feature_split[i],
                                                             out_split[-1])))
        if self.stride == 1:
            out_split.append(feature_split[-1])
        else:
            out_split.append(F.avg_pool2d(feature_split[-1], 3, self.stride, 1))
        out = self.branch2c(paddle.concat(out_split, 1))
        if self.shortcut:
            short = inputs
        else:
            short = self.branch1(inputs)
        out = paddle.add(out, short)
        out = F.relu(out)
        return out
 class Blocks(nn.Layer):
    def __init__(self,
                 ch_in,
                 ch_out,
                 count,
                 stage_num,
                 width,
                 scales=4,
                 variant='b',
                 groups=1,
                 lr=1.0,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=True,
                 dcn_v2=False):
        super(Blocks, self).__init__()
        self.blocks = nn.Sequential()
        for i in range(count):
            self.blocks.add_sublayer(
                str(i),
                BottleNeck(
                    ch_in=ch_in if i == 0 else ch_out,
                    ch_out=ch_out,
                    stride=2 if i == 0 and stage_num != 2 else 1,
                    shortcut=False if i == 0 else True,
                    width=width * (2**(stage_num - 2)),
                    scales=scales,
                    variant=variant,
                    groups=groups,
                    lr=lr,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    dcn_v2=dcn_v2))
    def forward(self, inputs):
        return self.blocks(inputs)
@register
@serializable
 class Res2Net(nn.Layer):
    """
    Res2Net, see https://arxiv.org/abs/1904.01169
    Args:
        depth (int): Res2Net depth, should be 50, 101, 152, 200.
        width (int): Res2Net width
        scales (int): Res2Net scale
        variant (str): Res2Net variant, supports 'a', 'b', 'c', 'd' currently
        lr_mult_list (list): learning rate ratio of different resnet stages(2,3,4,5),
                             lower learning rate ratio is need for pretrained model
                             got using distillation(default as [1.0, 1.0, 1.0, 1.0]).
        groups (int): The groups number of the Conv Layer.
        norm_type (str): normalization type, 'bn' or 'sync_bn'
        norm_decay (float): weight decay for normalization layer weights
        freeze_norm (bool): freeze normalization layers
        freeze_at (int): freeze the backbone at which stage
        return_idx (list): index of stages whose feature maps are returned,
                           index 0 stands for res2
        dcn_v2_stages (list): index of stages who select deformable conv v2
        num_stages (int): number of stages created
    """
    __shared__ = ['norm_type']
    def __init__(self,
                 depth=50,
                 width=26,
                 scales=4,
                 variant='b',
                 lr_mult_list=[1.0, 1.0, 1.0, 1.0],
                 groups=1,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=True,
                 freeze_at=0,
                 return_idx=[0, 1, 2, 3],
                 dcn_v2_stages=[-1],
                 num_stages=4):
        super(Res2Net, self).__init__()
        self._model_type = 'Res2Net' if groups == 1 else 'Res2NeXt'
        assert depth in [50, 101, 152, 200], \
            "depth {} not in [50, 101, 152, 200]"
        assert variant in ['a', 'b', 'c', 'd'], "invalid Res2Net variant"
        assert num_stages >= 1 and num_stages <= 4
        self.depth = depth
        self.variant = variant
        self.norm_type = norm_type
        self.norm_decay = norm_decay
        self.freeze_norm = freeze_norm
        self.freeze_at = freeze_at
        if isinstance(return_idx, Integral):
            return_idx = [return_idx]
        assert max(return_idx) < num_stages, \
            'the maximum return index must smaller than num_stages, ' \
            'but received maximum return index is {} and num_stages ' \
            'is {}'.format(max(return_idx), num_stages)
        self.return_idx = return_idx
        self.num_stages = num_stages
        assert len(lr_mult_list) == 4, \
            "lr_mult_list length must be 4 but got {}".format(len(lr_mult_list))
        if isinstance(dcn_v2_stages, Integral):
            dcn_v2_stages = [dcn_v2_stages]
        assert max(dcn_v2_stages) < num_stages
        self.dcn_v2_stages = dcn_v2_stages
        block_nums = Res2Net_cfg[depth]
        # C1 stage
        if self.variant in ['c', 'd']:
            conv_def = [
                [3, 32, 3, 2, "conv1_1"],
                [32, 32, 3, 1, "conv1_2"],
                [32, 64, 3, 1, "conv1_3"],
            ]
        else:
            conv_def = [[3, 64, 7, 2, "conv1"]]
        self.res1 = nn.Sequential()
        for (c_in, c_out, k, s, _name) in conv_def:
            self.res1.add_sublayer(
                _name,
                ConvNormLayer(
                    ch_in=c_in,
                    ch_out=c_out,
                    filter_size=k,
                    stride=s,
                    groups=1,
                    act='relu',
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    lr=1.0))
        self._in_channels = [64, 256, 512, 1024]
        self._out_channels = [256, 512, 1024, 2048]
        self._out_strides = [4, 8, 16, 32]
        # C2-C5 stages
        self.res_layers = []
        for i in range(num_stages):
            lr_mult = lr_mult_list[i]
            stage_num = i + 2
            self.res_layers.append(
                self.add_sublayer(
                    "res{}".format(stage_num),
                    Blocks(
                        self._in_channels[i],
                        self._out_channels[i],
                        count=block_nums[i],
                        stage_num=stage_num,
                        width=width,
                        scales=scales,
                        groups=groups,
                        lr=lr_mult,
                        norm_type=norm_type,
                        norm_decay=norm_decay,
                        freeze_norm=freeze_norm,
                        dcn_v2=(i in self.dcn_v2_stages))))
    @property
    def out_shape(self):
        return [
            ShapeSpec(
                channels=self._out_channels[i], stride=self._out_strides[i])
            for i in self.return_idx
        ]
    def forward(self, inputs):
        x = inputs['image']
        res1 = self.res1(x)
        x = F.max_pool2d(res1, kernel_size=3, stride=2, padding=1)
        outs = []
        for idx, stage in enumerate(self.res_layers):
            x = stage(x)
            if idx == self.freeze_at:
                x.stop_gradient = True
            if idx in self.return_idx:
                outs.append(x)
        return outs
@register
 class Res2NetC5(nn.Layer):
    def __init__(self, depth=50, width=26, scales=4, variant='b'):
        super(Res2NetC5, self).__init__()
        feat_in, feat_out = [1024, 2048]
        self.res5 = Blocks(
            feat_in,
            feat_out,
            count=3,
            stage_num=5,
            width=width,
            scales=scales,
            variant=variant)
        self.feat_out = feat_out
    @property
    def out_shape(self):
        return [ShapeSpec(
            channels=self.feat_out,
            stride=32, )]
    def forward(self, roi_feat, stage=0):
        y = self.res5(roi_feat)
        return y
--- a/build/lib/ppdet/modeling/backbones/resnet.py
+++ b/build/lib/ppdet/modeling/backbones/resnet.py
@ -0,0 +1,606 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 import math
 from numbers import Integral
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
 from ppdet.core.workspace import register, serializable
 from paddle.regularizer import L2Decay
 from paddle.nn.initializer import Uniform
 from paddle import ParamAttr
 from paddle.nn.initializer import Constant
 from paddle.vision.ops import DeformConv2D
 from .name_adapter import NameAdapter
 from ..shape_spec import ShapeSpec
 __all__ = ['ResNet', 'Res5Head', 'Blocks', 'BasicBlock', 'BottleNeck']
 ResNet_cfg = {
    18: [2, 2, 2, 2],
    34: [3, 4, 6, 3],
    50: [3, 4, 6, 3],
    101: [3, 4, 23, 3],
    152: [3, 8, 36, 3],
 }
 class ConvNormLayer(nn.Layer):
    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size,
                 stride,
                 groups=1,
                 act=None,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=True,
                 lr=1.0,
                 dcn_v2=False):
        super(ConvNormLayer, self).__init__()
        assert norm_type in ['bn', 'sync_bn']
        self.norm_type = norm_type
        self.act = act
        self.dcn_v2 = dcn_v2
        if not self.dcn_v2:
            self.conv = nn.Conv2D(
                in_channels=ch_in,
                out_channels=ch_out,
                kernel_size=filter_size,
                stride=stride,
                padding=(filter_size - 1) // 2,
                groups=groups,
                weight_attr=ParamAttr(learning_rate=lr),
                bias_attr=False)
        else:
            self.offset_channel = 2 * filter_size**2
            self.mask_channel = filter_size**2
            self.conv_offset = nn.Conv2D(
                in_channels=ch_in,
                out_channels=3 * filter_size**2,
                kernel_size=filter_size,
                stride=stride,
                padding=(filter_size - 1) // 2,
                weight_attr=ParamAttr(initializer=Constant(0.)),
                bias_attr=ParamAttr(initializer=Constant(0.)))
            self.conv = DeformConv2D(
                in_channels=ch_in,
                out_channels=ch_out,
                kernel_size=filter_size,
                stride=stride,
                padding=(filter_size - 1) // 2,
                dilation=1,
                groups=groups,
                weight_attr=ParamAttr(learning_rate=lr),
                bias_attr=False)
        norm_lr = 0. if freeze_norm else lr
        param_attr = ParamAttr(
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay),
            trainable=False if freeze_norm else True)
        bias_attr = ParamAttr(
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay),
            trainable=False if freeze_norm else True)
        global_stats = True if freeze_norm else False
        if norm_type == 'sync_bn':
            self.norm = nn.SyncBatchNorm(
                ch_out, weight_attr=param_attr, bias_attr=bias_attr)
        else:
            self.norm = nn.BatchNorm(
                ch_out,
                act=None,
                param_attr=param_attr,
                bias_attr=bias_attr,
                use_global_stats=global_stats)
        norm_params = self.norm.parameters()
        if freeze_norm:
            for param in norm_params:
                param.stop_gradient = True
    def forward(self, inputs):
        if not self.dcn_v2:
            out = self.conv(inputs)
        else:
            offset_mask = self.conv_offset(inputs)
            offset, mask = paddle.split(
                offset_mask,
                num_or_sections=[self.offset_channel, self.mask_channel],
                axis=1)
            mask = F.sigmoid(mask)
            out = self.conv(inputs, offset, mask=mask)
        if self.norm_type in ['bn', 'sync_bn']:
            out = self.norm(out)
        if self.act:
            out = getattr(F, self.act)(out)
        return out
 class SELayer(nn.Layer):
    def __init__(self, ch, reduction_ratio=16):
        super(SELayer, self).__init__()
        self.pool = nn.AdaptiveAvgPool2D(1)
        stdv = 1.0 / math.sqrt(ch)
        c_ = ch // reduction_ratio
        self.squeeze = nn.Linear(
            ch,
            c_,
            weight_attr=paddle.ParamAttr(initializer=Uniform(-stdv, stdv)),
            bias_attr=True)
        stdv = 1.0 / math.sqrt(c_)
        self.extract = nn.Linear(
            c_,
            ch,
            weight_attr=paddle.ParamAttr(initializer=Uniform(-stdv, stdv)),
            bias_attr=True)
    def forward(self, inputs):
        out = self.pool(inputs)
        out = paddle.squeeze(out, axis=[2, 3])
        out = self.squeeze(out)
        out = F.relu(out)
        out = self.extract(out)
        out = F.sigmoid(out)
        out = paddle.unsqueeze(out, axis=[2, 3])
        scale = out * inputs
        return scale
 class BasicBlock(nn.Layer):
    expansion = 1
    def __init__(self,
                 ch_in,
                 ch_out,
                 stride,
                 shortcut,
                 variant='b',
                 groups=1,
                 base_width=64,
                 lr=1.0,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=True,
                 dcn_v2=False,
                 std_senet=False):
        super(BasicBlock, self).__init__()
        assert dcn_v2 is False, "Not implemented yet."
        assert groups == 1 and base_width == 64, 'BasicBlock only supports groups=1 and base_width=64'
        self.shortcut = shortcut
        if not shortcut:
            if variant == 'd' and stride == 2:
                self.short = nn.Sequential()
                self.short.add_sublayer(
                    'pool',
                    nn.AvgPool2D(
                        kernel_size=2, stride=2, padding=0, ceil_mode=True))
                self.short.add_sublayer(
                    'conv',
                    ConvNormLayer(
                        ch_in=ch_in,
                        ch_out=ch_out,
                        filter_size=1,
                        stride=1,
                        norm_type=norm_type,
                        norm_decay=norm_decay,
                        freeze_norm=freeze_norm,
                        lr=lr))
            else:
                self.short = ConvNormLayer(
                    ch_in=ch_in,
                    ch_out=ch_out,
                    filter_size=1,
                    stride=stride,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    lr=lr)
        self.branch2a = ConvNormLayer(
            ch_in=ch_in,
            ch_out=ch_out,
            filter_size=3,
            stride=stride,
            act='relu',
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            lr=lr)
        self.branch2b = ConvNormLayer(
            ch_in=ch_out,
            ch_out=ch_out,
            filter_size=3,
            stride=1,
            act=None,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            lr=lr)
        self.std_senet = std_senet
        if self.std_senet:
            self.se = SELayer(ch_out)
    def forward(self, inputs):
        out = self.branch2a(inputs)
        out = self.branch2b(out)
        if self.std_senet:
            out = self.se(out)
        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)
        out = paddle.add(x=out, y=short)
        out = F.relu(out)
        return out
 class BottleNeck(nn.Layer):
    expansion = 4
    def __init__(self,
                 ch_in,
                 ch_out,
                 stride,
                 shortcut,
                 variant='b',
                 groups=1,
                 base_width=4,
                 lr=1.0,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=True,
                 dcn_v2=False,
                 std_senet=False):
        super(BottleNeck, self).__init__()
        if variant == 'a':
            stride1, stride2 = stride, 1
        else:
            stride1, stride2 = 1, stride
        # ResNeXt
        width = int(ch_out * (base_width / 64.)) * groups
        self.shortcut = shortcut
        if not shortcut:
            if variant == 'd' and stride == 2:
                self.short = nn.Sequential()
                self.short.add_sublayer(
                    'pool',
                    nn.AvgPool2D(
                        kernel_size=2, stride=2, padding=0, ceil_mode=True))
                self.short.add_sublayer(
                    'conv',
                    ConvNormLayer(
                        ch_in=ch_in,
                        ch_out=ch_out * self.expansion,
                        filter_size=1,
                        stride=1,
                        norm_type=norm_type,
                        norm_decay=norm_decay,
                        freeze_norm=freeze_norm,
                        lr=lr))
            else:
                self.short = ConvNormLayer(
                    ch_in=ch_in,
                    ch_out=ch_out * self.expansion,
                    filter_size=1,
                    stride=stride,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    lr=lr)
        self.branch2a = ConvNormLayer(
            ch_in=ch_in,
            ch_out=width,
            filter_size=1,
            stride=stride1,
            groups=1,
            act='relu',
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            lr=lr)
        self.branch2b = ConvNormLayer(
            ch_in=width,
            ch_out=width,
            filter_size=3,
            stride=stride2,
            groups=groups,
            act='relu',
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            lr=lr,
            dcn_v2=dcn_v2)
        self.branch2c = ConvNormLayer(
            ch_in=width,
            ch_out=ch_out * self.expansion,
            filter_size=1,
            stride=1,
            groups=1,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            lr=lr)
        self.std_senet = std_senet
        if self.std_senet:
            self.se = SELayer(ch_out * self.expansion)
    def forward(self, inputs):
        out = self.branch2a(inputs)
        out = self.branch2b(out)
        out = self.branch2c(out)
        if self.std_senet:
            out = self.se(out)
        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)
        out = paddle.add(x=out, y=short)
        out = F.relu(out)
        return out
 class Blocks(nn.Layer):
    def __init__(self,
                 block,
                 ch_in,
                 ch_out,
                 count,
                 name_adapter,
                 stage_num,
                 variant='b',
                 groups=1,
                 base_width=64,
                 lr=1.0,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=True,
                 dcn_v2=False,
                 std_senet=False):
        super(Blocks, self).__init__()
        self.blocks = []
        for i in range(count):
            conv_name = name_adapter.fix_layer_warp_name(stage_num, count, i)
            layer = self.add_sublayer(
                conv_name,
                block(
                    ch_in=ch_in,
                    ch_out=ch_out,
                    stride=2 if i == 0 and stage_num != 2 else 1,
                    shortcut=False if i == 0 else True,
                    variant=variant,
                    groups=groups,
                    base_width=base_width,
                    lr=lr,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    dcn_v2=dcn_v2,
                    std_senet=std_senet))
            self.blocks.append(layer)
            if i == 0:
                ch_in = ch_out * block.expansion
    def forward(self, inputs):
        block_out = inputs
        for block in self.blocks:
            block_out = block(block_out)
        return block_out
@register
@serializable
 class ResNet(nn.Layer):
    __shared__ = ['norm_type']
    def __init__(self,
                 depth=50,
                 ch_in=64,
                 variant='b',
                 lr_mult_list=[1.0, 1.0, 1.0, 1.0],
                 groups=1,
                 base_width=64,
                 norm_type='bn',
                 norm_decay=0,
                 freeze_norm=True,
                 freeze_at=0,
                 return_idx=[0, 1, 2, 3],
                 dcn_v2_stages=[-1],
                 num_stages=4,
                 std_senet=False):
        """
        Residual Network, see https://arxiv.org/abs/1512.03385
        Args:
            depth (int): ResNet depth, should be 18, 34, 50, 101, 152.
            ch_in (int): output channel of first stage, default 64
            variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently
            lr_mult_list (list): learning rate ratio of different resnet stages(2,3,4,5),
                                 lower learning rate ratio is need for pretrained model 
                                 got using distillation(default as [1.0, 1.0, 1.0, 1.0]).
            groups (int): group convolution cardinality
            base_width (int): base width of each group convolution
            norm_type (str): normalization type, 'bn', 'sync_bn' or 'affine_channel'
            norm_decay (float): weight decay for normalization layer weights
            freeze_norm (bool): freeze normalization layers
            freeze_at (int): freeze the backbone at which stage
            return_idx (list): index of the stages whose feature maps are returned
            dcn_v2_stages (list): index of stages who select deformable conv v2
            num_stages (int): total num of stages
            std_senet (bool): whether use senet, default True
        """
        super(ResNet, self).__init__()
        self._model_type = 'ResNet' if groups == 1 else 'ResNeXt'
        assert num_stages >= 1 and num_stages <= 4
        self.depth = depth
        self.variant = variant
        self.groups = groups
        self.base_width = base_width
        self.norm_type = norm_type
        self.norm_decay = norm_decay
        self.freeze_norm = freeze_norm
        self.freeze_at = freeze_at
        if isinstance(return_idx, Integral):
            return_idx = [return_idx]
        assert max(return_idx) < num_stages, \
            'the maximum return index must smaller than num_stages, ' \
            'but received maximum return index is {} and num_stages ' \
            'is {}'.format(max(return_idx), num_stages)
        self.return_idx = return_idx
        self.num_stages = num_stages
        assert len(lr_mult_list) == 4, \
            "lr_mult_list length must be 4 but got {}".format(len(lr_mult_list))
        if isinstance(dcn_v2_stages, Integral):
            dcn_v2_stages = [dcn_v2_stages]
        assert max(dcn_v2_stages) < num_stages
        if isinstance(dcn_v2_stages, Integral):
            dcn_v2_stages = [dcn_v2_stages]
        assert max(dcn_v2_stages) < num_stages
        self.dcn_v2_stages = dcn_v2_stages
        block_nums = ResNet_cfg[depth]
        na = NameAdapter(self)
        conv1_name = na.fix_c1_stage_name()
        if variant in ['c', 'd']:
            conv_def = [
                [3, ch_in // 2, 3, 2, "conv1_1"],
                [ch_in // 2, ch_in // 2, 3, 1, "conv1_2"],
                [ch_in // 2, ch_in, 3, 1, "conv1_3"],
            ]
        else:
            conv_def = [[3, ch_in, 7, 2, conv1_name]]
        self.conv1 = nn.Sequential()
        for (c_in, c_out, k, s, _name) in conv_def:
            self.conv1.add_sublayer(
                _name,
                ConvNormLayer(
                    ch_in=c_in,
                    ch_out=c_out,
                    filter_size=k,
                    stride=s,
                    groups=1,
                    act='relu',
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    lr=1.0))
        self.ch_in = ch_in
        ch_out_list = [64, 128, 256, 512]
        block = BottleNeck if depth >= 50 else BasicBlock
        self._out_channels = [block.expansion * v for v in ch_out_list]
        self._out_strides = [4, 8, 16, 32]
        self.res_layers = []
        for i in range(num_stages):
            lr_mult = lr_mult_list[i]
            stage_num = i + 2
            res_name = "res{}".format(stage_num)
            res_layer = self.add_sublayer(
                res_name,
                Blocks(
                    block,
                    self.ch_in,
                    ch_out_list[i],
                    count=block_nums[i],
                    name_adapter=na,
                    stage_num=stage_num,
                    variant=variant,
                    groups=groups,
                    base_width=base_width,
                    lr=lr_mult,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    dcn_v2=(i in self.dcn_v2_stages),
                    std_senet=std_senet))
            self.res_layers.append(res_layer)
            self.ch_in = self._out_channels[i]
    @property
    def out_shape(self):
        return [
            ShapeSpec(
                channels=self._out_channels[i], stride=self._out_strides[i])
            for i in self.return_idx
        ]
    def forward(self, inputs):
        x = inputs['image']
        conv1 = self.conv1(x)
        x = F.max_pool2d(conv1, kernel_size=3, stride=2, padding=1)
        outs = []
        for idx, stage in enumerate(self.res_layers):
            x = stage(x)
            if idx == self.freeze_at:
                x.stop_gradient = True
            if idx in self.return_idx:
                outs.append(x)
        return outs
@register
 class Res5Head(nn.Layer):
    def __init__(self, depth=50):
        super(Res5Head, self).__init__()
        feat_in, feat_out = [1024, 512]
        if depth < 50:
            feat_in = 256
        na = NameAdapter(self)
        block = BottleNeck if depth >= 50 else BasicBlock
        self.res5 = Blocks(
            block, feat_in, feat_out, count=3, name_adapter=na, stage_num=5)
        self.feat_out = feat_out if depth < 50 else feat_out * 4
    @property
    def out_shape(self):
        return [ShapeSpec(
            channels=self.feat_out,
            stride=16, )]
    def forward(self, roi_feat, stage=0):
        y = self.res5(roi_feat)
        return y
--- a/build/lib/ppdet/modeling/backbones/senet.py
+++ b/build/lib/ppdet/modeling/backbones/senet.py
@ -0,0 +1,139 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 import paddle.nn as nn
 from ppdet.core.workspace import register, serializable
 from .resnet import ResNet, Blocks, BasicBlock, BottleNeck
 __all__ = ['SENet', 'SERes5Head']
@register
@serializable
 class SENet(ResNet):
    __shared__ = ['norm_type']
    def __init__(self,
                 depth=50,
                 variant='b',
                 lr_mult_list=[1.0, 1.0, 1.0, 1.0],
                 groups=1,
                 base_width=64,
                 norm_type='bn',
                 norm_decay=0,
                 freeze_norm=True,
                 freeze_at=0,
                 return_idx=[0, 1, 2, 3],
                 dcn_v2_stages=[-1],
                 std_senet=True,
                 num_stages=4):
        """
        Squeeze-and-Excitation Networks, see https://arxiv.org/abs/1709.01507
        Args:
            depth (int): SENet depth, should be 50, 101, 152
            variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently
            lr_mult_list (list): learning rate ratio of different resnet stages(2,3,4,5),
                                 lower learning rate ratio is need for pretrained model 
                                 got using distillation(default as [1.0, 1.0, 1.0, 1.0]).
            groups (int): group convolution cardinality
            base_width (int): base width of each group convolution
            norm_type (str): normalization type, 'bn', 'sync_bn' or 'affine_channel'
            norm_decay (float): weight decay for normalization layer weights
            freeze_norm (bool): freeze normalization layers
            freeze_at (int): freeze the backbone at which stage
            return_idx (list): index of the stages whose feature maps are returned
            dcn_v2_stages (list): index of stages who select deformable conv v2
            std_senet (bool): whether use senet, default True
            num_stages (int): total num of stages
        """
        super(SENet, self).__init__(
            depth=depth,
            variant=variant,
            lr_mult_list=lr_mult_list,
            ch_in=128,
            groups=groups,
            base_width=base_width,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            freeze_at=freeze_at,
            return_idx=return_idx,
            dcn_v2_stages=dcn_v2_stages,
            std_senet=std_senet,
            num_stages=num_stages)
@register
 class SERes5Head(nn.Layer):
    def __init__(self,
                 depth=50,
                 variant='b',
                 lr_mult=1.0,
                 groups=1,
                 base_width=64,
                 norm_type='bn',
                 norm_decay=0,
                 dcn_v2=False,
                 freeze_norm=False,
                 std_senet=True):
        """
        SERes5Head layer
        Args:
            depth (int): SENet depth, should be 50, 101, 152
            variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently
            lr_mult (list): learning rate ratio of SERes5Head, default as 1.0.
            groups (int): group convolution cardinality
            base_width (int): base width of each group convolution
            norm_type (str): normalization type, 'bn', 'sync_bn' or 'affine_channel'
            norm_decay (float): weight decay for normalization layer weights
            dcn_v2_stages (list): index of stages who select deformable conv v2
            std_senet (bool): whether use senet, default True
        """
        super(SERes5Head, self).__init__()
        ch_out = 512
        ch_in = 256 if depth < 50 else 1024
        na = NameAdapter(self)
        block = BottleNeck if depth >= 50 else BasicBlock
        self.res5 = Blocks(
            block,
            ch_in,
            ch_out,
            count=3,
            name_adapter=na,
            stage_num=5,
            variant=variant,
            groups=groups,
            base_width=base_width,
            lr=lr_mult,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            dcn_v2=dcn_v2,
            std_senet=std_senet)
        self.ch_out = ch_out * block.expansion
    @property
    def out_shape(self):
        return [ShapeSpec(
            channels=self.ch_out,
            stride=16, )]
    def forward(self, roi_feat):
        y = self.res5(roi_feat)
        return y
--- a/build/lib/ppdet/modeling/backbones/vgg.py
+++ b/build/lib/ppdet/modeling/backbones/vgg.py
@ -0,0 +1,215 @@
 from __future__ import division
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
 from paddle import ParamAttr
 from paddle.nn import Conv2D, MaxPool2D
 from ppdet.core.workspace import register, serializable
 from ..shape_spec import ShapeSpec
 __all__ = ['VGG']
 VGG_cfg = {16: [2, 2, 3, 3, 3], 19: [2, 2, 4, 4, 4]}
 class ConvBlock(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 groups,
                 pool_size=2,
                 pool_stride=2,
                 pool_padding=0,
                 name=None):
        super(ConvBlock, self).__init__()
        self.groups = groups
        self.conv0 = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            weight_attr=ParamAttr(name=name + "1_weights"),
            bias_attr=ParamAttr(name=name + "1_bias"))
        self.conv_out_list = []
        for i in range(1, groups):
            conv_out = self.add_sublayer(
                'conv{}'.format(i),
                Conv2D(
                    in_channels=out_channels,
                    out_channels=out_channels,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    weight_attr=ParamAttr(
                        name=name + "{}_weights".format(i + 1)),
                    bias_attr=ParamAttr(name=name + "{}_bias".format(i + 1))))
            self.conv_out_list.append(conv_out)
        self.pool = MaxPool2D(
            kernel_size=pool_size,
            stride=pool_stride,
            padding=pool_padding,
            ceil_mode=True)
    def forward(self, inputs):
        out = self.conv0(inputs)
        out = F.relu(out)
        for conv_i in self.conv_out_list:
            out = conv_i(out)
            out = F.relu(out)
        pool = self.pool(out)
        return out, pool
 class ExtraBlock(nn.Layer):
    def __init__(self,
                 in_channels,
                 mid_channels,
                 out_channels,
                 padding,
                 stride,
                 kernel_size,
                 name=None):
        super(ExtraBlock, self).__init__()
        self.conv0 = Conv2D(
            in_channels=in_channels,
            out_channels=mid_channels,
            kernel_size=1,
            stride=1,
            padding=0)
        self.conv1 = Conv2D(
            in_channels=mid_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding)
    def forward(self, inputs):
        out = self.conv0(inputs)
        out = F.relu(out)
        out = self.conv1(out)
        out = F.relu(out)
        return out
 class L2NormScale(nn.Layer):
    def __init__(self, num_channels, scale=1.0):
        super(L2NormScale, self).__init__()
        self.scale = self.create_parameter(
            attr=ParamAttr(initializer=paddle.nn.initializer.Constant(scale)),
            shape=[num_channels])
    def forward(self, inputs):
        out = F.normalize(inputs, axis=1, epsilon=1e-10)
        # out = self.scale.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(
        #     out) * out
        out = self.scale.unsqueeze(0).unsqueeze(2).unsqueeze(3) * out
        return out
@register
@serializable
 class VGG(nn.Layer):
    def __init__(self,
                 depth=16,
                 normalizations=[20., -1, -1, -1, -1, -1],
                 extra_block_filters=[[256, 512, 1, 2, 3], [128, 256, 1, 2, 3],
                                      [128, 256, 0, 1, 3],
                                      [128, 256, 0, 1, 3]]):
        super(VGG, self).__init__()
        assert depth in [16, 19], \
                "depth as 16/19 supported currently, but got {}".format(depth)
        self.depth = depth
        self.groups = VGG_cfg[depth]
        self.normalizations = normalizations
        self.extra_block_filters = extra_block_filters
        self._out_channels = []
        self.conv_block_0 = ConvBlock(
            3, 64, self.groups[0], 2, 2, 0, name="conv1_")
        self.conv_block_1 = ConvBlock(
            64, 128, self.groups[1], 2, 2, 0, name="conv2_")
        self.conv_block_2 = ConvBlock(
            128, 256, self.groups[2], 2, 2, 0, name="conv3_")
        self.conv_block_3 = ConvBlock(
            256, 512, self.groups[3], 2, 2, 0, name="conv4_")
        self.conv_block_4 = ConvBlock(
            512, 512, self.groups[4], 3, 1, 1, name="conv5_")
        self._out_channels.append(512)
        self.fc6 = Conv2D(
            in_channels=512,
            out_channels=1024,
            kernel_size=3,
            stride=1,
            padding=6,
            dilation=6)
        self.fc7 = Conv2D(
            in_channels=1024,
            out_channels=1024,
            kernel_size=1,
            stride=1,
            padding=0)
        self._out_channels.append(1024)
        # extra block
        self.extra_convs = []
        last_channels = 1024
        for i, v in enumerate(self.extra_block_filters):
            assert len(v) == 5, "extra_block_filters size not fix"
            extra_conv = self.add_sublayer("conv{}".format(6 + i),
                                           ExtraBlock(last_channels, v[0], v[1],
                                                      v[2], v[3], v[4]))
            last_channels = v[1]
            self.extra_convs.append(extra_conv)
            self._out_channels.append(last_channels)
        self.norms = []
        for i, n in enumerate(self.normalizations):
            if n != -1:
                norm = self.add_sublayer("norm{}".format(i),
                                         L2NormScale(
                                             self.extra_block_filters[i][1], n))
            else:
                norm = None
            self.norms.append(norm)
    def forward(self, inputs):
        outputs = []
        conv, pool = self.conv_block_0(inputs['image'])
        conv, pool = self.conv_block_1(pool)
        conv, pool = self.conv_block_2(pool)
        conv, pool = self.conv_block_3(pool)
        outputs.append(conv)
        conv, pool = self.conv_block_4(pool)
        out = self.fc6(pool)
        out = F.relu(out)
        out = self.fc7(out)
        out = F.relu(out)
        outputs.append(out)
        if not self.extra_block_filters:
            return outputs
        # extra block
        for extra_conv in self.extra_convs:
            out = extra_conv(out)
            outputs.append(out)
        for i, n in enumerate(self.normalizations):
            if n != -1:
                outputs[i] = self.norms[i](outputs[i])
        return outputs
    @property
    def out_shape(self):
        return [ShapeSpec(channels=c) for c in self._out_channels]
--- a/build/lib/ppdet/modeling/bbox_utils.py
+++ b/build/lib/ppdet/modeling/bbox_utils.py
@ -0,0 +1,459 @@
 #   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import math
 import paddle
 import numpy as np
 def bbox2delta(src_boxes, tgt_boxes, weights):
    src_w = src_boxes[:, 2] - src_boxes[:, 0]
    src_h = src_boxes[:, 3] - src_boxes[:, 1]
    src_ctr_x = src_boxes[:, 0] + 0.5 * src_w
    src_ctr_y = src_boxes[:, 1] + 0.5 * src_h
    tgt_w = tgt_boxes[:, 2] - tgt_boxes[:, 0]
    tgt_h = tgt_boxes[:, 3] - tgt_boxes[:, 1]
    tgt_ctr_x = tgt_boxes[:, 0] + 0.5 * tgt_w
    tgt_ctr_y = tgt_boxes[:, 1] + 0.5 * tgt_h
    wx, wy, ww, wh = weights
    dx = wx * (tgt_ctr_x - src_ctr_x) / src_w
    dy = wy * (tgt_ctr_y - src_ctr_y) / src_h
    dw = ww * paddle.log(tgt_w / src_w)
    dh = wh * paddle.log(tgt_h / src_h)
    deltas = paddle.stack((dx, dy, dw, dh), axis=1)
    return deltas
 def delta2bbox(deltas, boxes, weights):
    clip_scale = math.log(1000.0 / 16)
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights
    wx, wy, ww, wh = weights
    dx = deltas[:, 0::4] / wx
    dy = deltas[:, 1::4] / wy
    dw = deltas[:, 2::4] / ww
    dh = deltas[:, 3::4] / wh
    # Prevent sending too large values into paddle.exp()
    dw = paddle.clip(dw, max=clip_scale)
    dh = paddle.clip(dh, max=clip_scale)
    pred_ctr_x = dx * widths.unsqueeze(1) + ctr_x.unsqueeze(1)
    pred_ctr_y = dy * heights.unsqueeze(1) + ctr_y.unsqueeze(1)
    pred_w = paddle.exp(dw) * widths.unsqueeze(1)
    pred_h = paddle.exp(dh) * heights.unsqueeze(1)
    pred_boxes = []
    pred_boxes.append(pred_ctr_x - 0.5 * pred_w)
    pred_boxes.append(pred_ctr_y - 0.5 * pred_h)
    pred_boxes.append(pred_ctr_x + 0.5 * pred_w)
    pred_boxes.append(pred_ctr_y + 0.5 * pred_h)
    pred_boxes = paddle.stack(pred_boxes, axis=-1)
    return pred_boxes
 def expand_bbox(bboxes, scale):
    w_half = (bboxes[:, 2] - bboxes[:, 0]) * .5
    h_half = (bboxes[:, 3] - bboxes[:, 1]) * .5
    x_c = (bboxes[:, 2] + bboxes[:, 0]) * .5
    y_c = (bboxes[:, 3] + bboxes[:, 1]) * .5
    w_half *= scale
    h_half *= scale
    bboxes_exp = np.zeros(bboxes.shape, dtype=np.float32)
    bboxes_exp[:, 0] = x_c - w_half
    bboxes_exp[:, 2] = x_c + w_half
    bboxes_exp[:, 1] = y_c - h_half
    bboxes_exp[:, 3] = y_c + h_half
    return bboxes_exp
 def clip_bbox(boxes, im_shape):
    h, w = im_shape[0], im_shape[1]
    x1 = boxes[:, 0].clip(0, w)
    y1 = boxes[:, 1].clip(0, h)
    x2 = boxes[:, 2].clip(0, w)
    y2 = boxes[:, 3].clip(0, h)
    return paddle.stack([x1, y1, x2, y2], axis=1)
 def nonempty_bbox(boxes, min_size=0, return_mask=False):
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]
    mask = paddle.logical_and(w > min_size, w > min_size)
    if return_mask:
        return mask
    keep = paddle.nonzero(mask).flatten()
    return keep
 def bbox_area(boxes):
    return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
 def bbox_overlaps(boxes1, boxes2):
    """
    Calculate overlaps between boxes1 and boxes2
    Args:
        boxes1 (Tensor): boxes with shape [M, 4]
        boxes2 (Tensor): boxes with shape [N, 4]
    Return:
        overlaps (Tensor): overlaps between boxes1 and boxes2 with shape [M, N]
    """
    M = boxes1.shape[0]
    N = boxes2.shape[0]
    if M * N == 0:
        return paddle.zeros([M, N], dtype='float32')
    area1 = bbox_area(boxes1)
    area2 = bbox_area(boxes2)
    xy_max = paddle.minimum(
        paddle.unsqueeze(boxes1, 1)[:, :, 2:], boxes2[:, 2:])
    xy_min = paddle.maximum(
        paddle.unsqueeze(boxes1, 1)[:, :, :2], boxes2[:, :2])
    width_height = xy_max - xy_min
    width_height = width_height.clip(min=0)
    inter = width_height.prod(axis=2)
    overlaps = paddle.where(inter > 0, inter /
                            (paddle.unsqueeze(area1, 1) + area2 - inter),
                            paddle.zeros_like(inter))
    return overlaps
 def xywh2xyxy(box):
    x, y, w, h = box
    x1 = x - w * 0.5
    y1 = y - h * 0.5
    x2 = x + w * 0.5
    y2 = y + h * 0.5
    return [x1, y1, x2, y2]
 def make_grid(h, w, dtype):
    yv, xv = paddle.meshgrid([paddle.arange(h), paddle.arange(w)])
    return paddle.stack((xv, yv), 2).cast(dtype=dtype)
 def decode_yolo(box, anchor, downsample_ratio):
    """decode yolo box
    Args:
        box (list): [x, y, w, h], all have the shape [b, na, h, w, 1]
        anchor (list): anchor with the shape [na, 2]
        downsample_ratio (int): downsample ratio, default 32
        scale (float): scale, default 1.
    Return:
        box (list): decoded box, [x, y, w, h], all have the shape [b, na, h, w, 1]
    """
    x, y, w, h = box
    na, grid_h, grid_w = x.shape[1:4]
    grid = make_grid(grid_h, grid_w, x.dtype).reshape((1, 1, grid_h, grid_w, 2))
    x1 = (x + grid[:, :, :, :, 0:1]) / grid_w
    y1 = (y + grid[:, :, :, :, 1:2]) / grid_h
    anchor = paddle.to_tensor(anchor)
    anchor = paddle.cast(anchor, x.dtype)
    anchor = anchor.reshape((1, na, 1, 1, 2))
    w1 = paddle.exp(w) * anchor[:, :, :, :, 0:1] / (downsample_ratio * grid_w)
    h1 = paddle.exp(h) * anchor[:, :, :, :, 1:2] / (downsample_ratio * grid_h)
    return [x1, y1, w1, h1]
 def iou_similarity(box1, box2, eps=1e-9):
    """Calculate iou of box1 and box2
    Args:
        box1 (Tensor): box with the shape [N, M1, 4]
        box2 (Tensor): box with the shape [N, M2, 4]
    Return:
        iou (Tensor): iou between box1 and box2 with the shape [N, M1, M2]
    """
    box1 = box1.unsqueeze(2)  # [N, M1, 4] -> [N, M1, 1, 4]
    box2 = box2.unsqueeze(1)  # [N, M2, 4] -> [N, 1, M2, 4]
    px1y1, px2y2 = box1[:, :, :, 0:2], box1[:, :, :, 2:4]
    gx1y1, gx2y2 = box2[:, :, :, 0:2], box2[:, :, :, 2:4]
    x1y1 = paddle.maximum(px1y1, gx1y1)
    x2y2 = paddle.minimum(px2y2, gx2y2)
    overlap = (x2y2 - x1y1).clip(0).prod(-1)
    area1 = (px2y2 - px1y1).clip(0).prod(-1)
    area2 = (gx2y2 - gx1y1).clip(0).prod(-1)
    union = area1 + area2 - overlap + eps
    return overlap / union
 def bbox_iou(box1, box2, giou=False, diou=False, ciou=False, eps=1e-9):
    """calculate the iou of box1 and box2
    Args:
        box1 (list): [x, y, w, h], all have the shape [b, na, h, w, 1]
        box2 (list): [x, y, w, h], all have the shape [b, na, h, w, 1]
        giou (bool): whether use giou or not, default False
        diou (bool): whether use diou or not, default False
        ciou (bool): whether use ciou or not, default False
        eps (float): epsilon to avoid divide by zero
    Return:
        iou (Tensor): iou of box1 and box1, with the shape [b, na, h, w, 1]
    """
    px1, py1, px2, py2 = box1
    gx1, gy1, gx2, gy2 = box2
    x1 = paddle.maximum(px1, gx1)
    y1 = paddle.maximum(py1, gy1)
    x2 = paddle.minimum(px2, gx2)
    y2 = paddle.minimum(py2, gy2)
    overlap = ((x2 - x1).clip(0)) * ((y2 - y1).clip(0))
    area1 = (px2 - px1) * (py2 - py1)
    area1 = area1.clip(0)
    area2 = (gx2 - gx1) * (gy2 - gy1)
    area2 = area2.clip(0)
    union = area1 + area2 - overlap + eps
    iou = overlap / union
    if giou or ciou or diou:
        # convex w, h
        cw = paddle.maximum(px2, gx2) - paddle.minimum(px1, gx1)
        ch = paddle.maximum(py2, gy2) - paddle.minimum(py1, gy1)
        if giou:
            c_area = cw * ch + eps
            return iou - (c_area - union) / c_area
        else:
            # convex diagonal squared
            c2 = cw**2 + ch**2 + eps
            # center distance
            rho2 = ((px1 + px2 - gx1 - gx2)**2 + (py1 + py2 - gy1 - gy2)**2) / 4
            if diou:
                return iou - rho2 / c2
            else:
                w1, h1 = px2 - px1, py2 - py1 + eps
                w2, h2 = gx2 - gx1, gy2 - gy1 + eps
                delta = paddle.atan(w1 / h1) - paddle.atan(w2 / h2)
                v = (4 / math.pi**2) * paddle.pow(delta, 2)
                alpha = v / (1 + eps - iou + v)
                alpha.stop_gradient = True
                return iou - (rho2 / c2 + v * alpha)
    else:
        return iou
 def poly2rbox(polys):
    """
    poly:[x0,y0,x1,y1,x2,y2,x3,y3]
    to
    rotated_boxes:[x_ctr,y_ctr,w,h,angle]
    """
    rotated_boxes = []
    for poly in polys:
        poly = np.array(poly[:8], dtype=np.float32)
        pt1 = (poly[0], poly[1])
        pt2 = (poly[2], poly[3])
        pt3 = (poly[4], poly[5])
        pt4 = (poly[6], poly[7])
        edge1 = np.sqrt((pt1[0] - pt2[0]) * (pt1[0] - pt2[0]) + (pt1[1] - pt2[
            1]) * (pt1[1] - pt2[1]))
        edge2 = np.sqrt((pt2[0] - pt3[0]) * (pt2[0] - pt3[0]) + (pt2[1] - pt3[
            1]) * (pt2[1] - pt3[1]))
        width = max(edge1, edge2)
        height = min(edge1, edge2)
        rbox_angle = 0
        if edge1 > edge2:
            rbox_angle = np.arctan2(
                np.float(pt2[1] - pt1[1]), np.float(pt2[0] - pt1[0]))
        elif edge2 >= edge1:
            rbox_angle = np.arctan2(
                np.float(pt4[1] - pt1[1]), np.float(pt4[0] - pt1[0]))
        def norm_angle(angle, range=[-np.pi / 4, np.pi]):
            return (angle - range[0]) % range[1] + range[0]
        rbox_angle = norm_angle(rbox_angle)
        x_ctr = np.float(pt1[0] + pt3[0]) / 2
        y_ctr = np.float(pt1[1] + pt3[1]) / 2
        rotated_box = np.array([x_ctr, y_ctr, width, height, rbox_angle])
        rotated_boxes.append(rotated_box)
    ret_rotated_boxes = np.array(rotated_boxes)
    assert ret_rotated_boxes.shape[1] == 5
    return ret_rotated_boxes
 def cal_line_length(point1, point2):
    import math
    return math.sqrt(
        math.pow(point1[0] - point2[0], 2) + math.pow(point1[1] - point2[1], 2))
 def get_best_begin_point_single(coordinate):
    x1, y1, x2, y2, x3, y3, x4, y4 = coordinate
    xmin = min(x1, x2, x3, x4)
    ymin = min(y1, y2, y3, y4)
    xmax = max(x1, x2, x3, x4)
    ymax = max(y1, y2, y3, y4)
    combinate = [[[x1, y1], [x2, y2], [x3, y3], [x4, y4]],
                 [[x4, y4], [x1, y1], [x2, y2], [x3, y3]],
                 [[x3, y3], [x4, y4], [x1, y1], [x2, y2]],
                 [[x2, y2], [x3, y3], [x4, y4], [x1, y1]]]
    dst_coordinate = [[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]]
    force = 100000000.0
    force_flag = 0
    for i in range(4):
        temp_force = cal_line_length(combinate[i][0], dst_coordinate[0]) \
                     + cal_line_length(combinate[i][1], dst_coordinate[1]) \
                     + cal_line_length(combinate[i][2], dst_coordinate[2]) \
                     + cal_line_length(combinate[i][3], dst_coordinate[3])
        if temp_force < force:
            force = temp_force
            force_flag = i
    if force_flag != 0:
        pass
    return np.array(combinate[force_flag]).reshape(8)
 def rbox2poly_np(rrects):
    """
    rrect:[x_ctr,y_ctr,w,h,angle]
    to
    poly:[x0,y0,x1,y1,x2,y2,x3,y3]
    """
    polys = []
    for i in range(rrects.shape[0]):
        rrect = rrects[i]
        # x_ctr, y_ctr, width, height, angle = rrect[:5]
        x_ctr = rrect[0]
        y_ctr = rrect[1]
        width = rrect[2]
        height = rrect[3]
        angle = rrect[4]
        tl_x, tl_y, br_x, br_y = -width / 2, -height / 2, width / 2, height / 2
        rect = np.array([[tl_x, br_x, br_x, tl_x], [tl_y, tl_y, br_y, br_y]])
        R = np.array([[np.cos(angle), -np.sin(angle)],
                      [np.sin(angle), np.cos(angle)]])
        poly = R.dot(rect)
        x0, x1, x2, x3 = poly[0, :4] + x_ctr
        y0, y1, y2, y3 = poly[1, :4] + y_ctr
        poly = np.array([x0, y0, x1, y1, x2, y2, x3, y3], dtype=np.float32)
        poly = get_best_begin_point_single(poly)
        polys.append(poly)
    polys = np.array(polys)
    return polys
 def rbox2poly(rrects):
    """
    rrect:[x_ctr,y_ctr,w,h,angle]
    to
    poly:[x0,y0,x1,y1,x2,y2,x3,y3]
    """
    N = paddle.shape(rrects)[0]
    x_ctr = rrects[:, 0]
    y_ctr = rrects[:, 1]
    width = rrects[:, 2]
    height = rrects[:, 3]
    angle = rrects[:, 4]
    tl_x, tl_y, br_x, br_y = -width * 0.5, -height * 0.5, width * 0.5, height * 0.5
    normal_rects = paddle.stack(
        [tl_x, br_x, br_x, tl_x, tl_y, tl_y, br_y, br_y], axis=0)
    normal_rects = paddle.reshape(normal_rects, [2, 4, N])
    normal_rects = paddle.transpose(normal_rects, [2, 0, 1])
    sin, cos = paddle.sin(angle), paddle.cos(angle)
    # M.shape=[N,2,2]
    M = paddle.stack([cos, -sin, sin, cos], axis=0)
    M = paddle.reshape(M, [2, 2, N])
    M = paddle.transpose(M, [2, 0, 1])
    # polys:[N,8]
    polys = paddle.matmul(M, normal_rects)
    polys = paddle.transpose(polys, [2, 1, 0])
    polys = paddle.reshape(polys, [-1, N])
    polys = paddle.transpose(polys, [1, 0])
    tmp = paddle.stack(
        [x_ctr, y_ctr, x_ctr, y_ctr, x_ctr, y_ctr, x_ctr, y_ctr], axis=1)
    polys = polys + tmp
    return polys
 def bbox_iou_np_expand(box1, box2, x1y1x2y2=True, eps=1e-16):
    """
    Calculate the iou of box1 and box2 with numpy.
    Args:
        box1 (ndarray): [N, 4]
        box2 (ndarray): [M, 4], usually N != M
        x1y1x2y2 (bool): whether in x1y1x2y2 stype, default True
        eps (float): epsilon to avoid divide by zero
    Return:
        iou (ndarray): iou of box1 and box2, [N, M]
    """
    N, M = len(box1), len(box2)  # usually N != M
    if x1y1x2y2:
        b1_x1, b1_y1 = box1[:, 0], box1[:, 1]
        b1_x2, b1_y2 = box1[:, 2], box1[:, 3]
        b2_x1, b2_y1 = box2[:, 0], box2[:, 1]
        b2_x2, b2_y2 = box2[:, 2], box2[:, 3]
    else:
        # cxcywh style
        # Transform from center and width to exact coordinates
        b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
        b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
        b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
        b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
    # get the coordinates of the intersection rectangle
    inter_rect_x1 = np.zeros((N, M), dtype=np.float32)
    inter_rect_y1 = np.zeros((N, M), dtype=np.float32)
    inter_rect_x2 = np.zeros((N, M), dtype=np.float32)
    inter_rect_y2 = np.zeros((N, M), dtype=np.float32)
    for i in range(len(box2)):
        inter_rect_x1[:, i] = np.maximum(b1_x1, b2_x1[i])
        inter_rect_y1[:, i] = np.maximum(b1_y1, b2_y1[i])
        inter_rect_x2[:, i] = np.minimum(b1_x2, b2_x2[i])
        inter_rect_y2[:, i] = np.minimum(b1_y2, b2_y2[i])
    # Intersection area
    inter_area = np.maximum(inter_rect_x2 - inter_rect_x1, 0) * np.maximum(
        inter_rect_y2 - inter_rect_y1, 0)
    # Union Area
    b1_area = np.repeat(
        ((b1_x2 - b1_x1) * (b1_y2 - b1_y1)).reshape(-1, 1), M, axis=-1)
    b2_area = np.repeat(
        ((b2_x2 - b2_x1) * (b2_y2 - b2_y1)).reshape(1, -1), N, axis=0)
    ious = inter_area / (b1_area + b2_area - inter_area + eps)
    return ious
--- a/build/lib/ppdet/modeling/heads/init.py
+++ b/build/lib/ppdet/modeling/heads/init.py
@ -0,0 +1,41 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from . import bbox_head
 from . import mask_head
 from . import yolo_head
 from . import roi_extractor
 from . import ssd_head
 from . import fcos_head
 from . import solov2_head
 from . import ttf_head
 from . import cascade_head
 from . import face_head
 from . import s2anet_head
 from . import keypoint_hrhrnet_head
 from . import centernet_head
 from .bbox_head import *
 from .mask_head import *
 from .yolo_head import *
 from .roi_extractor import *
 from .ssd_head import *
 from .fcos_head import *
 from .solov2_head import *
 from .ttf_head import *
 from .cascade_head import *
 from .face_head import *
 from .s2anet_head import *
 from .keypoint_hrhrnet_head import *
 from .centernet_head import *
--- a/build/lib/ppdet/modeling/heads/bbox_head.py
+++ b/build/lib/ppdet/modeling/heads/bbox_head.py
@ -0,0 +1,376 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 import numpy as np
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
 from paddle.nn.initializer import Normal, XavierUniform, KaimingNormal
 from paddle.regularizer import L2Decay
 from ppdet.core.workspace import register, create
 from .roi_extractor import RoIAlign
 from ..shape_spec import ShapeSpec
 from ..bbox_utils import bbox2delta
 from ppdet.modeling.layers import ConvNormLayer
 __all__ = ['TwoFCHead', 'XConvNormHead', 'BBoxHead']
@register
 class TwoFCHead(nn.Layer):
    """
    RCNN bbox head with Two fc layers to extract feature
    Args:
        in_channel (int): Input channel which can be derived by from_config
        out_channel (int): Output channel
        resolution (int): Resolution of input feature map, default 7
    """
    def __init__(self, in_channel=256, out_channel=1024, resolution=7):
        super(TwoFCHead, self).__init__()
        self.in_channel = in_channel
        self.out_channel = out_channel
        fan = in_channel * resolution * resolution
        self.fc6 = nn.Linear(
            in_channel * resolution * resolution,
            out_channel,
            weight_attr=paddle.ParamAttr(
                initializer=XavierUniform(fan_out=fan)))
        self.fc6.skip_quant = True
        self.fc7 = nn.Linear(
            out_channel,
            out_channel,
            weight_attr=paddle.ParamAttr(initializer=XavierUniform()))
        self.fc7.skip_quant = True
    @classmethod
    def from_config(cls, cfg, input_shape):
        s = input_shape
        s = s[0] if isinstance(s, (list, tuple)) else s
        return {'in_channel': s.channels}
    @property
    def out_shape(self):
        return [ShapeSpec(channels=self.out_channel, )]
    def forward(self, rois_feat):
        rois_feat = paddle.flatten(rois_feat, start_axis=1, stop_axis=-1)
        fc6 = self.fc6(rois_feat)
        fc6 = F.relu(fc6)
        fc7 = self.fc7(fc6)
        fc7 = F.relu(fc7)
        return fc7
@register
 class XConvNormHead(nn.Layer):
    __shared__ = ['norm_type', 'freeze_norm']
    """
    RCNN bbox head with serveral convolution layers
    Args:
        in_channel (int): Input channels which can be derived by from_config
        num_convs (int): The number of conv layers
        conv_dim (int): The number of channels for the conv layers
        out_channel (int): Output channels
        resolution (int): Resolution of input feature map
        norm_type (string): Norm type, bn, gn, sync_bn are available, 
            default `gn`
        freeze_norm (bool): Whether to freeze the norm
        stage_name (string): Prefix name for conv layer,  '' by default
    """
    def __init__(self,
                 in_channel=256,
                 num_convs=4,
                 conv_dim=256,
                 out_channel=1024,
                 resolution=7,
                 norm_type='gn',
                 freeze_norm=False,
                 stage_name=''):
        super(XConvNormHead, self).__init__()
        self.in_channel = in_channel
        self.num_convs = num_convs
        self.conv_dim = conv_dim
        self.out_channel = out_channel
        self.norm_type = norm_type
        self.freeze_norm = freeze_norm
        self.bbox_head_convs = []
        fan = conv_dim * 3 * 3
        initializer = KaimingNormal(fan_in=fan)
        for i in range(self.num_convs):
            in_c = in_channel if i == 0 else conv_dim
            head_conv_name = stage_name + 'bbox_head_conv{}'.format(i)
            head_conv = self.add_sublayer(
                head_conv_name,
                ConvNormLayer(
                    ch_in=in_c,
                    ch_out=conv_dim,
                    filter_size=3,
                    stride=1,
                    norm_type=self.norm_type,
                    freeze_norm=self.freeze_norm,
                    initializer=initializer))
            self.bbox_head_convs.append(head_conv)
        fan = conv_dim * resolution * resolution
        self.fc6 = nn.Linear(
            conv_dim * resolution * resolution,
            out_channel,
            weight_attr=paddle.ParamAttr(
                initializer=XavierUniform(fan_out=fan)),
            bias_attr=paddle.ParamAttr(
                learning_rate=2., regularizer=L2Decay(0.)))
    @classmethod
    def from_config(cls, cfg, input_shape):
        s = input_shape
        s = s[0] if isinstance(s, (list, tuple)) else s
        return {'in_channel': s.channels}
    @property
    def out_shape(self):
        return [ShapeSpec(channels=self.out_channel, )]
    def forward(self, rois_feat):
        for i in range(self.num_convs):
            rois_feat = F.relu(self.bbox_head_convs[i](rois_feat))
        rois_feat = paddle.flatten(rois_feat, start_axis=1, stop_axis=-1)
        fc6 = F.relu(self.fc6(rois_feat))
        return fc6
@register
 class BBoxHead(nn.Layer):
    __shared__ = ['num_classes']
    __inject__ = ['bbox_assigner', 'bbox_loss']
    """
    RCNN bbox head
    Args:
        head (nn.Layer): Extract feature in bbox head
        in_channel (int): Input channel after RoI extractor
        roi_extractor (object): The module of RoI Extractor
        bbox_assigner (object): The module of Box Assigner, label and sample the 
            box.
        with_pool (bool): Whether to use pooling for the RoI feature.
        num_classes (int): The number of classes
        bbox_weight (List[float]): The weight to get the decode box 
    """
    def __init__(self,
                 head,
                 in_channel,
                 roi_extractor=RoIAlign().__dict__,
                 bbox_assigner='BboxAssigner',
                 with_pool=False,
                 num_classes=80,
                 bbox_weight=[10., 10., 5., 5.],
                 bbox_loss=None):
        super(BBoxHead, self).__init__()
        self.head = head
        self.roi_extractor = roi_extractor
        if isinstance(roi_extractor, dict):
            self.roi_extractor = RoIAlign(**roi_extractor)
        self.bbox_assigner = bbox_assigner
        self.with_pool = with_pool
        self.num_classes = num_classes
        self.bbox_weight = bbox_weight
        self.bbox_loss = bbox_loss
        self.bbox_score = nn.Linear(
            in_channel,
            self.num_classes + 1,
            weight_attr=paddle.ParamAttr(initializer=Normal(
                mean=0.0, std=0.01)))
        self.bbox_score.skip_quant = True
        self.bbox_delta = nn.Linear(
            in_channel,
            4 * self.num_classes,
            weight_attr=paddle.ParamAttr(initializer=Normal(
                mean=0.0, std=0.001)))
        self.bbox_delta.skip_quant = True
        self.assigned_label = None
        self.assigned_rois = None
    @classmethod
    def from_config(cls, cfg, input_shape):
        roi_pooler = cfg['roi_extractor']
        assert isinstance(roi_pooler, dict)
        kwargs = RoIAlign.from_config(cfg, input_shape)
        roi_pooler.update(kwargs)
        kwargs = {'input_shape': input_shape}
        head = create(cfg['head'], **kwargs)
        return {
            'roi_extractor': roi_pooler,
            'head': head,
            'in_channel': head.out_shape[0].channels
        }
    def forward(self, body_feats=None, rois=None, rois_num=None, inputs=None):
        """
        body_feats (list[Tensor]): Feature maps from backbone
        rois (list[Tensor]): RoIs generated from RPN module
        rois_num (Tensor): The number of RoIs in each image
        inputs (dict{Tensor}): The ground-truth of image
        """
        if self.training:
            rois, rois_num, targets = self.bbox_assigner(rois, rois_num, inputs)
            self.assigned_rois = (rois, rois_num)
            self.assigned_targets = targets
        rois_feat = self.roi_extractor(body_feats, rois, rois_num)
        bbox_feat = self.head(rois_feat)
        if self.with_pool:
            feat = F.adaptive_avg_pool2d(bbox_feat, output_size=1)
            feat = paddle.squeeze(feat, axis=[2, 3])
        else:
            feat = bbox_feat
        scores = self.bbox_score(feat)
        deltas = self.bbox_delta(feat)
        if self.training:
            loss = self.get_loss(scores, deltas, targets, rois,
                                 self.bbox_weight)
            return loss, bbox_feat
        else:
            pred = self.get_prediction(scores, deltas)
            return pred, self.head
    def get_loss(self, scores, deltas, targets, rois, bbox_weight):
        """
        scores (Tensor): scores from bbox head outputs
        deltas (Tensor): deltas from bbox head outputs
        targets (list[List[Tensor]]): bbox targets containing tgt_labels, tgt_bboxes and tgt_gt_inds
        rois (List[Tensor]): RoIs generated in each batch
        """
        cls_name = 'loss_bbox_cls'
        reg_name = 'loss_bbox_reg'
        loss_bbox = {}
        # TODO: better pass args
        tgt_labels, tgt_bboxes, tgt_gt_inds = targets
        # bbox cls
        tgt_labels = paddle.concat(tgt_labels) if len(
            tgt_labels) > 1 else tgt_labels[0]
        valid_inds = paddle.nonzero(tgt_labels >= 0).flatten()
        if valid_inds.shape[0] == 0:
            loss_bbox[cls_name] = paddle.zeros([1], dtype='float32')
        else:
            tgt_labels = tgt_labels.cast('int64')
            tgt_labels.stop_gradient = True
            loss_bbox_cls = F.cross_entropy(
                input=scores, label=tgt_labels, reduction='mean')
            loss_bbox[cls_name] = loss_bbox_cls
        # bbox reg
        cls_agnostic_bbox_reg = deltas.shape[1] == 4
        fg_inds = paddle.nonzero(
            paddle.logical_and(tgt_labels >= 0, tgt_labels <
                               self.num_classes)).flatten()
        if fg_inds.numel() == 0:
            loss_bbox[reg_name] = paddle.zeros([1], dtype='float32')
            return loss_bbox
        if cls_agnostic_bbox_reg:
            reg_delta = paddle.gather(deltas, fg_inds)
        else:
            fg_gt_classes = paddle.gather(tgt_labels, fg_inds)
            reg_row_inds = paddle.arange(fg_gt_classes.shape[0]).unsqueeze(1)
            reg_row_inds = paddle.tile(reg_row_inds, [1, 4]).reshape([-1, 1])
            reg_col_inds = 4 * fg_gt_classes.unsqueeze(1) + paddle.arange(4)
            reg_col_inds = reg_col_inds.reshape([-1, 1])
            reg_inds = paddle.concat([reg_row_inds, reg_col_inds], axis=1)
            reg_delta = paddle.gather(deltas, fg_inds)
            reg_delta = paddle.gather_nd(reg_delta, reg_inds).reshape([-1, 4])
        rois = paddle.concat(rois) if len(rois) > 1 else rois[0]
        tgt_bboxes = paddle.concat(tgt_bboxes) if len(
            tgt_bboxes) > 1 else tgt_bboxes[0]
        reg_target = bbox2delta(rois, tgt_bboxes, bbox_weight)
        reg_target = paddle.gather(reg_target, fg_inds)
        reg_target.stop_gradient = True
        if self.bbox_loss is not None:
            reg_delta = self.bbox_transform(reg_delta)
            reg_target = self.bbox_transform(reg_target)
            loss_bbox_reg = self.bbox_loss(
                reg_delta, reg_target).sum() / tgt_labels.shape[0]
            loss_bbox_reg *= self.num_classes
        else:
            loss_bbox_reg = paddle.abs(reg_delta - reg_target).sum(
            ) / tgt_labels.shape[0]
        loss_bbox[reg_name] = loss_bbox_reg
        return loss_bbox
    def bbox_transform(self, deltas, weights=[0.1, 0.1, 0.2, 0.2]):
        wx, wy, ww, wh = weights
        deltas = paddle.reshape(deltas, shape=(0, -1, 4))
        dx = paddle.slice(deltas, axes=[2], starts=[0], ends=[1]) * wx
        dy = paddle.slice(deltas, axes=[2], starts=[1], ends=[2]) * wy
        dw = paddle.slice(deltas, axes=[2], starts=[2], ends=[3]) * ww
        dh = paddle.slice(deltas, axes=[2], starts=[3], ends=[4]) * wh
        dw = paddle.clip(dw, -1.e10, np.log(1000. / 16))
        dh = paddle.clip(dh, -1.e10, np.log(1000. / 16))
        pred_ctr_x = dx
        pred_ctr_y = dy
        pred_w = paddle.exp(dw)
        pred_h = paddle.exp(dh)
        x1 = pred_ctr_x - 0.5 * pred_w
        y1 = pred_ctr_y - 0.5 * pred_h
        x2 = pred_ctr_x + 0.5 * pred_w
        y2 = pred_ctr_y + 0.5 * pred_h
        x1 = paddle.reshape(x1, shape=(-1, ))
        y1 = paddle.reshape(y1, shape=(-1, ))
        x2 = paddle.reshape(x2, shape=(-1, ))
        y2 = paddle.reshape(y2, shape=(-1, ))
        return paddle.concat([x1, y1, x2, y2])
    def get_prediction(self, score, delta):
        bbox_prob = F.softmax(score)
        return delta, bbox_prob
    def get_head(self, ):
        return self.head
    def get_assigned_targets(self, ):
        return self.assigned_targets
    def get_assigned_rois(self, ):
        return self.assigned_rois
--- a/build/lib/ppdet/modeling/heads/cascade_head.py
+++ b/build/lib/ppdet/modeling/heads/cascade_head.py
@ -0,0 +1,281 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
 from paddle.nn.initializer import Normal
 from ppdet.core.workspace import register
 from .bbox_head import BBoxHead, TwoFCHead, XConvNormHead
 from .roi_extractor import RoIAlign
 from ..shape_spec import ShapeSpec
 from ..bbox_utils import delta2bbox, clip_bbox, nonempty_bbox
 __all__ = ['CascadeTwoFCHead', 'CascadeXConvNormHead', 'CascadeHead']
@register
 class CascadeTwoFCHead(nn.Layer):
    __shared__ = ['num_cascade_stage']
    """
    Cascade RCNN bbox head  with Two fc layers to extract feature
    Args:
        in_channel (int): Input channel which can be derived by from_config
        out_channel (int): Output channel
        resolution (int): Resolution of input feature map, default 7
        num_cascade_stage (int): The number of cascade stage, default 3
    """
    def __init__(self,
                 in_channel=256,
                 out_channel=1024,
                 resolution=7,
                 num_cascade_stage=3):
        super(CascadeTwoFCHead, self).__init__()
        self.in_channel = in_channel
        self.out_channel = out_channel
        self.head_list = []
        for stage in range(num_cascade_stage):
            head_per_stage = self.add_sublayer(
                str(stage), TwoFCHead(in_channel, out_channel, resolution))
            self.head_list.append(head_per_stage)
    @classmethod
    def from_config(cls, cfg, input_shape):
        s = input_shape
        s = s[0] if isinstance(s, (list, tuple)) else s
        return {'in_channel': s.channels}
    @property
    def out_shape(self):
        return [ShapeSpec(channels=self.out_channel, )]
    def forward(self, rois_feat, stage=0):
        out = self.head_list[stage](rois_feat)
        return out
@register
 class CascadeXConvNormHead(nn.Layer):
    __shared__ = ['norm_type', 'freeze_norm', 'num_cascade_stage']
    """
    Cascade RCNN bbox head with serveral convolution layers
    Args:
        in_channel (int): Input channels which can be derived by from_config
        num_convs (int): The number of conv layers
        conv_dim (int): The number of channels for the conv layers
        out_channel (int): Output channels
        resolution (int): Resolution of input feature map
        norm_type (string): Norm type, bn, gn, sync_bn are available, 
            default `gn`
        freeze_norm (bool): Whether to freeze the norm
        num_cascade_stage (int): The number of cascade stage, default 3
    """
    def __init__(self,
                 in_channel=256,
                 num_convs=4,
                 conv_dim=256,
                 out_channel=1024,
                 resolution=7,
                 norm_type='gn',
                 freeze_norm=False,
                 num_cascade_stage=3):
        super(CascadeXConvNormHead, self).__init__()
        self.in_channel = in_channel
        self.out_channel = out_channel
        self.head_list = []
        for stage in range(num_cascade_stage):
            head_per_stage = self.add_sublayer(
                str(stage),
                XConvNormHead(
                    in_channel,
                    num_convs,
                    conv_dim,
                    out_channel,
                    resolution,
                    norm_type,
                    freeze_norm,
                    stage_name='stage{}_'.format(stage)))
            self.head_list.append(head_per_stage)
    @classmethod
    def from_config(cls, cfg, input_shape):
        s = input_shape
        s = s[0] if isinstance(s, (list, tuple)) else s
        return {'in_channel': s.channels}
    @property
    def out_shape(self):
        return [ShapeSpec(channels=self.out_channel, )]
    def forward(self, rois_feat, stage=0):
        out = self.head_list[stage](rois_feat)
        return out
@register
 class CascadeHead(BBoxHead):
    __shared__ = ['num_classes', 'num_cascade_stages']
    __inject__ = ['bbox_assigner', 'bbox_loss']
    """
    Cascade RCNN bbox head
    Args:
        head (nn.Layer): Extract feature in bbox head
        in_channel (int): Input channel after RoI extractor
        roi_extractor (object): The module of RoI Extractor
        bbox_assigner (object): The module of Box Assigner, label and sample the 
            box.
        num_classes (int): The number of classes
        bbox_weight (List[List[float]]): The weight to get the decode box and the 
            length of weight is the number of cascade stage
        num_cascade_stages (int): THe number of stage to refine the box
    """
    def __init__(self,
                 head,
                 in_channel,
                 roi_extractor=RoIAlign().__dict__,
                 bbox_assigner='BboxAssigner',
                 num_classes=80,
                 bbox_weight=[[10., 10., 5., 5.], [20.0, 20.0, 10.0, 10.0],
                              [30.0, 30.0, 15.0, 15.0]],
                 num_cascade_stages=3,
                 bbox_loss=None):
        nn.Layer.__init__(self, )
        self.head = head
        self.roi_extractor = roi_extractor
        if isinstance(roi_extractor, dict):
            self.roi_extractor = RoIAlign(**roi_extractor)
        self.bbox_assigner = bbox_assigner
        self.num_classes = num_classes
        self.bbox_weight = bbox_weight
        self.num_cascade_stages = num_cascade_stages
        self.bbox_loss = bbox_loss
        self.bbox_score_list = []
        self.bbox_delta_list = []
        for i in range(num_cascade_stages):
            score_name = 'bbox_score_stage{}'.format(i)
            delta_name = 'bbox_delta_stage{}'.format(i)
            bbox_score = self.add_sublayer(
                score_name,
                nn.Linear(
                    in_channel,
                    self.num_classes + 1,
                    weight_attr=paddle.ParamAttr(initializer=Normal(
                        mean=0.0, std=0.01))))
            bbox_delta = self.add_sublayer(
                delta_name,
                nn.Linear(
                    in_channel,
                    4,
                    weight_attr=paddle.ParamAttr(initializer=Normal(
                        mean=0.0, std=0.001))))
            self.bbox_score_list.append(bbox_score)
            self.bbox_delta_list.append(bbox_delta)
        self.assigned_label = None
        self.assigned_rois = None
    def forward(self, body_feats=None, rois=None, rois_num=None, inputs=None):
        """
        body_feats (list[Tensor]): Feature maps from backbone
        rois (Tensor): RoIs generated from RPN module
        rois_num (Tensor): The number of RoIs in each image
        inputs (dict{Tensor}): The ground-truth of image
        """
        targets = []
        if self.training:
            rois, rois_num, targets = self.bbox_assigner(rois, rois_num, inputs)
            targets_list = [targets]
            self.assigned_rois = (rois, rois_num)
            self.assigned_targets = targets
        pred_bbox = None
        head_out_list = []
        for i in range(self.num_cascade_stages):
            if i > 0:
                rois, rois_num = self._get_rois_from_boxes(pred_bbox,
                                                           inputs['im_shape'])
                if self.training:
                    rois, rois_num, targets = self.bbox_assigner(
                        rois, rois_num, inputs, i, is_cascade=True)
                    targets_list.append(targets)
            rois_feat = self.roi_extractor(body_feats, rois, rois_num)
            bbox_feat = self.head(rois_feat, i)
            scores = self.bbox_score_list[i](bbox_feat)
            deltas = self.bbox_delta_list[i](bbox_feat)
            head_out_list.append([scores, deltas, rois])
            pred_bbox = self._get_pred_bbox(deltas, rois, self.bbox_weight[i])
        if self.training:
            loss = {}
            for stage, value in enumerate(zip(head_out_list, targets_list)):
                (scores, deltas, rois), targets = value
                loss_stage = self.get_loss(scores, deltas, targets, rois,
                                           self.bbox_weight[stage])
                for k, v in loss_stage.items():
                    loss[k + "_stage{}".format(
                        stage)] = v / self.num_cascade_stages
            return loss, bbox_feat
        else:
            scores, deltas, self.refined_rois = self.get_prediction(
                head_out_list)
            return (deltas, scores), self.head
    def _get_rois_from_boxes(self, boxes, im_shape):
        rois = []
        for i, boxes_per_image in enumerate(boxes):
            clip_box = clip_bbox(boxes_per_image, im_shape[i])
            if self.training:
                keep = nonempty_bbox(clip_box)
                if keep.shape[0] == 0:
                    keep = paddle.zeros([1], dtype='int32')
                clip_box = paddle.gather(clip_box, keep)
            rois.append(clip_box)
        rois_num = paddle.concat([paddle.shape(r)[0] for r in rois])
        return rois, rois_num
    def _get_pred_bbox(self, deltas, proposals, weights):
        pred_proposals = paddle.concat(proposals) if len(
            proposals) > 1 else proposals[0]
        pred_bbox = delta2bbox(deltas, pred_proposals, weights)
        pred_bbox = paddle.reshape(pred_bbox, [-1, deltas.shape[-1]])
        num_prop = [p.shape[0] for p in proposals]
        return pred_bbox.split(num_prop)
    def get_prediction(self, head_out_list):
        """
        head_out_list(List[Tensor]): scores, deltas, rois
        """
        pred_list = []
        scores_list = [F.softmax(head[0]) for head in head_out_list]
        scores = paddle.add_n(scores_list) / self.num_cascade_stages
        # Get deltas and rois from the last stage
        _, deltas, rois = head_out_list[-1]
        return scores, deltas, rois
    def get_refined_rois(self, ):
        return self.refined_rois
--- a/build/lib/ppdet/modeling/heads/centernet_head.py
+++ b/build/lib/ppdet/modeling/heads/centernet_head.py
@ -0,0 +1,192 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import math
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
 from paddle.nn.initializer import KaimingUniform
 from ppdet.core.workspace import register
 from ppdet.modeling.losses import CTFocalLoss
 class ConvLayer(nn.Layer):
    def __init__(self,
                 ch_in,
                 ch_out,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=False):
        super(ConvLayer, self).__init__()
        bias_attr = False
        fan_in = ch_in * kernel_size**2
        bound = 1 / math.sqrt(fan_in)
        param_attr = paddle.ParamAttr(initializer=KaimingUniform())
        if bias:
            bias_attr = paddle.ParamAttr(
                initializer=nn.initializer.Uniform(-bound, bound))
        self.conv = nn.Conv2D(
            in_channels=ch_in,
            out_channels=ch_out,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            weight_attr=param_attr,
            bias_attr=bias_attr)
    def forward(self, inputs):
        out = self.conv(inputs)
        return out
@register
 class CenterNetHead(nn.Layer):
    """
    Args:
        in_channels (int): the channel number of input to CenterNetHead.
        num_classes (int): the number of classes, 80 by default.
        head_planes (int): the channel number in all head, 256 by default.
        heatmap_weight (float): the weight of heatmap loss, 1 by default.
        regress_ltrb (bool): whether to regress left/top/right/bottom or
            width/height for a box, true by default
        size_weight (float): the weight of box size loss, 0.1 by default.
        offset_weight (float): the weight of center offset loss, 1 by default.
    """
    __shared__ = ['num_classes']
    def __init__(self,
                 in_channels,
                 num_classes=80,
                 head_planes=256,
                 heatmap_weight=1,
                 regress_ltrb=True,
                 size_weight=0.1,
                 offset_weight=1):
        super(CenterNetHead, self).__init__()
        self.weights = {
            'heatmap': heatmap_weight,
            'size': size_weight,
            'offset': offset_weight
        }
        self.heatmap = nn.Sequential(
            ConvLayer(
                in_channels, head_planes, kernel_size=3, padding=1, bias=True),
            nn.ReLU(),
            ConvLayer(
                head_planes,
                num_classes,
                kernel_size=1,
                stride=1,
                padding=0,
                bias=True))
        self.heatmap[2].conv.bias[:] = -2.19
        self.size = nn.Sequential(
            ConvLayer(
                in_channels, head_planes, kernel_size=3, padding=1, bias=True),
            nn.ReLU(),
            ConvLayer(
                head_planes,
                4 if regress_ltrb else 2,
                kernel_size=1,
                stride=1,
                padding=0,
                bias=True))
        self.offset = nn.Sequential(
            ConvLayer(
                in_channels, head_planes, kernel_size=3, padding=1, bias=True),
            nn.ReLU(),
            ConvLayer(
                head_planes, 2, kernel_size=1, stride=1, padding=0, bias=True))
        self.focal_loss = CTFocalLoss()
    @classmethod
    def from_config(cls, cfg, input_shape):
        if isinstance(input_shape, (list, tuple)):
            input_shape = input_shape[0]
        return {'in_channels': input_shape.channels}
    def forward(self, feat, inputs):
        heatmap = self.heatmap(feat)
        size = self.size(feat)
        offset = self.offset(feat)
        if self.training:
            loss = self.get_loss(heatmap, size, offset, self.weights, inputs)
            return loss
        else:
            heatmap = F.sigmoid(heatmap)
            return {'heatmap': heatmap, 'size': size, 'offset': offset}
    def get_loss(self, heatmap, size, offset, weights, inputs):
        heatmap_target = inputs['heatmap']
        size_target = inputs['size']
        offset_target = inputs['offset']
        index = inputs['index']
        mask = inputs['index_mask']
        heatmap = paddle.clip(F.sigmoid(heatmap), 1e-4, 1 - 1e-4)
        heatmap_loss = self.focal_loss(heatmap, heatmap_target)
        size = paddle.transpose(size, perm=[0, 2, 3, 1])
        size_n, size_h, size_w, size_c = size.shape
        size = paddle.reshape(size, shape=[size_n, -1, size_c])
        index = paddle.unsqueeze(index, 2)
        batch_inds = list()
        for i in range(size_n):
            batch_ind = paddle.full(
                shape=[1, index.shape[1], 1], fill_value=i, dtype='int64')
            batch_inds.append(batch_ind)
        batch_inds = paddle.concat(batch_inds, axis=0)
        index = paddle.concat(x=[batch_inds, index], axis=2)
        pos_size = paddle.gather_nd(size, index=index)
        mask = paddle.unsqueeze(mask, axis=2)
        size_mask = paddle.expand_as(mask, pos_size)
        size_mask = paddle.cast(size_mask, dtype=pos_size.dtype)
        pos_num = size_mask.sum()
        size_mask.stop_gradient = True
        size_target.stop_gradient = True
        size_loss = F.l1_loss(
            pos_size * size_mask, size_target * size_mask, reduction='sum')
        size_loss = size_loss / (pos_num + 1e-4)
        offset = paddle.transpose(offset, perm=[0, 2, 3, 1])
        offset_n, offset_h, offset_w, offset_c = offset.shape
        offset = paddle.reshape(offset, shape=[offset_n, -1, offset_c])
        pos_offset = paddle.gather_nd(offset, index=index)
        offset_mask = paddle.expand_as(mask, pos_offset)
        offset_mask = paddle.cast(offset_mask, dtype=pos_offset.dtype)
        pos_num = offset_mask.sum()
        offset_mask.stop_gradient = True
        offset_target.stop_gradient = True
        offset_loss = F.l1_loss(
            pos_offset * offset_mask,
            offset_target * offset_mask,
            reduction='sum')
        offset_loss = offset_loss / (pos_num + 1e-4)
        det_loss = weights['heatmap'] * heatmap_loss + weights[
            'size'] * size_loss + weights['offset'] * offset_loss
        return {
            'det_loss': det_loss,
            'heatmap_loss': heatmap_loss,
            'size_loss': size_loss,
            'offset_loss': offset_loss
        }
--- a/build/lib/ppdet/modeling/heads/face_head.py
+++ b/build/lib/ppdet/modeling/heads/face_head.py
@ -0,0 +1,110 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 import paddle
 import paddle.nn as nn
 from ppdet.core.workspace import register
 from ..layers import AnchorGeneratorSSD
@register
 class FaceHead(nn.Layer):
    """
    Head block for Face detection network
    Args:
        num_classes (int): Number of output classes.
        in_channels (int): Number of input channels.
        anchor_generator(object): instance of anchor genertor method.
        kernel_size (int): kernel size of Conv2D in FaceHead.
        padding (int): padding of Conv2D in FaceHead.
        conv_decay (float): norm_decay (float): weight decay for conv layer weights.
        loss (object): loss of face detection model.
    """
    __shared__ = ['num_classes']
    __inject__ = ['anchor_generator', 'loss']
    def __init__(self,
                 num_classes=80,
                 in_channels=[96, 96],
                 anchor_generator=AnchorGeneratorSSD().__dict__,
                 kernel_size=3,
                 padding=1,
                 conv_decay=0.,
                 loss='SSDLoss'):
        super(FaceHead, self).__init__()
        # add background class
        self.num_classes = num_classes + 1
        self.in_channels = in_channels
        self.anchor_generator = anchor_generator
        self.loss = loss
        if isinstance(anchor_generator, dict):
            self.anchor_generator = AnchorGeneratorSSD(**anchor_generator)
        self.num_priors = self.anchor_generator.num_priors
        self.box_convs = []
        self.score_convs = []
        for i, num_prior in enumerate(self.num_priors):
            box_conv_name = "boxes{}".format(i)
            box_conv = self.add_sublayer(
                box_conv_name,
                nn.Conv2D(
                    in_channels=self.in_channels[i],
                    out_channels=num_prior * 4,
                    kernel_size=kernel_size,
                    padding=padding))
            self.box_convs.append(box_conv)
            score_conv_name = "scores{}".format(i)
            score_conv = self.add_sublayer(
                score_conv_name,
                nn.Conv2D(
                    in_channels=self.in_channels[i],
                    out_channels=num_prior * self.num_classes,
                    kernel_size=kernel_size,
                    padding=padding))
            self.score_convs.append(score_conv)
    @classmethod
    def from_config(cls, cfg, input_shape):
        return {'in_channels': [i.channels for i in input_shape], }
    def forward(self, feats, image, gt_bbox=None, gt_class=None):
        box_preds = []
        cls_scores = []
        prior_boxes = []
        for feat, box_conv, score_conv in zip(feats, self.box_convs,
                                              self.score_convs):
            box_pred = box_conv(feat)
            box_pred = paddle.transpose(box_pred, [0, 2, 3, 1])
            box_pred = paddle.reshape(box_pred, [0, -1, 4])
            box_preds.append(box_pred)
            cls_score = score_conv(feat)
            cls_score = paddle.transpose(cls_score, [0, 2, 3, 1])
            cls_score = paddle.reshape(cls_score, [0, -1, self.num_classes])
            cls_scores.append(cls_score)
        prior_boxes = self.anchor_generator(feats, image)
        if self.training:
            return self.get_loss(box_preds, cls_scores, gt_bbox, gt_class,
                                 prior_boxes)
        else:
            return (box_preds, cls_scores), prior_boxes
    def get_loss(self, boxes, scores, gt_bbox, gt_class, prior_boxes):
        return self.loss(boxes, scores, gt_bbox, gt_class, prior_boxes)
--- a/build/lib/ppdet/modeling/heads/fcos_head.py
+++ b/build/lib/ppdet/modeling/heads/fcos_head.py
@ -0,0 +1,269 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import math
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
 from paddle import ParamAttr
 from paddle.nn.initializer import Normal, Constant
 from ppdet.core.workspace import register
 from ppdet.modeling.layers import ConvNormLayer
 class ScaleReg(nn.Layer):
    """
    Parameter for scaling the regression outputs.
    """
    def __init__(self):
        super(ScaleReg, self).__init__()
        self.scale_reg = self.create_parameter(
            shape=[1],
            attr=ParamAttr(initializer=Constant(value=1.)),
            dtype="float32")
    def forward(self, inputs):
        out = inputs * self.scale_reg
        return out
@register
 class FCOSFeat(nn.Layer):
    """
    FCOSFeat of FCOS
    Args:
        feat_in (int): The channel number of input Tensor.
        feat_out (int): The channel number of output Tensor.
        num_convs (int): The convolution number of the FCOSFeat.
        norm_type (str): Normalization type, 'bn'/'sync_bn'/'gn'.
        use_dcn (bool): Whether to use dcn in tower or not.
    """
    def __init__(self,
                 feat_in=256,
                 feat_out=256,
                 num_convs=4,
                 norm_type='bn',
                 use_dcn=False):
        super(FCOSFeat, self).__init__()
        self.num_convs = num_convs
        self.norm_type = norm_type
        self.cls_subnet_convs = []
        self.reg_subnet_convs = []
        for i in range(self.num_convs):
            in_c = feat_in if i == 0 else feat_out
            cls_conv_name = 'fcos_head_cls_tower_conv_{}'.format(i)
            cls_conv = self.add_sublayer(
                cls_conv_name,
                ConvNormLayer(
                    ch_in=in_c,
                    ch_out=feat_out,
                    filter_size=3,
                    stride=1,
                    norm_type=norm_type,
                    use_dcn=use_dcn,
                    bias_on=True,
                    lr_scale=2.))
            self.cls_subnet_convs.append(cls_conv)
            reg_conv_name = 'fcos_head_reg_tower_conv_{}'.format(i)
            reg_conv = self.add_sublayer(
                reg_conv_name,
                ConvNormLayer(
                    ch_in=in_c,
                    ch_out=feat_out,
                    filter_size=3,
                    stride=1,
                    norm_type=norm_type,
                    use_dcn=use_dcn,
                    bias_on=True,
                    lr_scale=2.))
            self.reg_subnet_convs.append(reg_conv)
    def forward(self, fpn_feat):
        cls_feat = fpn_feat
        reg_feat = fpn_feat
        for i in range(self.num_convs):
            cls_feat = F.relu(self.cls_subnet_convs[i](cls_feat))
            reg_feat = F.relu(self.reg_subnet_convs[i](reg_feat))
        return cls_feat, reg_feat
@register
 class FCOSHead(nn.Layer):
    """
    FCOSHead
    Args:
        fcos_feat (object): Instance of 'FCOSFeat'
        num_classes (int): Number of classes
        fpn_stride (list): The stride of each FPN Layer
        prior_prob (float): Used to set the bias init for the class prediction layer
        fcos_loss (object): Instance of 'FCOSLoss'
        norm_reg_targets (bool): Normalization the regression target if true
        centerness_on_reg (bool): The prediction of centerness on regression or clssification branch
    """
    __inject__ = ['fcos_feat', 'fcos_loss']
    __shared__ = ['num_classes']
    def __init__(self,
                 fcos_feat,
                 num_classes=80,
                 fpn_stride=[8, 16, 32, 64, 128],
                 prior_prob=0.01,
                 fcos_loss='FCOSLoss',
                 norm_reg_targets=True,
                 centerness_on_reg=True):
        super(FCOSHead, self).__init__()
        self.fcos_feat = fcos_feat
        self.num_classes = num_classes
        self.fpn_stride = fpn_stride
        self.prior_prob = prior_prob
        self.fcos_loss = fcos_loss
        self.norm_reg_targets = norm_reg_targets
        self.centerness_on_reg = centerness_on_reg
        conv_cls_name = "fcos_head_cls"
        bias_init_value = -math.log((1 - self.prior_prob) / self.prior_prob)
        self.fcos_head_cls = self.add_sublayer(
            conv_cls_name,
            nn.Conv2D(
                in_channels=256,
                out_channels=self.num_classes,
                kernel_size=3,
                stride=1,
                padding=1,
                weight_attr=ParamAttr(
                    name=conv_cls_name + "_weights",
                    initializer=Normal(
                        mean=0., std=0.01)),
                bias_attr=ParamAttr(
                    name=conv_cls_name + "_bias",
                    initializer=Constant(value=bias_init_value))))
        conv_reg_name = "fcos_head_reg"
        self.fcos_head_reg = self.add_sublayer(
            conv_reg_name,
            nn.Conv2D(
                in_channels=256,
                out_channels=4,
                kernel_size=3,
                stride=1,
                padding=1,
                weight_attr=ParamAttr(
                    name=conv_reg_name + "_weights",
                    initializer=Normal(
                        mean=0., std=0.01)),
                bias_attr=ParamAttr(
                    name=conv_reg_name + "_bias",
                    initializer=Constant(value=0))))
        conv_centerness_name = "fcos_head_centerness"
        self.fcos_head_centerness = self.add_sublayer(
            conv_centerness_name,
            nn.Conv2D(
                in_channels=256,
                out_channels=1,
                kernel_size=3,
                stride=1,
                padding=1,
                weight_attr=ParamAttr(
                    name=conv_centerness_name + "_weights",
                    initializer=Normal(
                        mean=0., std=0.01)),
                bias_attr=ParamAttr(
                    name=conv_centerness_name + "_bias",
                    initializer=Constant(value=0))))
        self.scales_regs = []
        for i in range(len(self.fpn_stride)):
            lvl = int(math.log(int(self.fpn_stride[i]), 2))
            feat_name = 'p{}_feat'.format(lvl)
            scale_reg = self.add_sublayer(feat_name, ScaleReg())
            self.scales_regs.append(scale_reg)
    def _compute_locations_by_level(self, fpn_stride, feature):
        """
        Compute locations of anchor points of each FPN layer
        Args:
            fpn_stride (int): The stride of current FPN feature map
            feature (Tensor): Tensor of current FPN feature map
        Return:
            Anchor points locations of current FPN feature map
        """
        shape_fm = paddle.shape(feature)
        shape_fm.stop_gradient = True
        h, w = shape_fm[2], shape_fm[3]
        shift_x = paddle.arange(0, w * fpn_stride, fpn_stride)
        shift_y = paddle.arange(0, h * fpn_stride, fpn_stride)
        shift_x = paddle.unsqueeze(shift_x, axis=0)
        shift_y = paddle.unsqueeze(shift_y, axis=1)
        shift_x = paddle.expand(shift_x, shape=[h, w])
        shift_y = paddle.expand(shift_y, shape=[h, w])
        shift_x.stop_gradient = True
        shift_y.stop_gradient = True
        shift_x = paddle.reshape(shift_x, shape=[-1])
        shift_y = paddle.reshape(shift_y, shape=[-1])
        location = paddle.stack(
            [shift_x, shift_y], axis=-1) + float(fpn_stride) / 2
        location.stop_gradient = True
        return location
    def forward(self, fpn_feats, is_training):
        assert len(fpn_feats) == len(
            self.fpn_stride
        ), "The size of fpn_feats is not equal to size of fpn_stride"
        cls_logits_list = []
        bboxes_reg_list = []
        centerness_list = []
        for scale_reg, fpn_stride, fpn_feat in zip(self.scales_regs,
                                                   self.fpn_stride, fpn_feats):
            fcos_cls_feat, fcos_reg_feat = self.fcos_feat(fpn_feat)
            cls_logits = self.fcos_head_cls(fcos_cls_feat)
            bbox_reg = scale_reg(self.fcos_head_reg(fcos_reg_feat))
            if self.centerness_on_reg:
                centerness = self.fcos_head_centerness(fcos_reg_feat)
            else:
                centerness = self.fcos_head_centerness(fcos_cls_feat)
            if self.norm_reg_targets:
                bbox_reg = F.relu(bbox_reg)
                if not is_training:
                    bbox_reg = bbox_reg * fpn_stride
            else:
                bbox_reg = paddle.exp(bbox_reg)
            cls_logits_list.append(cls_logits)
            bboxes_reg_list.append(bbox_reg)
            centerness_list.append(centerness)
        if not is_training:
            locations_list = []
            for fpn_stride, feature in zip(self.fpn_stride, fpn_feats):
                location = self._compute_locations_by_level(fpn_stride, feature)
                locations_list.append(location)
            return locations_list, cls_logits_list, bboxes_reg_list, centerness_list
        else:
            return cls_logits_list, bboxes_reg_list, centerness_list
    def get_loss(self, fcos_head_outs, tag_labels, tag_bboxes, tag_centerness):
        cls_logits, bboxes_reg, centerness = fcos_head_outs
        return self.fcos_loss(cls_logits, bboxes_reg, centerness, tag_labels,
                              tag_bboxes, tag_centerness)
--- a/build/lib/ppdet/modeling/heads/keypoint_hrhrnet_head.py
+++ b/build/lib/ppdet/modeling/heads/keypoint_hrhrnet_head.py
@ -0,0 +1,108 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 import paddle
 import paddle.nn as nn
 from ppdet.core.workspace import register
 from .. import layers as L
 from ..backbones.hrnet import BasicBlock
@register
 class HrHRNetHead(nn.Layer):
    __inject__ = ['loss']
    def __init__(self, num_joints, loss='HrHRNetLoss', swahr=False, width=32):
        """
        Head for HigherHRNet network
        Args:
            num_joints (int): number of keypoints
            hrloss (object): HrHRNetLoss instance
            swahr (bool): whether to use swahr
            width (int): hrnet channel width
        """
        super(HrHRNetHead, self).__init__()
        self.loss = loss
        self.num_joints = num_joints
        num_featout1 = num_joints * 2
        num_featout2 = num_joints
        self.swahr = swahr
        self.conv1 = L.Conv2d(width, num_featout1, 1, 1, 0, bias=True)
        self.conv2 = L.Conv2d(width, num_featout2, 1, 1, 0, bias=True)
        self.deconv = nn.Sequential(
            L.ConvTranspose2d(
                num_featout1 + width, width, 4, 2, 1, 0, bias=False),
            L.BatchNorm2d(width),
            L.ReLU())
        self.blocks = nn.Sequential(*(BasicBlock(
            num_channels=width,
            num_filters=width,
            has_se=False,
            freeze_norm=False,
            name='HrHRNetHead_{}'.format(i)) for i in range(4)))
        self.interpolate = L.Upsample(2, mode='bilinear')
        self.concat = L.Concat(dim=1)
        if swahr:
            self.scalelayer0 = nn.Sequential(
                L.Conv2d(
                    width, num_joints, 1, 1, 0, bias=True),
                L.BatchNorm2d(num_joints),
                L.ReLU(),
                L.Conv2d(
                    num_joints,
                    num_joints,
                    9,
                    1,
                    4,
                    groups=num_joints,
                    bias=True))
            self.scalelayer1 = nn.Sequential(
                L.Conv2d(
                    width, num_joints, 1, 1, 0, bias=True),
                L.BatchNorm2d(num_joints),
                L.ReLU(),
                L.Conv2d(
                    num_joints,
                    num_joints,
                    9,
                    1,
                    4,
                    groups=num_joints,
                    bias=True))
    def forward(self, feats, targets=None):
        x1 = feats[0]
        xo1 = self.conv1(x1)
        x2 = self.blocks(self.deconv(self.concat((x1, xo1))))
        xo2 = self.conv2(x2)
        num_joints = self.num_joints
        if self.training:
            heatmap1, tagmap = paddle.split(xo1, 2, axis=1)
            if self.swahr:
                so1 = self.scalelayer0(x1)
                so2 = self.scalelayer1(x2)
                hrhrnet_outputs = ([heatmap1, so1], [xo2, so2], tagmap)
                return self.loss(hrhrnet_outputs, targets)
            else:
                hrhrnet_outputs = (heatmap1, xo2, tagmap)
                return self.loss(hrhrnet_outputs, targets)
        # averaged heatmap, upsampled tagmap
        upsampled = self.interpolate(xo1)
        avg = (upsampled[:, :num_joints] + xo2[:, :num_joints]) / 2
        return avg, upsampled[:, num_joints:]
--- a/build/lib/ppdet/modeling/heads/mask_head.py
+++ b/build/lib/ppdet/modeling/heads/mask_head.py
@ -0,0 +1,250 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
 from paddle.nn.initializer import KaimingNormal
 from ppdet.core.workspace import register, create
 from ppdet.modeling.layers import ConvNormLayer
 from .roi_extractor import RoIAlign
@register
 class MaskFeat(nn.Layer):
    """
    Feature extraction in Mask head
    Args:
        in_channel (int): Input channels
        out_channel (int): Output channels
        num_convs (int): The number of conv layers, default 4
        norm_type (string | None): Norm type, bn, gn, sync_bn are available,
            default None
    """
    def __init__(self,
                 in_channel=256,
                 out_channel=256,
                 num_convs=4,
                 norm_type=None):
        super(MaskFeat, self).__init__()
        self.num_convs = num_convs
        self.in_channel = in_channel
        self.out_channel = out_channel
        self.norm_type = norm_type
        fan_conv = out_channel * 3 * 3
        fan_deconv = out_channel * 2 * 2
        mask_conv = nn.Sequential()
        if norm_type == 'gn':
            for i in range(self.num_convs):
                conv_name = 'mask_inter_feat_{}'.format(i + 1)
                mask_conv.add_sublayer(
                    conv_name,
                    ConvNormLayer(
                        ch_in=in_channel if i == 0 else out_channel,
                        ch_out=out_channel,
                        filter_size=3,
                        stride=1,
                        norm_type=self.norm_type,
                        initializer=KaimingNormal(fan_in=fan_conv),
                        skip_quant=True))
                mask_conv.add_sublayer(conv_name + 'act', nn.ReLU())
        else:
            for i in range(self.num_convs):
                conv_name = 'mask_inter_feat_{}'.format(i + 1)
                conv = nn.Conv2D(
                    in_channels=in_channel if i == 0 else out_channel,
                    out_channels=out_channel,
                    kernel_size=3,
                    padding=1,
                    weight_attr=paddle.ParamAttr(
                        initializer=KaimingNormal(fan_in=fan_conv)))
                conv.skip_quant = True
                mask_conv.add_sublayer(conv_name, conv)
                mask_conv.add_sublayer(conv_name + 'act', nn.ReLU())
        mask_conv.add_sublayer(
            'conv5_mask',
            nn.Conv2DTranspose(
                in_channels=self.in_channel,
                out_channels=self.out_channel,
                kernel_size=2,
                stride=2,
                weight_attr=paddle.ParamAttr(
                    initializer=KaimingNormal(fan_in=fan_deconv))))
        mask_conv.add_sublayer('conv5_mask' + 'act', nn.ReLU())
        self.upsample = mask_conv
    @classmethod
    def from_config(cls, cfg, input_shape):
        if isinstance(input_shape, (list, tuple)):
            input_shape = input_shape[0]
        return {'in_channel': input_shape.channels, }
    def out_channels(self):
        return self.out_channel
    def forward(self, feats):
        return self.upsample(feats)
@register
 class MaskHead(nn.Layer):
    __shared__ = ['num_classes']
    __inject__ = ['mask_assigner']
    """
    RCNN mask head
    Args:
        head (nn.Layer): Extract feature in mask head
        roi_extractor (object): The module of RoI Extractor
        mask_assigner (object): The module of Mask Assigner, 
            label and sample the mask
        num_classes (int): The number of classes
        share_bbox_feat (bool): Whether to share the feature from bbox head,
            default false
    """
    def __init__(self,
                 head,
                 roi_extractor=RoIAlign().__dict__,
                 mask_assigner='MaskAssigner',
                 num_classes=80,
                 share_bbox_feat=False):
        super(MaskHead, self).__init__()
        self.num_classes = num_classes
        self.roi_extractor = roi_extractor
        if isinstance(roi_extractor, dict):
            self.roi_extractor = RoIAlign(**roi_extractor)
        self.head = head
        self.in_channels = head.out_channels()
        self.mask_assigner = mask_assigner
        self.share_bbox_feat = share_bbox_feat
        self.bbox_head = None
        self.mask_fcn_logits = nn.Conv2D(
            in_channels=self.in_channels,
            out_channels=self.num_classes,
            kernel_size=1,
            weight_attr=paddle.ParamAttr(initializer=KaimingNormal(
                fan_in=self.num_classes)))
        self.mask_fcn_logits.skip_quant = True
    @classmethod
    def from_config(cls, cfg, input_shape):
        roi_pooler = cfg['roi_extractor']
        assert isinstance(roi_pooler, dict)
        kwargs = RoIAlign.from_config(cfg, input_shape)
        roi_pooler.update(kwargs)
        kwargs = {'input_shape': input_shape}
        head = create(cfg['head'], **kwargs)
        return {
            'roi_extractor': roi_pooler,
            'head': head,
        }
    def get_loss(self, mask_logits, mask_label, mask_target, mask_weight):
        mask_label = F.one_hot(mask_label, self.num_classes).unsqueeze([2, 3])
        mask_label = paddle.expand_as(mask_label, mask_logits)
        mask_label.stop_gradient = True
        mask_pred = paddle.gather_nd(mask_logits, paddle.nonzero(mask_label))
        shape = mask_logits.shape
        mask_pred = paddle.reshape(mask_pred, [shape[0], shape[2], shape[3]])
        mask_target = mask_target.cast('float32')
        mask_weight = mask_weight.unsqueeze([1, 2])
        loss_mask = F.binary_cross_entropy_with_logits(
            mask_pred, mask_target, weight=mask_weight, reduction="mean")
        return loss_mask
    def forward_train(self, body_feats, rois, rois_num, inputs, targets,
                      bbox_feat):
        """
        body_feats (list[Tensor]): Multi-level backbone features
        rois (list[Tensor]): Proposals for each batch with shape [N, 4]
        rois_num (Tensor): The number of proposals for each batch
        inputs (dict): ground truth info
        """
        tgt_labels, _, tgt_gt_inds = targets
        rois, rois_num, tgt_classes, tgt_masks, mask_index, tgt_weights = self.mask_assigner(
            rois, tgt_labels, tgt_gt_inds, inputs)
        if self.share_bbox_feat:
            rois_feat = paddle.gather(bbox_feat, mask_index)
        else:
            rois_feat = self.roi_extractor(body_feats, rois, rois_num)
        mask_feat = self.head(rois_feat)
        mask_logits = self.mask_fcn_logits(mask_feat)
        loss_mask = self.get_loss(mask_logits, tgt_classes, tgt_masks,
                                  tgt_weights)
        return {'loss_mask': loss_mask}
    def forward_test(self,
                     body_feats,
                     rois,
                     rois_num,
                     scale_factor,
                     feat_func=None):
        """
        body_feats (list[Tensor]): Multi-level backbone features
        rois (Tensor): Prediction from bbox head with shape [N, 6]
        rois_num (Tensor): The number of prediction for each batch
        scale_factor (Tensor): The scale factor from origin size to input size
        """
        if rois.shape[0] == 0:
            mask_out = paddle.full([1, 1, 1, 1], -1)
        else:
            bbox = [rois[:, 2:]]
            labels = rois[:, 0].cast('int32')
            rois_feat = self.roi_extractor(body_feats, bbox, rois_num)
            if self.share_bbox_feat:
                assert feat_func is not None
                rois_feat = feat_func(rois_feat)
            mask_feat = self.head(rois_feat)
            mask_logit = self.mask_fcn_logits(mask_feat)
            mask_num_class = mask_logit.shape[1]
            if mask_num_class == 1:
                mask_out = F.sigmoid(mask_logit)
            else:
                num_masks = mask_logit.shape[0]
                mask_out = []
                # TODO: need to optimize gather
                for i in range(mask_logit.shape[0]):
                    pred_masks = paddle.unsqueeze(
                        mask_logit[i, :, :, :], axis=0)
                    mask = paddle.gather(pred_masks, labels[i], axis=1)
                    mask_out.append(mask)
                mask_out = F.sigmoid(paddle.concat(mask_out))
        return mask_out
    def forward(self,
                body_feats,
                rois,
                rois_num,
                inputs,
                targets=None,
                bbox_feat=None,
                feat_func=None):
        if self.training:
            return self.forward_train(body_feats, rois, rois_num, inputs,
                                      targets, bbox_feat)
        else:
            im_scale = inputs['scale_factor']
            return self.forward_test(body_feats, rois, rois_num, im_scale,
                                     feat_func)
--- a/build/lib/ppdet/modeling/heads/roi_extractor.py
+++ b/build/lib/ppdet/modeling/heads/roi_extractor.py
@ -0,0 +1,111 @@
 #   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import paddle
 from ppdet.core.workspace import register
 from ppdet.modeling import ops
 def _to_list(v):
    if not isinstance(v, (list, tuple)):
        return [v]
    return v
@register
 class RoIAlign(object):
    """
    RoI Align module
    For more details, please refer to the document of roi_align in
    in ppdet/modeing/ops.py
    Args:
        resolution (int): The output size, default 14
        spatial_scale (float): Multiplicative spatial scale factor to translate
            ROI coords from their input scale to the scale used when pooling.
            default 0.0625
        sampling_ratio (int): The number of sampling points in the interpolation
            grid, default 0
        canconical_level (int): The referring level of FPN layer with 
            specified level. default 4
        canonical_size (int): The referring scale of FPN layer with 
            specified scale. default 224
        start_level (int): The start level of FPN layer to extract RoI feature,
            default 0
        end_level (int): The end level of FPN layer to extract RoI feature,
            default 3
        aligned (bool): Whether to add offset to rois' coord in roi_align.
            default false
    """
    def __init__(self,
                 resolution=14,
                 spatial_scale=0.0625,
                 sampling_ratio=0,
                 canconical_level=4,
                 canonical_size=224,
                 start_level=0,
                 end_level=3,
                 aligned=False):
        super(RoIAlign, self).__init__()
        self.resolution = resolution
        self.spatial_scale = _to_list(spatial_scale)
        self.sampling_ratio = sampling_ratio
        self.canconical_level = canconical_level
        self.canonical_size = canonical_size
        self.start_level = start_level
        self.end_level = end_level
        self.aligned = aligned
    @classmethod
    def from_config(cls, cfg, input_shape):
        return {'spatial_scale': [1. / i.stride for i in input_shape]}
    def __call__(self, feats, roi, rois_num):
        roi = paddle.concat(roi) if len(roi) > 1 else roi[0]
        if len(feats) == 1:
            rois_feat = ops.roi_align(
                feats[self.start_level],
                roi,
                self.resolution,
                self.spatial_scale[0],
                rois_num=rois_num,
                aligned=self.aligned)
        else:
            offset = 2
            k_min = self.start_level + offset
            k_max = self.end_level + offset
            rois_dist, restore_index, rois_num_dist = ops.distribute_fpn_proposals(
                roi,
                k_min,
                k_max,
                self.canconical_level,
                self.canonical_size,
                rois_num=rois_num)
            rois_feat_list = []
            for lvl in range(self.start_level, self.end_level + 1):
                roi_feat = ops.roi_align(
                    feats[lvl],
                    rois_dist[lvl],
                    self.resolution,
                    self.spatial_scale[lvl],
                    sampling_ratio=self.sampling_ratio,
                    rois_num=rois_num_dist[lvl],
                    aligned=self.aligned)
                rois_feat_list.append(roi_feat)
            rois_feat_shuffle = paddle.concat(rois_feat_list)
            rois_feat = paddle.gather(rois_feat_shuffle, restore_index)
        return rois_feat
--- a/build/lib/ppdet/modeling/heads/s2anet_head.py
+++ b/build/lib/ppdet/modeling/heads/s2anet_head.py
@ -0,0 +1,841 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import paddle
 from paddle import ParamAttr
 import paddle.nn as nn
 import paddle.nn.functional as F
 from paddle.nn.initializer import Normal, Constant
 from ppdet.core.workspace import register
 from ppdet.modeling.proposal_generator.target_layer import RBoxAssigner
 import numpy as np
 class S2ANetAnchorGenerator(nn.Layer):
    """
    AnchorGenerator by paddle
    """
    def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None):
        super(S2ANetAnchorGenerator, self).__init__()
        self.base_size = base_size
        self.scales = paddle.to_tensor(scales)
        self.ratios = paddle.to_tensor(ratios)
        self.scale_major = scale_major
        self.ctr = ctr
        self.base_anchors = self.gen_base_anchors()
    @property
    def num_base_anchors(self):
        return self.base_anchors.shape[0]
    def gen_base_anchors(self):
        w = self.base_size
        h = self.base_size
        if self.ctr is None:
            x_ctr = 0.5 * (w - 1)
            y_ctr = 0.5 * (h - 1)
        else:
            x_ctr, y_ctr = self.ctr
        h_ratios = paddle.sqrt(self.ratios)
        w_ratios = 1 / h_ratios
        if self.scale_major:
            ws = (w * w_ratios[:] * self.scales[:]).reshape([-1])
            hs = (h * h_ratios[:] * self.scales[:]).reshape([-1])
        else:
            ws = (w * self.scales[:] * w_ratios[:]).reshape([-1])
            hs = (h * self.scales[:] * h_ratios[:]).reshape([-1])
        base_anchors = paddle.stack(
            [
                x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1),
                x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1)
            ],
            axis=-1)
        base_anchors = paddle.round(base_anchors)
        return base_anchors
    def _meshgrid(self, x, y, row_major=True):
        yy, xx = paddle.meshgrid(x, y)
        yy = yy.reshape([-1])
        xx = xx.reshape([-1])
        if row_major:
            return xx, yy
        else:
            return yy, xx
    def forward(self, featmap_size, stride=16):
        # featmap_size*stride project it to original area
        base_anchors = self.base_anchors
        feat_h = featmap_size[0]
        feat_w = featmap_size[1]
        shift_x = paddle.arange(0, feat_w, 1, 'int32') * stride
        shift_y = paddle.arange(0, feat_h, 1, 'int32') * stride
        shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
        shifts = paddle.stack([shift_xx, shift_yy, shift_xx, shift_yy], axis=-1)
        all_anchors = base_anchors[:, :] + shifts[:, :]
        all_anchors = all_anchors.reshape([feat_h * feat_w, 4])
        return all_anchors
    def valid_flags(self, featmap_size, valid_size):
        feat_h, feat_w = featmap_size
        valid_h, valid_w = valid_size
        assert valid_h <= feat_h and valid_w <= feat_w
        valid_x = paddle.zeros([feat_w], dtype='uint8')
        valid_y = paddle.zeros([feat_h], dtype='uint8')
        valid_x[:valid_w] = 1
        valid_y[:valid_h] = 1
        valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
        valid = valid_xx & valid_yy
        valid = valid[:, None].expand(
            [valid.size(0), self.num_base_anchors]).reshape([-1])
        return valid
 class AlignConv(nn.Layer):
    def __init__(self, in_channels, out_channels, kernel_size=3, groups=1):
        super(AlignConv, self).__init__()
        self.kernel_size = kernel_size
        self.align_conv = paddle.vision.ops.DeformConv2D(
            in_channels,
            out_channels,
            kernel_size=self.kernel_size,
            padding=(self.kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
            bias_attr=None)
    @paddle.no_grad()
    def get_offset(self, anchors, featmap_size, stride):
        """
        Args:
            anchors: [M,5] xc,yc,w,h,angle
            featmap_size: (feat_h, feat_w)
            stride: 8
        Returns:
        """
        anchors = paddle.reshape(anchors, [-1, 5])  # (NA,5)
        dtype = anchors.dtype
        feat_h, feat_w = featmap_size
        pad = (self.kernel_size - 1) // 2
        idx = paddle.arange(-pad, pad + 1, dtype=dtype)
        yy, xx = paddle.meshgrid(idx, idx)
        xx = paddle.reshape(xx, [-1])
        yy = paddle.reshape(yy, [-1])
        # get sampling locations of default conv
        xc = paddle.arange(0, feat_w, dtype=dtype)
        yc = paddle.arange(0, feat_h, dtype=dtype)
        yc, xc = paddle.meshgrid(yc, xc)
        xc = paddle.reshape(xc, [-1, 1])
        yc = paddle.reshape(yc, [-1, 1])
        x_conv = xc + xx
        y_conv = yc + yy
        # get sampling locations of anchors
        # x_ctr, y_ctr, w, h, a = np.unbind(anchors, dim=1)
        x_ctr = anchors[:, 0]
        y_ctr = anchors[:, 1]
        w = anchors[:, 2]
        h = anchors[:, 3]
        a = anchors[:, 4]
        x_ctr = paddle.reshape(x_ctr, [x_ctr.shape[0], 1])
        y_ctr = paddle.reshape(y_ctr, [y_ctr.shape[0], 1])
        w = paddle.reshape(w, [w.shape[0], 1])
        h = paddle.reshape(h, [h.shape[0], 1])
        a = paddle.reshape(a, [a.shape[0], 1])
        x_ctr = x_ctr / stride
        y_ctr = y_ctr / stride
        w_s = w / stride
        h_s = h / stride
        cos, sin = paddle.cos(a), paddle.sin(a)
        dw, dh = w_s / self.kernel_size, h_s / self.kernel_size
        x, y = dw * xx, dh * yy
        xr = cos * x - sin * y
        yr = sin * x + cos * y
        x_anchor, y_anchor = xr + x_ctr, yr + y_ctr
        # get offset filed
        offset_x = x_anchor - x_conv
        offset_y = y_anchor - y_conv
        # x, y in anchors is opposite in image coordinates,
        # so we stack them with y, x other than x, y
        offset = paddle.stack([offset_y, offset_x], axis=-1)
        # NA,ks*ks*2
        # [NA, ks, ks, 2] --> [NA, ks*ks*2]
        offset = paddle.reshape(offset, [offset.shape[0], -1])
        # [NA, ks*ks*2] --> [ks*ks*2, NA]
        offset = paddle.transpose(offset, [1, 0])
        # [NA, ks*ks*2] --> [1, ks*ks*2, H, W]
        offset = paddle.reshape(offset, [1, -1, feat_h, feat_w])
        return offset
    def forward(self, x, refine_anchors, stride):
        featmap_size = (x.shape[2], x.shape[3])
        offset = self.get_offset(refine_anchors, featmap_size, stride)
        x = F.relu(self.align_conv(x, offset))
        return x
@register
 class S2ANetHead(nn.Layer):
    """
    S2Anet head
    Args:
        stacked_convs (int): number of stacked_convs
        feat_in (int): input channels of feat
        feat_out (int): output channels of feat
        num_classes (int): num_classes
        anchor_strides (list): stride of anchors
        anchor_scales (list): scale of anchors
        anchor_ratios (list): ratios of anchors
        target_means (list): target_means
        target_stds (list): target_stds
        align_conv_type (str): align_conv_type ['Conv', 'AlignConv']
        align_conv_size (int): kernel size of align_conv
        use_sigmoid_cls (bool): use sigmoid_cls or not
        reg_loss_weight (list): loss weight for regression
    """
    __shared__ = ['num_classes']
    __inject__ = ['anchor_assign']
    def __init__(self,
                 stacked_convs=2,
                 feat_in=256,
                 feat_out=256,
                 num_classes=15,
                 anchor_strides=[8, 16, 32, 64, 128],
                 anchor_scales=[4],
                 anchor_ratios=[1.0],
                 target_means=0.0,
                 target_stds=1.0,
                 align_conv_type='AlignConv',
                 align_conv_size=3,
                 use_sigmoid_cls=True,
                 anchor_assign=RBoxAssigner().__dict__,
                 reg_loss_weight=[1.0, 1.0, 1.0, 1.0, 1.0],
                 cls_loss_weight=[1.0, 1.0]):
        super(S2ANetHead, self).__init__()
        self.stacked_convs = stacked_convs
        self.feat_in = feat_in
        self.feat_out = feat_out
        self.anchor_list = None
        self.anchor_scales = anchor_scales
        self.anchor_ratios = anchor_ratios
        self.anchor_strides = anchor_strides
        self.anchor_base_sizes = list(anchor_strides)
        self.target_means = target_means
        self.target_stds = target_stds
        assert align_conv_type in ['AlignConv', 'Conv', 'DCN']
        self.align_conv_type = align_conv_type
        self.align_conv_size = align_conv_size
        self.use_sigmoid_cls = use_sigmoid_cls
        self.cls_out_channels = num_classes if self.use_sigmoid_cls else 1
        self.sampling = False
        self.anchor_assign = anchor_assign
        self.reg_loss_weight = reg_loss_weight
        self.cls_loss_weight = cls_loss_weight
        self.s2anet_head_out = None
        # anchor
        self.anchor_generators = []
        for anchor_base in self.anchor_base_sizes:
            self.anchor_generators.append(
                S2ANetAnchorGenerator(anchor_base, anchor_scales,
                                      anchor_ratios))
        self.anchor_generators = paddle.nn.LayerList(self.anchor_generators)
        self.add_sublayer('s2anet_anchor_gen', self.anchor_generators)
        self.fam_cls_convs = nn.Sequential()
        self.fam_reg_convs = nn.Sequential()
        for i in range(self.stacked_convs):
            chan_in = self.feat_in if i == 0 else self.feat_out
            self.fam_cls_convs.add_sublayer(
                'fam_cls_conv_{}'.format(i),
                nn.Conv2D(
                    in_channels=chan_in,
                    out_channels=self.feat_out,
                    kernel_size=3,
                    padding=1,
                    weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
                    bias_attr=ParamAttr(initializer=Constant(0))))
            self.fam_cls_convs.add_sublayer('fam_cls_conv_{}_act'.format(i),
                                            nn.ReLU())
            self.fam_reg_convs.add_sublayer(
                'fam_reg_conv_{}'.format(i),
                nn.Conv2D(
                    in_channels=chan_in,
                    out_channels=self.feat_out,
                    kernel_size=3,
                    padding=1,
                    weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
                    bias_attr=ParamAttr(initializer=Constant(0))))
            self.fam_reg_convs.add_sublayer('fam_reg_conv_{}_act'.format(i),
                                            nn.ReLU())
        self.fam_reg = nn.Conv2D(
            self.feat_out,
            5,
            1,
            weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
            bias_attr=ParamAttr(initializer=Constant(0)))
        prior_prob = 0.01
        bias_init = float(-np.log((1 - prior_prob) / prior_prob))
        self.fam_cls = nn.Conv2D(
            self.feat_out,
            self.cls_out_channels,
            1,
            weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
            bias_attr=ParamAttr(initializer=Constant(bias_init)))
        if self.align_conv_type == "AlignConv":
            self.align_conv = AlignConv(self.feat_out, self.feat_out,
                                        self.align_conv_size)
        elif self.align_conv_type == "Conv":
            self.align_conv = nn.Conv2D(
                self.feat_out,
                self.feat_out,
                self.align_conv_size,
                padding=(self.align_conv_size - 1) // 2,
                bias_attr=ParamAttr(initializer=Constant(0)))
        elif self.align_conv_type == "DCN":
            self.align_conv_offset = nn.Conv2D(
                self.feat_out,
                2 * self.align_conv_size**2,
                1,
                weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
                bias_attr=ParamAttr(initializer=Constant(0)))
            self.align_conv = paddle.vision.ops.DeformConv2D(
                self.feat_out,
                self.feat_out,
                self.align_conv_size,
                padding=(self.align_conv_size - 1) // 2,
                weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
                bias_attr=False)
        self.or_conv = nn.Conv2D(
            self.feat_out,
            self.feat_out,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
            bias_attr=ParamAttr(initializer=Constant(0)))
        # ODM
        self.odm_cls_convs = nn.Sequential()
        self.odm_reg_convs = nn.Sequential()
        for i in range(self.stacked_convs):
            ch_in = self.feat_out
            # ch_in = int(self.feat_out / 8) if i == 0 else self.feat_out
            self.odm_cls_convs.add_sublayer(
                'odm_cls_conv_{}'.format(i),
                nn.Conv2D(
                    in_channels=ch_in,
                    out_channels=self.feat_out,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
                    bias_attr=ParamAttr(initializer=Constant(0))))
            self.odm_cls_convs.add_sublayer('odm_cls_conv_{}_act'.format(i),
                                            nn.ReLU())
            self.odm_reg_convs.add_sublayer(
                'odm_reg_conv_{}'.format(i),
                nn.Conv2D(
                    in_channels=self.feat_out,
                    out_channels=self.feat_out,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
                    bias_attr=ParamAttr(initializer=Constant(0))))
            self.odm_reg_convs.add_sublayer('odm_reg_conv_{}_act'.format(i),
                                            nn.ReLU())
        self.odm_cls = nn.Conv2D(
            self.feat_out,
            self.cls_out_channels,
            3,
            padding=1,
            weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
            bias_attr=ParamAttr(initializer=Constant(bias_init)))
        self.odm_reg = nn.Conv2D(
            self.feat_out,
            5,
            3,
            padding=1,
            weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
            bias_attr=ParamAttr(initializer=Constant(0)))
        self.featmap_size_list = []
        self.init_anchors_list = []
        self.rbox_anchors_list = []
        self.refine_anchor_list = []
    def forward(self, feats):
        fam_reg_branch_list = []
        fam_cls_branch_list = []
        odm_reg_branch_list = []
        odm_cls_branch_list = []
        fam_reg1_branch_list = []
        self.featmap_size_list = []
        self.init_anchors_list = []
        self.rbox_anchors_list = []
        self.refine_anchor_list = []
        for i, feat in enumerate(feats):
            # prepare anchor
            featmap_size = paddle.shape(feat)[-2:]
            self.featmap_size_list.append(featmap_size)
            init_anchors = self.anchor_generators[i](featmap_size,
                                                     self.anchor_strides[i])
            init_anchors = paddle.reshape(
                init_anchors, [featmap_size[0] * featmap_size[1], 4])
            self.init_anchors_list.append(init_anchors)
            rbox_anchors = self.rect2rbox(init_anchors)
            self.rbox_anchors_list.append(rbox_anchors)
            fam_cls_feat = self.fam_cls_convs(feat)
            fam_cls = self.fam_cls(fam_cls_feat)
            # [N, CLS, H, W] --> [N, H, W, CLS]
            fam_cls = fam_cls.transpose([0, 2, 3, 1])
            fam_cls_reshape = paddle.reshape(
                fam_cls, [fam_cls.shape[0], -1, self.cls_out_channels])
            fam_cls_branch_list.append(fam_cls_reshape)
            fam_reg_feat = self.fam_reg_convs(feat)
            fam_reg = self.fam_reg(fam_reg_feat)
            # [N, 5, H, W] --> [N, H, W, 5]
            fam_reg = fam_reg.transpose([0, 2, 3, 1])
            fam_reg_reshape = paddle.reshape(fam_reg, [fam_reg.shape[0], -1, 5])
            fam_reg_branch_list.append(fam_reg_reshape)
            # refine anchors
            fam_reg1 = fam_reg.clone()
            fam_reg1.stop_gradient = True
            rbox_anchors.stop_gradient = True
            fam_reg1_branch_list.append(fam_reg1)
            refine_anchor = self.bbox_decode(
                fam_reg1, rbox_anchors, self.target_stds, self.target_means)
            self.refine_anchor_list.append(refine_anchor)
            if self.align_conv_type == 'AlignConv':
                align_feat = self.align_conv(feat,
                                             refine_anchor.clone(),
                                             self.anchor_strides[i])
            elif self.align_conv_type == 'DCN':
                align_offset = self.align_conv_offset(feat)
                align_feat = self.align_conv(feat, align_offset)
            elif self.align_conv_type == 'Conv':
                align_feat = self.align_conv(feat)
            or_feat = self.or_conv(align_feat)
            odm_reg_feat = or_feat
            odm_cls_feat = or_feat
            odm_reg_feat = self.odm_reg_convs(odm_reg_feat)
            odm_cls_feat = self.odm_cls_convs(odm_cls_feat)
            odm_cls_score = self.odm_cls(odm_cls_feat)
            # [N, CLS, H, W] --> [N, H, W, CLS]
            odm_cls_score = odm_cls_score.transpose([0, 2, 3, 1])
            odm_cls_score_reshape = paddle.reshape(
                odm_cls_score,
                [odm_cls_score.shape[0], -1, self.cls_out_channels])
            odm_cls_branch_list.append(odm_cls_score_reshape)
            odm_bbox_pred = self.odm_reg(odm_reg_feat)
            # [N, 5, H, W] --> [N, H, W, 5]
            odm_bbox_pred = odm_bbox_pred.transpose([0, 2, 3, 1])
            odm_bbox_pred_reshape = paddle.reshape(
                odm_bbox_pred, [odm_bbox_pred.shape[0], -1, 5])
            odm_reg_branch_list.append(odm_bbox_pred_reshape)
        self.s2anet_head_out = (fam_cls_branch_list, fam_reg_branch_list,
                                odm_cls_branch_list, odm_reg_branch_list)
        return self.s2anet_head_out
    def rect2rbox(self, bboxes):
        """
        :param bboxes: shape (n, 4) (xmin, ymin, xmax, ymax)
        :return: dbboxes: shape (n, 5) (x_ctr, y_ctr, w, h, angle)
        """
        num_boxes = paddle.shape(bboxes)[0]
        x_ctr = (bboxes[:, 2] + bboxes[:, 0]) / 2.0
        y_ctr = (bboxes[:, 3] + bboxes[:, 1]) / 2.0
        edges1 = paddle.abs(bboxes[:, 2] - bboxes[:, 0])
        edges2 = paddle.abs(bboxes[:, 3] - bboxes[:, 1])
        rbox_w = paddle.maximum(edges1, edges2)
        rbox_h = paddle.minimum(edges1, edges2)
        # set angle
        inds = edges1 < edges2
        inds = paddle.cast(inds, 'int32')
        inds1 = inds * paddle.arange(0, num_boxes)
        rboxes_angle = inds1 * np.pi / 2.0
        rboxes = paddle.stack(
            (x_ctr, y_ctr, rbox_w, rbox_h, rboxes_angle), axis=1)
        return rboxes
    # deltas to rbox
    def delta2rbox(self, rrois, deltas, means, stds, wh_ratio_clip=1e-6):
        """
        :param rrois: (cx, cy, w, h, theta)
        :param deltas: (dx, dy, dw, dh, dtheta)
        :param means: means of anchor
        :param stds: stds of anchor
        :param wh_ratio_clip: clip threshold of wh_ratio
        :return:
        """
        deltas = paddle.reshape(deltas, [-1, 5])
        rrois = paddle.reshape(rrois, [-1, 5])
        pd_means = paddle.ones(shape=[5]) * means
        pd_stds = paddle.ones(shape=[5]) * stds
        denorm_deltas = deltas * pd_stds + pd_means
        dx = denorm_deltas[:, 0]
        dy = denorm_deltas[:, 1]
        dw = denorm_deltas[:, 2]
        dh = denorm_deltas[:, 3]
        dangle = denorm_deltas[:, 4]
        max_ratio = np.abs(np.log(wh_ratio_clip))
        dw = paddle.clip(dw, min=-max_ratio, max=max_ratio)
        dh = paddle.clip(dh, min=-max_ratio, max=max_ratio)
        rroi_x = rrois[:, 0]
        rroi_y = rrois[:, 1]
        rroi_w = rrois[:, 2]
        rroi_h = rrois[:, 3]
        rroi_angle = rrois[:, 4]
        gx = dx * rroi_w * paddle.cos(rroi_angle) - dy * rroi_h * paddle.sin(
            rroi_angle) + rroi_x
        gy = dx * rroi_w * paddle.sin(rroi_angle) + dy * rroi_h * paddle.cos(
            rroi_angle) + rroi_y
        gw = rroi_w * dw.exp()
        gh = rroi_h * dh.exp()
        ga = np.pi * dangle + rroi_angle
        ga = (ga + np.pi / 4) % np.pi - np.pi / 4
        bboxes = paddle.stack([gx, gy, gw, gh, ga], axis=-1)
        return bboxes
    def bbox_decode(self, bbox_preds, anchors, stds, means, wh_ratio_clip=1e-6):
        """decode bbox from deltas
        Args:
            bbox_preds: bbox_preds, shape=[N,H,W,5]
            anchors: anchors, shape=[H,W,5]
        return:
            bboxes: return decoded bboxes, shape=[N*H*W,5]
        """
        num_imgs, H, W, _ = bbox_preds.shape
        bbox_delta = paddle.reshape(bbox_preds, [-1, 5])
        bboxes = self.delta2rbox(anchors, bbox_delta, means, stds,
                                 wh_ratio_clip)
        return bboxes
    def get_prediction(self, nms_pre):
        refine_anchors = self.refine_anchor_list
        fam_cls_branch_list, fam_reg_branch_list, odm_cls_branch_list, odm_reg_branch_list = self.s2anet_head_out
        pred_scores, pred_bboxes = self.get_bboxes(
            odm_cls_branch_list,
            odm_reg_branch_list,
            refine_anchors,
            nms_pre,
            cls_out_channels=self.cls_out_channels,
            use_sigmoid_cls=self.use_sigmoid_cls)
        return pred_scores, pred_bboxes
    def smooth_l1_loss(self, pred, label, delta=1.0 / 9.0):
        """
        Args:
            pred: pred score
            label: label
            delta: delta
        Returns: loss
        """
        assert pred.shape == label.shape and label.numel() > 0
        assert delta > 0
        diff = paddle.abs(pred - label)
        loss = paddle.where(diff < delta, 0.5 * diff * diff / delta,
                            diff - 0.5 * delta)
        return loss
    def get_fam_loss(self, fam_target, s2anet_head_out):
        (feat_labels, feat_label_weights, feat_bbox_targets, feat_bbox_weights,
         pos_inds, neg_inds) = fam_target
        fam_cls_score, fam_bbox_pred = s2anet_head_out
        # step1:  sample count
        num_total_samples = len(pos_inds) + len(
            neg_inds) if self.sampling else len(pos_inds)
        num_total_samples = max(1, num_total_samples)
        # step2: calc cls loss
        feat_labels = feat_labels.reshape(-1)
        feat_label_weights = feat_label_weights.reshape(-1)
        fam_cls_score = paddle.squeeze(fam_cls_score, axis=0)
        fam_cls_score1 = fam_cls_score
        # gt_classes 0~14(data), feat_labels 0~14, sigmoid_focal_loss need class>=1
        feat_labels = feat_labels + 1
        feat_labels = paddle.to_tensor(feat_labels)
        feat_labels_one_hot = F.one_hot(feat_labels, self.cls_out_channels + 1)
        feat_labels_one_hot = feat_labels_one_hot[:, 1:]
        feat_labels_one_hot.stop_gradient = True
        num_total_samples = paddle.to_tensor(
            num_total_samples, dtype='float32', stop_gradient=True)
        fam_cls = F.sigmoid_focal_loss(
            fam_cls_score1,
            feat_labels_one_hot,
            normalizer=num_total_samples,
            reduction='none')
        feat_label_weights = feat_label_weights.reshape(
            feat_label_weights.shape[0], 1)
        feat_label_weights = np.repeat(
            feat_label_weights, self.cls_out_channels, axis=1)
        feat_label_weights = paddle.to_tensor(
            feat_label_weights, stop_gradient=True)
        fam_cls = fam_cls * feat_label_weights
        fam_cls_total = paddle.sum(fam_cls)
        # step3: regression loss
        feat_bbox_targets = paddle.to_tensor(
            feat_bbox_targets, dtype='float32', stop_gradient=True)
        feat_bbox_targets = paddle.reshape(feat_bbox_targets, [-1, 5])
        fam_bbox_pred = paddle.squeeze(fam_bbox_pred, axis=0)
        fam_bbox_pred = paddle.reshape(fam_bbox_pred, [-1, 5])
        fam_bbox = self.smooth_l1_loss(fam_bbox_pred, feat_bbox_targets)
        loss_weight = paddle.to_tensor(
            self.reg_loss_weight, dtype='float32', stop_gradient=True)
        fam_bbox = paddle.multiply(fam_bbox, loss_weight)
        feat_bbox_weights = paddle.to_tensor(
            feat_bbox_weights, stop_gradient=True)
        fam_bbox = fam_bbox * feat_bbox_weights
        fam_bbox_total = paddle.sum(fam_bbox) / num_total_samples
        fam_cls_loss_weight = paddle.to_tensor(
            self.cls_loss_weight[0], dtype='float32', stop_gradient=True)
        fam_cls_loss = fam_cls_total * fam_cls_loss_weight
        fam_reg_loss = paddle.add_n(fam_bbox_total)
        return fam_cls_loss, fam_reg_loss
    def get_odm_loss(self, odm_target, s2anet_head_out):
        (feat_labels, feat_label_weights, feat_bbox_targets, feat_bbox_weights,
         pos_inds, neg_inds) = odm_target
        odm_cls_score, odm_bbox_pred = s2anet_head_out
        # step1:  sample count
        num_total_samples = len(pos_inds) + len(
            neg_inds) if self.sampling else len(pos_inds)
        num_total_samples = max(1, num_total_samples)
        # step2: calc cls loss
        feat_labels = feat_labels.reshape(-1)
        feat_label_weights = feat_label_weights.reshape(-1)
        odm_cls_score = paddle.squeeze(odm_cls_score, axis=0)
        odm_cls_score1 = odm_cls_score
        # gt_classes 0~14(data), feat_labels 0~14, sigmoid_focal_loss need class>=1
        # for debug 0426
        feat_labels = feat_labels + 1
        feat_labels = paddle.to_tensor(feat_labels)
        feat_labels_one_hot = F.one_hot(feat_labels, self.cls_out_channels + 1)
        feat_labels_one_hot = feat_labels_one_hot[:, 1:]
        feat_labels_one_hot.stop_gradient = True
        num_total_samples = paddle.to_tensor(
            num_total_samples, dtype='float32', stop_gradient=True)
        odm_cls = F.sigmoid_focal_loss(
            odm_cls_score1,
            feat_labels_one_hot,
            normalizer=num_total_samples,
            reduction='none')
        feat_label_weights = feat_label_weights.reshape(
            feat_label_weights.shape[0], 1)
        feat_label_weights = np.repeat(
            feat_label_weights, self.cls_out_channels, axis=1)
        feat_label_weights = paddle.to_tensor(
            feat_label_weights, stop_gradient=True)
        odm_cls = odm_cls * feat_label_weights
        odm_cls_total = paddle.sum(odm_cls)
        # step3: regression loss
        feat_bbox_targets = paddle.to_tensor(
            feat_bbox_targets, dtype='float32', stop_gradient=True)
        feat_bbox_targets = paddle.reshape(feat_bbox_targets, [-1, 5])
        odm_bbox_pred = paddle.squeeze(odm_bbox_pred, axis=0)
        odm_bbox_pred = paddle.reshape(odm_bbox_pred, [-1, 5])
        odm_bbox = self.smooth_l1_loss(odm_bbox_pred, feat_bbox_targets)
        loss_weight = paddle.to_tensor(
            self.reg_loss_weight, dtype='float32', stop_gradient=True)
        odm_bbox = paddle.multiply(odm_bbox, loss_weight)
        feat_bbox_weights = paddle.to_tensor(
            feat_bbox_weights, stop_gradient=True)
        odm_bbox = odm_bbox * feat_bbox_weights
        odm_bbox_total = paddle.sum(odm_bbox) / num_total_samples
        odm_cls_loss_weight = paddle.to_tensor(
            self.cls_loss_weight[0], dtype='float32', stop_gradient=True)
        odm_cls_loss = odm_cls_total * odm_cls_loss_weight
        odm_reg_loss = paddle.add_n(odm_bbox_total)
        return odm_cls_loss, odm_reg_loss
    def get_loss(self, inputs):
        # inputs: im_id image im_shape scale_factor gt_bbox gt_class is_crowd
        # compute loss
        fam_cls_loss_lst = []
        fam_reg_loss_lst = []
        odm_cls_loss_lst = []
        odm_reg_loss_lst = []
        im_shape = inputs['im_shape']
        for im_id in range(im_shape.shape[0]):
            np_im_shape = inputs['im_shape'][im_id].numpy()
            np_scale_factor = inputs['scale_factor'][im_id].numpy()
            # data_format: (xc, yc, w, h, theta)
            gt_bboxes = inputs['gt_rbox'][im_id].numpy()
            gt_labels = inputs['gt_class'][im_id].numpy()
            is_crowd = inputs['is_crowd'][im_id].numpy()
            gt_labels = gt_labels + 1
            # FAM
            for idx, rbox_anchors in enumerate(self.rbox_anchors_list):
                rbox_anchors = rbox_anchors.numpy()
                rbox_anchors = rbox_anchors.reshape(-1, 5)
                im_fam_target = self.anchor_assign(rbox_anchors, gt_bboxes,
                                                   gt_labels, is_crowd)
                # feat
                fam_cls_feat = self.s2anet_head_out[0][idx][im_id]
                fam_reg_feat = self.s2anet_head_out[1][idx][im_id]
                im_s2anet_fam_feat = (fam_cls_feat, fam_reg_feat)
                im_fam_cls_loss, im_fam_reg_loss = self.get_fam_loss(
                    im_fam_target, im_s2anet_fam_feat)
                fam_cls_loss_lst.append(im_fam_cls_loss)
                fam_reg_loss_lst.append(im_fam_reg_loss)
            # ODM
            for idx, refine_anchors in enumerate(self.refine_anchor_list):
                refine_anchors = refine_anchors.numpy()
                refine_anchors = refine_anchors.reshape(-1, 5)
                im_odm_target = self.anchor_assign(refine_anchors, gt_bboxes,
                                                   gt_labels, is_crowd)
                odm_cls_feat = self.s2anet_head_out[2][idx][im_id]
                odm_reg_feat = self.s2anet_head_out[3][idx][im_id]
                im_s2anet_odm_feat = (odm_cls_feat, odm_reg_feat)
                im_odm_cls_loss, im_odm_reg_loss = self.get_odm_loss(
                    im_odm_target, im_s2anet_odm_feat)
                odm_cls_loss_lst.append(im_odm_cls_loss)
                odm_reg_loss_lst.append(im_odm_reg_loss)
        fam_cls_loss = paddle.add_n(fam_cls_loss_lst)
        fam_reg_loss = paddle.add_n(fam_reg_loss_lst)
        odm_cls_loss = paddle.add_n(odm_cls_loss_lst)
        odm_reg_loss = paddle.add_n(odm_reg_loss_lst)
        return {
            'fam_cls_loss': fam_cls_loss,
            'fam_reg_loss': fam_reg_loss,
            'odm_cls_loss': odm_cls_loss,
            'odm_reg_loss': odm_reg_loss
        }
    def get_bboxes(self, cls_score_list, bbox_pred_list, mlvl_anchors, nms_pre,
                   cls_out_channels, use_sigmoid_cls):
        assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_anchors)
        mlvl_bboxes = []
        mlvl_scores = []
        idx = 0
        for cls_score, bbox_pred, anchors in zip(cls_score_list, bbox_pred_list,
                                                 mlvl_anchors):
            cls_score = paddle.reshape(cls_score, [-1, cls_out_channels])
            if use_sigmoid_cls:
                scores = F.sigmoid(cls_score)
            else:
                scores = F.softmax(cls_score, axis=-1)
            # bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 5)
            bbox_pred = paddle.transpose(bbox_pred, [1, 2, 0])
            bbox_pred = paddle.reshape(bbox_pred, [-1, 5])
            anchors = paddle.reshape(anchors, [-1, 5])
            if nms_pre > 0 and scores.shape[0] > nms_pre:
                # Get maximum scores for foreground classes.
                if use_sigmoid_cls:
                    max_scores = paddle.max(scores, axis=1)
                else:
                    max_scores = paddle.max(scores[:, 1:], axis=1)
                topk_val, topk_inds = paddle.topk(max_scores, nms_pre)
                anchors = paddle.gather(anchors, topk_inds)
                bbox_pred = paddle.gather(bbox_pred, topk_inds)
                scores = paddle.gather(scores, topk_inds)
            bboxes = self.delta2rbox(anchors, bbox_pred, self.target_means,
                                     self.target_stds)
            mlvl_bboxes.append(bboxes)
            mlvl_scores.append(scores)
            idx += 1
        mlvl_bboxes = paddle.concat(mlvl_bboxes, axis=0)
        mlvl_scores = paddle.concat(mlvl_scores)
        if use_sigmoid_cls:
            # Add a dummy background class to the front when using sigmoid
            padding = paddle.zeros(
                [mlvl_scores.shape[0], 1], dtype=mlvl_scores.dtype)
            mlvl_scores = paddle.concat([padding, mlvl_scores], axis=1)
        return mlvl_scores, mlvl_bboxes
--- a/build/lib/ppdet/modeling/heads/solov2_head.py
+++ b/build/lib/ppdet/modeling/heads/solov2_head.py
@ -0,0 +1,530 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import paddle
 from paddle import ParamAttr
 import paddle.nn as nn
 import paddle.nn.functional as F
 from paddle.nn.initializer import Normal, Constant
 from ppdet.modeling.layers import ConvNormLayer
 from ppdet.core.workspace import register
 from six.moves import zip
 import numpy as np
 __all__ = ['SOLOv2Head']
@register
 class SOLOv2MaskHead(nn.Layer):
    """
    MaskHead of SOLOv2
    Args:
        in_channels (int): The channel number of input Tensor.
        out_channels (int): The channel number of output Tensor.
        start_level (int): The position where the input starts.
        end_level (int): The position where the input ends.
        use_dcn_in_tower (bool): Whether to use dcn in tower or not.
    """
    def __init__(self,
                 in_channels=256,
                 mid_channels=128,
                 out_channels=256,
                 start_level=0,
                 end_level=3,
                 use_dcn_in_tower=False):
        super(SOLOv2MaskHead, self).__init__()
        assert start_level >= 0 and end_level >= start_level
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.mid_channels = mid_channels
        self.use_dcn_in_tower = use_dcn_in_tower
        self.range_level = end_level - start_level + 1
        # TODO: add DeformConvNorm
        conv_type = [ConvNormLayer]
        self.conv_func = conv_type[0]
        if self.use_dcn_in_tower:
            self.conv_func = conv_type[1]
        self.convs_all_levels = []
        for i in range(start_level, end_level + 1):
            conv_feat_name = 'mask_feat_head.convs_all_levels.{}'.format(i)
            conv_pre_feat = nn.Sequential()
            if i == start_level:
                conv_pre_feat.add_sublayer(
                    conv_feat_name + '.conv' + str(i),
                    self.conv_func(
                        ch_in=self.in_channels,
                        ch_out=self.mid_channels,
                        filter_size=3,
                        stride=1,
                        norm_type='gn'))
                self.add_sublayer('conv_pre_feat' + str(i), conv_pre_feat)
                self.convs_all_levels.append(conv_pre_feat)
            else:
                for j in range(i):
                    ch_in = 0
                    if j == 0:
                        ch_in = self.in_channels + 2 if i == end_level else self.in_channels
                    else:
                        ch_in = self.mid_channels
                    conv_pre_feat.add_sublayer(
                        conv_feat_name + '.conv' + str(j),
                        self.conv_func(
                            ch_in=ch_in,
                            ch_out=self.mid_channels,
                            filter_size=3,
                            stride=1,
                            norm_type='gn'))
                    conv_pre_feat.add_sublayer(
                        conv_feat_name + '.conv' + str(j) + 'act', nn.ReLU())
                    conv_pre_feat.add_sublayer(
                        'upsample' + str(i) + str(j),
                        nn.Upsample(
                            scale_factor=2, mode='bilinear'))
                self.add_sublayer('conv_pre_feat' + str(i), conv_pre_feat)
                self.convs_all_levels.append(conv_pre_feat)
        conv_pred_name = 'mask_feat_head.conv_pred.0'
        self.conv_pred = self.add_sublayer(
            conv_pred_name,
            self.conv_func(
                ch_in=self.mid_channels,
                ch_out=self.out_channels,
                filter_size=1,
                stride=1,
                norm_type='gn'))
    def forward(self, inputs):
        """
        Get SOLOv2MaskHead output.
        Args:
            inputs(list[Tensor]): feature map from each necks with shape of [N, C, H, W]
        Returns:
            ins_pred(Tensor): Output of SOLOv2MaskHead head
        """
        feat_all_level = F.relu(self.convs_all_levels[0](inputs[0]))
        for i in range(1, self.range_level):
            input_p = inputs[i]
            if i == (self.range_level - 1):
                input_feat = input_p
                x_range = paddle.linspace(
                    -1, 1, paddle.shape(input_feat)[-1], dtype='float32')
                y_range = paddle.linspace(
                    -1, 1, paddle.shape(input_feat)[-2], dtype='float32')
                y, x = paddle.meshgrid([y_range, x_range])
                x = paddle.unsqueeze(x, [0, 1])
                y = paddle.unsqueeze(y, [0, 1])
                y = paddle.expand(
                    y, shape=[paddle.shape(input_feat)[0], 1, -1, -1])
                x = paddle.expand(
                    x, shape=[paddle.shape(input_feat)[0], 1, -1, -1])
                coord_feat = paddle.concat([x, y], axis=1)
                input_p = paddle.concat([input_p, coord_feat], axis=1)
            feat_all_level = paddle.add(feat_all_level,
                                        self.convs_all_levels[i](input_p))
        ins_pred = F.relu(self.conv_pred(feat_all_level))
        return ins_pred
@register
 class SOLOv2Head(nn.Layer):
    """
    Head block for SOLOv2 network
    Args:
        num_classes (int): Number of output classes.
        in_channels (int): Number of input channels.
        seg_feat_channels (int): Num_filters of kernel & categroy branch convolution operation.
        stacked_convs (int): Times of convolution operation.
        num_grids (list[int]): List of feature map grids size.
        kernel_out_channels (int): Number of output channels in kernel branch.
        dcn_v2_stages (list): Which stage use dcn v2 in tower. It is between [0, stacked_convs).
        segm_strides (list[int]): List of segmentation area stride.
        solov2_loss (object): SOLOv2Loss instance.
        score_threshold (float): Threshold of categroy score.
        mask_nms (object): MaskMatrixNMS instance.
    """
    __inject__ = ['solov2_loss', 'mask_nms']
    __shared__ = ['num_classes']
    def __init__(self,
                 num_classes=80,
                 in_channels=256,
                 seg_feat_channels=256,
                 stacked_convs=4,
                 num_grids=[40, 36, 24, 16, 12],
                 kernel_out_channels=256,
                 dcn_v2_stages=[],
                 segm_strides=[8, 8, 16, 32, 32],
                 solov2_loss=None,
                 score_threshold=0.1,
                 mask_threshold=0.5,
                 mask_nms=None):
        super(SOLOv2Head, self).__init__()
        self.num_classes = num_classes
        self.in_channels = in_channels
        self.seg_num_grids = num_grids
        self.cate_out_channels = self.num_classes
        self.seg_feat_channels = seg_feat_channels
        self.stacked_convs = stacked_convs
        self.kernel_out_channels = kernel_out_channels
        self.dcn_v2_stages = dcn_v2_stages
        self.segm_strides = segm_strides
        self.solov2_loss = solov2_loss
        self.mask_nms = mask_nms
        self.score_threshold = score_threshold
        self.mask_threshold = mask_threshold
        conv_type = [ConvNormLayer]
        self.conv_func = conv_type[0]
        self.kernel_pred_convs = []
        self.cate_pred_convs = []
        for i in range(self.stacked_convs):
            if i in self.dcn_v2_stages:
                self.conv_func = conv_type[1]
            ch_in = self.in_channels + 2 if i == 0 else self.seg_feat_channels
            kernel_conv = self.add_sublayer(
                'bbox_head.kernel_convs.' + str(i),
                self.conv_func(
                    ch_in=ch_in,
                    ch_out=self.seg_feat_channels,
                    filter_size=3,
                    stride=1,
                    norm_type='gn'))
            self.kernel_pred_convs.append(kernel_conv)
            ch_in = self.in_channels if i == 0 else self.seg_feat_channels
            cate_conv = self.add_sublayer(
                'bbox_head.cate_convs.' + str(i),
                self.conv_func(
                    ch_in=ch_in,
                    ch_out=self.seg_feat_channels,
                    filter_size=3,
                    stride=1,
                    norm_type='gn'))
            self.cate_pred_convs.append(cate_conv)
        self.solo_kernel = self.add_sublayer(
            'bbox_head.solo_kernel',
            nn.Conv2D(
                self.seg_feat_channels,
                self.kernel_out_channels,
                kernel_size=3,
                stride=1,
                padding=1,
                weight_attr=ParamAttr(initializer=Normal(
                    mean=0., std=0.01)),
                bias_attr=True))
        self.solo_cate = self.add_sublayer(
            'bbox_head.solo_cate',
            nn.Conv2D(
                self.seg_feat_channels,
                self.cate_out_channels,
                kernel_size=3,
                stride=1,
                padding=1,
                weight_attr=ParamAttr(initializer=Normal(
                    mean=0., std=0.01)),
                bias_attr=ParamAttr(initializer=Constant(
                    value=float(-np.log((1 - 0.01) / 0.01))))))
    def _points_nms(self, heat, kernel_size=2):
        hmax = F.max_pool2d(heat, kernel_size=kernel_size, stride=1, padding=1)
        keep = paddle.cast((hmax[:, :, :-1, :-1] == heat), 'float32')
        return heat * keep
    def _split_feats(self, feats):
        return (F.interpolate(
            feats[0],
            scale_factor=0.5,
            align_corners=False,
            align_mode=0,
            mode='bilinear'), feats[1], feats[2], feats[3], F.interpolate(
                feats[4],
                size=paddle.shape(feats[3])[-2:],
                mode='bilinear',
                align_corners=False,
                align_mode=0))
    def forward(self, input):
        """
        Get SOLOv2 head output
        Args:
            input (list): List of Tensors, output of backbone or neck stages
        Returns:
            cate_pred_list (list): Tensors of each category branch layer
            kernel_pred_list (list): Tensors of each kernel branch layer
        """
        feats = self._split_feats(input)
        cate_pred_list = []
        kernel_pred_list = []
        for idx in range(len(self.seg_num_grids)):
            cate_pred, kernel_pred = self._get_output_single(feats[idx], idx)
            cate_pred_list.append(cate_pred)
            kernel_pred_list.append(kernel_pred)
        return cate_pred_list, kernel_pred_list
    def _get_output_single(self, input, idx):
        ins_kernel_feat = input
        # CoordConv
        x_range = paddle.linspace(
            -1, 1, paddle.shape(ins_kernel_feat)[-1], dtype='float32')
        y_range = paddle.linspace(
            -1, 1, paddle.shape(ins_kernel_feat)[-2], dtype='float32')
        y, x = paddle.meshgrid([y_range, x_range])
        x = paddle.unsqueeze(x, [0, 1])
        y = paddle.unsqueeze(y, [0, 1])
        y = paddle.expand(
            y, shape=[paddle.shape(ins_kernel_feat)[0], 1, -1, -1])
        x = paddle.expand(
            x, shape=[paddle.shape(ins_kernel_feat)[0], 1, -1, -1])
        coord_feat = paddle.concat([x, y], axis=1)
        ins_kernel_feat = paddle.concat([ins_kernel_feat, coord_feat], axis=1)
        # kernel branch
        kernel_feat = ins_kernel_feat
        seg_num_grid = self.seg_num_grids[idx]
        kernel_feat = F.interpolate(
            kernel_feat,
            size=[seg_num_grid, seg_num_grid],
            mode='bilinear',
            align_corners=False,
            align_mode=0)
        cate_feat = kernel_feat[:, :-2, :, :]
        for kernel_layer in self.kernel_pred_convs:
            kernel_feat = F.relu(kernel_layer(kernel_feat))
        kernel_pred = self.solo_kernel(kernel_feat)
        # cate branch
        for cate_layer in self.cate_pred_convs:
            cate_feat = F.relu(cate_layer(cate_feat))
        cate_pred = self.solo_cate(cate_feat)
        if not self.training:
            cate_pred = self._points_nms(F.sigmoid(cate_pred), kernel_size=2)
            cate_pred = paddle.transpose(cate_pred, [0, 2, 3, 1])
        return cate_pred, kernel_pred
    def get_loss(self, cate_preds, kernel_preds, ins_pred, ins_labels,
                 cate_labels, grid_order_list, fg_num):
        """
        Get loss of network of SOLOv2.
        Args:
            cate_preds (list): Tensor list of categroy branch output.
            kernel_preds (list): Tensor list of kernel branch output.
            ins_pred (list): Tensor list of instance branch output.
            ins_labels (list): List of instance labels pre batch.
            cate_labels (list): List of categroy labels pre batch.
            grid_order_list (list): List of index in pre grid.
            fg_num (int): Number of positive samples in a mini-batch.
        Returns:
            loss_ins (Tensor): The instance loss Tensor of SOLOv2 network.
            loss_cate (Tensor): The category loss Tensor of SOLOv2 network.
        """
        batch_size = paddle.shape(grid_order_list[0])[0]
        ins_pred_list = []
        for kernel_preds_level, grid_orders_level in zip(kernel_preds,
                                                         grid_order_list):
            if grid_orders_level.shape[1] == 0:
                ins_pred_list.append(None)
                continue
            grid_orders_level = paddle.reshape(grid_orders_level, [-1])
            reshape_pred = paddle.reshape(
                kernel_preds_level,
                shape=(paddle.shape(kernel_preds_level)[0],
                       paddle.shape(kernel_preds_level)[1], -1))
            reshape_pred = paddle.transpose(reshape_pred, [0, 2, 1])
            reshape_pred = paddle.reshape(
                reshape_pred, shape=(-1, paddle.shape(reshape_pred)[2]))
            gathered_pred = paddle.gather(reshape_pred, index=grid_orders_level)
            gathered_pred = paddle.reshape(
                gathered_pred,
                shape=[batch_size, -1, paddle.shape(gathered_pred)[1]])
            cur_ins_pred = ins_pred
            cur_ins_pred = paddle.reshape(
                cur_ins_pred,
                shape=(paddle.shape(cur_ins_pred)[0],
                       paddle.shape(cur_ins_pred)[1], -1))
            ins_pred_conv = paddle.matmul(gathered_pred, cur_ins_pred)
            cur_ins_pred = paddle.reshape(
                ins_pred_conv,
                shape=(-1, paddle.shape(ins_pred)[-2],
                       paddle.shape(ins_pred)[-1]))
            ins_pred_list.append(cur_ins_pred)
        num_ins = paddle.sum(fg_num)
        cate_preds = [
            paddle.reshape(
                paddle.transpose(cate_pred, [0, 2, 3, 1]),
                shape=(-1, self.cate_out_channels)) for cate_pred in cate_preds
        ]
        flatten_cate_preds = paddle.concat(cate_preds)
        new_cate_labels = []
        for cate_label in cate_labels:
            new_cate_labels.append(paddle.reshape(cate_label, shape=[-1]))
        cate_labels = paddle.concat(new_cate_labels)
        loss_ins, loss_cate = self.solov2_loss(
            ins_pred_list, ins_labels, flatten_cate_preds, cate_labels, num_ins)
        return {'loss_ins': loss_ins, 'loss_cate': loss_cate}
    def get_prediction(self, cate_preds, kernel_preds, seg_pred, im_shape,
                       scale_factor):
        """
        Get prediction result of SOLOv2 network
        Args:
            cate_preds (list): List of Variables, output of categroy branch.
            kernel_preds (list): List of Variables, output of kernel branch.
            seg_pred (list): List of Variables, output of mask head stages.
            im_shape (Variables): [h, w] for input images.
            scale_factor (Variables): [scale, scale] for input images.
        Returns:
            seg_masks (Tensor): The prediction segmentation.
            cate_labels (Tensor): The prediction categroy label of each segmentation.
            seg_masks (Tensor): The prediction score of each segmentation.
        """
        num_levels = len(cate_preds)
        featmap_size = paddle.shape(seg_pred)[-2:]
        seg_masks_list = []
        cate_labels_list = []
        cate_scores_list = []
        cate_preds = [cate_pred * 1.0 for cate_pred in cate_preds]
        kernel_preds = [kernel_pred * 1.0 for kernel_pred in kernel_preds]
        # Currently only supports batch size == 1
        for idx in range(1):
            cate_pred_list = [
                paddle.reshape(
                    cate_preds[i][idx], shape=(-1, self.cate_out_channels))
                for i in range(num_levels)
            ]
            seg_pred_list = seg_pred
            kernel_pred_list = [
                paddle.reshape(
                    paddle.transpose(kernel_preds[i][idx], [1, 2, 0]),
                    shape=(-1, self.kernel_out_channels))
                for i in range(num_levels)
            ]
            cate_pred_list = paddle.concat(cate_pred_list, axis=0)
            kernel_pred_list = paddle.concat(kernel_pred_list, axis=0)
            seg_masks, cate_labels, cate_scores = self.get_seg_single(
                cate_pred_list, seg_pred_list, kernel_pred_list, featmap_size,
                im_shape[idx], scale_factor[idx][0])
            bbox_num = paddle.shape(cate_labels)[0]
        return seg_masks, cate_labels, cate_scores, bbox_num
    def get_seg_single(self, cate_preds, seg_preds, kernel_preds, featmap_size,
                       im_shape, scale_factor):
        h = paddle.cast(im_shape[0], 'int32')[0]
        w = paddle.cast(im_shape[1], 'int32')[0]
        upsampled_size_out = [featmap_size[0] * 4, featmap_size[1] * 4]
        y = paddle.zeros(shape=paddle.shape(cate_preds), dtype='float32')
        inds = paddle.where(cate_preds > self.score_threshold, cate_preds, y)
        inds = paddle.nonzero(inds)
        cate_preds = paddle.reshape(cate_preds, shape=[-1])
        # Prevent empty and increase fake data
        ind_a = paddle.cast(paddle.shape(kernel_preds)[0], 'int64')
        ind_b = paddle.zeros(shape=[1], dtype='int64')
        inds_end = paddle.unsqueeze(paddle.concat([ind_a, ind_b]), 0)
        inds = paddle.concat([inds, inds_end])
        kernel_preds_end = paddle.ones(
            shape=[1, self.kernel_out_channels], dtype='float32')
        kernel_preds = paddle.concat([kernel_preds, kernel_preds_end])
        cate_preds = paddle.concat(
            [cate_preds, paddle.zeros(
                shape=[1], dtype='float32')])
        # cate_labels & kernel_preds
        cate_labels = inds[:, 1]
        kernel_preds = paddle.gather(kernel_preds, index=inds[:, 0])
        cate_score_idx = paddle.add(inds[:, 0] * 80, cate_labels)
        cate_scores = paddle.gather(cate_preds, index=cate_score_idx)
        size_trans = np.power(self.seg_num_grids, 2)
        strides = []
        for _ind in range(len(self.segm_strides)):
            strides.append(
                paddle.full(
                    shape=[int(size_trans[_ind])],
                    fill_value=self.segm_strides[_ind],
                    dtype="int32"))
        strides = paddle.concat(strides)
        strides = paddle.gather(strides, index=inds[:, 0])
        # mask encoding.
        kernel_preds = paddle.unsqueeze(kernel_preds, [2, 3])
        seg_preds = F.conv2d(seg_preds, kernel_preds)
        seg_preds = F.sigmoid(paddle.squeeze(seg_preds, [0]))
        seg_masks = seg_preds > self.mask_threshold
        seg_masks = paddle.cast(seg_masks, 'float32')
        sum_masks = paddle.sum(seg_masks, axis=[1, 2])
        y = paddle.zeros(shape=paddle.shape(sum_masks), dtype='float32')
        keep = paddle.where(sum_masks > strides, sum_masks, y)
        keep = paddle.nonzero(keep)
        keep = paddle.squeeze(keep, axis=[1])
        # Prevent empty and increase fake data
        keep_other = paddle.concat(
            [keep, paddle.cast(paddle.shape(sum_masks)[0] - 1, 'int64')])
        keep_scores = paddle.concat(
            [keep, paddle.cast(paddle.shape(sum_masks)[0], 'int64')])
        cate_scores_end = paddle.zeros(shape=[1], dtype='float32')
        cate_scores = paddle.concat([cate_scores, cate_scores_end])
        seg_masks = paddle.gather(seg_masks, index=keep_other)
        seg_preds = paddle.gather(seg_preds, index=keep_other)
        sum_masks = paddle.gather(sum_masks, index=keep_other)
        cate_labels = paddle.gather(cate_labels, index=keep_other)
        cate_scores = paddle.gather(cate_scores, index=keep_scores)
        # mask scoring.
        seg_mul = paddle.cast(seg_preds * seg_masks, 'float32')
        seg_scores = paddle.sum(seg_mul, axis=[1, 2]) / sum_masks
        cate_scores *= seg_scores
        # Matrix NMS
        seg_preds, cate_scores, cate_labels = self.mask_nms(
            seg_preds, seg_masks, cate_labels, cate_scores, sum_masks=sum_masks)
        ori_shape = im_shape[:2] / scale_factor + 0.5
        ori_shape = paddle.cast(ori_shape, 'int32')
        seg_preds = F.interpolate(
            paddle.unsqueeze(seg_preds, 0),
            size=upsampled_size_out,
            mode='bilinear',
            align_corners=False,
            align_mode=0)
        seg_preds = paddle.slice(
            seg_preds, axes=[2, 3], starts=[0, 0], ends=[h, w])
        seg_masks = paddle.squeeze(
            F.interpolate(
                seg_preds,
                size=ori_shape[:2],
                mode='bilinear',
                align_corners=False,
                align_mode=0),
            axis=[0])
        seg_masks = paddle.cast(seg_masks > self.mask_threshold, 'uint8')
        return seg_masks, cate_labels, cate_scores
--- a/build/lib/ppdet/modeling/heads/ssd_head.py
+++ b/build/lib/ppdet/modeling/heads/ssd_head.py
@ -0,0 +1,175 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
 #   
 # Licensed under the Apache License, Version 2.0 (the "License");   
 # you may not use this file except in compliance with the License.  
 # You may obtain a copy of the License at   
 #   
 #     http://www.apache.org/licenses/LICENSE-2.0    
 #   
 # Unless required by applicable law or agreed to in writing, software   
 # distributed under the License is distributed on an "AS IS" BASIS, 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
 # See the License for the specific language governing permissions and   
 # limitations under the License.
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
 from ppdet.core.workspace import register
 from paddle.regularizer import L2Decay
 from paddle import ParamAttr
 from ..layers import AnchorGeneratorSSD
 class SepConvLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=3,
                 padding=1,
                 conv_decay=0):
        super(SepConvLayer, self).__init__()
        self.dw_conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=in_channels,
            kernel_size=kernel_size,
            stride=1,
            padding=padding,
            groups=in_channels,
            weight_attr=ParamAttr(regularizer=L2Decay(conv_decay)),
            bias_attr=False)
        self.bn = nn.BatchNorm2D(
            in_channels,
            weight_attr=ParamAttr(regularizer=L2Decay(0.)),
            bias_attr=ParamAttr(regularizer=L2Decay(0.)))
        self.pw_conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(regularizer=L2Decay(conv_decay)),
            bias_attr=False)
    def forward(self, x):
        x = self.dw_conv(x)
        x = F.relu6(self.bn(x))
        x = self.pw_conv(x)
        return x
@register
 class SSDHead(nn.Layer):
    """
    SSDHead
    Args:
        num_classes (int): Number of classes
        in_channels (list): Number of channels per input feature
        anchor_generator (dict): Configuration of 'AnchorGeneratorSSD' instance
        kernel_size (int): Conv kernel size
        padding (int): Conv padding
        use_sepconv (bool): Use SepConvLayer if true
        conv_decay (float): Conv regularization coeff
        loss (object): 'SSDLoss' instance
    """
    __shared__ = ['num_classes']
    __inject__ = ['anchor_generator', 'loss']
    def __init__(self,
                 num_classes=80,
                 in_channels=(512, 1024, 512, 256, 256, 256),
                 anchor_generator=AnchorGeneratorSSD().__dict__,
                 kernel_size=3,
                 padding=1,
                 use_sepconv=False,
                 conv_decay=0.,
                 loss='SSDLoss'):
        super(SSDHead, self).__init__()
        # add background class
        self.num_classes = num_classes + 1
        self.in_channels = in_channels
        self.anchor_generator = anchor_generator
        self.loss = loss
        if isinstance(anchor_generator, dict):
            self.anchor_generator = AnchorGeneratorSSD(**anchor_generator)
        self.num_priors = self.anchor_generator.num_priors
        self.box_convs = []
        self.score_convs = []
        for i, num_prior in enumerate(self.num_priors):
            box_conv_name = "boxes{}".format(i)
            if not use_sepconv:
                box_conv = self.add_sublayer(
                    box_conv_name,
                    nn.Conv2D(
                        in_channels=in_channels[i],
                        out_channels=num_prior * 4,
                        kernel_size=kernel_size,
                        padding=padding))
            else:
                box_conv = self.add_sublayer(
                    box_conv_name,
                    SepConvLayer(
                        in_channels=in_channels[i],
                        out_channels=num_prior * 4,
                        kernel_size=kernel_size,
                        padding=padding,
                        conv_decay=conv_decay))
            self.box_convs.append(box_conv)
            score_conv_name = "scores{}".format(i)
            if not use_sepconv:
                score_conv = self.add_sublayer(
                    score_conv_name,
                    nn.Conv2D(
                        in_channels=in_channels[i],
                        out_channels=num_prior * self.num_classes,
                        kernel_size=kernel_size,
                        padding=padding))
            else:
                score_conv = self.add_sublayer(
                    score_conv_name,
                    SepConvLayer(
                        in_channels=in_channels[i],
                        out_channels=num_prior * self.num_classes,
                        kernel_size=kernel_size,
                        padding=padding,
                        conv_decay=conv_decay))
            self.score_convs.append(score_conv)
    @classmethod
    def from_config(cls, cfg, input_shape):
        return {'in_channels': [i.channels for i in input_shape], }
    def forward(self, feats, image, gt_bbox=None, gt_class=None):
        box_preds = []
        cls_scores = []
        prior_boxes = []
        for feat, box_conv, score_conv in zip(feats, self.box_convs,
                                              self.score_convs):
            box_pred = box_conv(feat)
            box_pred = paddle.transpose(box_pred, [0, 2, 3, 1])
            box_pred = paddle.reshape(box_pred, [0, -1, 4])
            box_preds.append(box_pred)
            cls_score = score_conv(feat)
            cls_score = paddle.transpose(cls_score, [0, 2, 3, 1])
            cls_score = paddle.reshape(cls_score, [0, -1, self.num_classes])
            cls_scores.append(cls_score)
        prior_boxes = self.anchor_generator(feats, image)
        if self.training:
            return self.get_loss(box_preds, cls_scores, gt_bbox, gt_class,
                                 prior_boxes)
        else:
            return (box_preds, cls_scores), prior_boxes
    def get_loss(self, boxes, scores, gt_bbox, gt_class, prior_boxes):
        return self.loss(boxes, scores, gt_bbox, gt_class, prior_boxes)
--- a/build/lib/ppdet/modeling/heads/ttf_head.py
+++ b/build/lib/ppdet/modeling/heads/ttf_head.py
@ -0,0 +1,309 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
 from paddle import ParamAttr
 from paddle.nn.initializer import Constant, Normal
 from paddle.regularizer import L2Decay
 from ppdet.core.workspace import register
 from ppdet.modeling.layers import DeformableConvV2, LiteConv
 import numpy as np
@register
 class HMHead(nn.Layer):
    """
    Args:
        ch_in (int): The channel number of input Tensor.
        ch_out (int): The channel number of output Tensor.
        num_classes (int): Number of classes.
        conv_num (int): The convolution number of hm_feat.
        dcn_head(bool): whether use dcn in head. False by default. 
        lite_head(bool): whether use lite version. False by default.
        norm_type (string): norm type, 'sync_bn', 'bn', 'gn' are optional.
            bn by default
    Return:
        Heatmap head output
    """
    __shared__ = ['num_classes', 'norm_type']
    def __init__(
            self,
            ch_in,
            ch_out=128,
            num_classes=80,
            conv_num=2,
            dcn_head=False,
            lite_head=False,
            norm_type='bn', ):
        super(HMHead, self).__init__()
        head_conv = nn.Sequential()
        for i in range(conv_num):
            name = 'conv.{}'.format(i)
            if lite_head:
                lite_name = 'hm.' + name
                head_conv.add_sublayer(
                    lite_name,
                    LiteConv(
                        in_channels=ch_in if i == 0 else ch_out,
                        out_channels=ch_out,
                        norm_type=norm_type))
                head_conv.add_sublayer(lite_name + '.act', nn.ReLU6())
            else:
                if dcn_head:
                    head_conv.add_sublayer(
                        name,
                        DeformableConvV2(
                            in_channels=ch_in if i == 0 else ch_out,
                            out_channels=ch_out,
                            kernel_size=3,
                            weight_attr=ParamAttr(initializer=Normal(0, 0.01))))
                else:
                    head_conv.add_sublayer(
                        name,
                        nn.Conv2D(
                            in_channels=ch_in if i == 0 else ch_out,
                            out_channels=ch_out,
                            kernel_size=3,
                            padding=1,
                            weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
                            bias_attr=ParamAttr(
                                learning_rate=2., regularizer=L2Decay(0.))))
                head_conv.add_sublayer(name + '.act', nn.ReLU())
        self.feat = head_conv
        bias_init = float(-np.log((1 - 0.01) / 0.01))
        self.head = nn.Conv2D(
            in_channels=ch_out,
            out_channels=num_classes,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
            bias_attr=ParamAttr(
                learning_rate=2.,
                regularizer=L2Decay(0.),
                initializer=Constant(bias_init)))
    def forward(self, feat):
        out = self.feat(feat)
        out = self.head(out)
        return out
@register
 class WHHead(nn.Layer):
    """
    Args:
        ch_in (int): The channel number of input Tensor.
        ch_out (int): The channel number of output Tensor.
        conv_num (int): The convolution number of wh_feat.
        dcn_head(bool): whether use dcn in head. False by default.
        lite_head(bool): whether use lite version. False by default.
        norm_type (string): norm type, 'sync_bn', 'bn', 'gn' are optional.
            bn by default
    Return:
        Width & Height head output
    """
    __shared__ = ['norm_type']
    def __init__(self,
                 ch_in,
                 ch_out=64,
                 conv_num=2,
                 dcn_head=False,
                 lite_head=False,
                 norm_type='bn'):
        super(WHHead, self).__init__()
        head_conv = nn.Sequential()
        for i in range(conv_num):
            name = 'conv.{}'.format(i)
            if lite_head:
                lite_name = 'wh.' + name
                head_conv.add_sublayer(
                    lite_name,
                    LiteConv(
                        in_channels=ch_in if i == 0 else ch_out,
                        out_channels=ch_out,
                        norm_type=norm_type))
                head_conv.add_sublayer(lite_name + '.act', nn.ReLU6())
            else:
                if dcn_head:
                    head_conv.add_sublayer(
                        name,
                        DeformableConvV2(
                            in_channels=ch_in if i == 0 else ch_out,
                            out_channels=ch_out,
                            kernel_size=3,
                            weight_attr=ParamAttr(initializer=Normal(0, 0.01))))
                else:
                    head_conv.add_sublayer(
                        name,
                        nn.Conv2D(
                            in_channels=ch_in if i == 0 else ch_out,
                            out_channels=ch_out,
                            kernel_size=3,
                            padding=1,
                            weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
                            bias_attr=ParamAttr(
                                learning_rate=2., regularizer=L2Decay(0.))))
                head_conv.add_sublayer(name + '.act', nn.ReLU())
        self.feat = head_conv
        self.head = nn.Conv2D(
            in_channels=ch_out,
            out_channels=4,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=Normal(0, 0.001)),
            bias_attr=ParamAttr(
                learning_rate=2., regularizer=L2Decay(0.)))
    def forward(self, feat):
        out = self.feat(feat)
        out = self.head(out)
        out = F.relu(out)
        return out
@register
 class TTFHead(nn.Layer):
    """
    TTFHead
    Args:
        in_channels (int): the channel number of input to TTFHead.
        num_classes (int): the number of classes, 80 by default.
        hm_head_planes (int): the channel number in heatmap head,
            128 by default.
        wh_head_planes (int): the channel number in width & height head,
            64 by default.
        hm_head_conv_num (int): the number of convolution in heatmap head,
            2 by default.
        wh_head_conv_num (int): the number of convolution in width & height
            head, 2 by default.
        hm_loss (object): Instance of 'CTFocalLoss'.
        wh_loss (object): Instance of 'GIoULoss'.
        wh_offset_base (float): the base offset of width and height,
            16.0 by default.
        down_ratio (int): the actual down_ratio is calculated by base_down_ratio
            (default 16) and the number of upsample layers.
        lite_head(bool): whether use lite version. False by default.
        norm_type (string): norm type, 'sync_bn', 'bn', 'gn' are optional.
            bn by default
        ags_module(bool): whether use AGS module to reweight location feature.
            false by default.
    """
    __shared__ = ['num_classes', 'down_ratio', 'norm_type']
    __inject__ = ['hm_loss', 'wh_loss']
    def __init__(self,
                 in_channels,
                 num_classes=80,
                 hm_head_planes=128,
                 wh_head_planes=64,
                 hm_head_conv_num=2,
                 wh_head_conv_num=2,
                 hm_loss='CTFocalLoss',
                 wh_loss='GIoULoss',
                 wh_offset_base=16.,
                 down_ratio=4,
                 dcn_head=False,
                 lite_head=False,
                 norm_type='bn',
                 ags_module=False):
        super(TTFHead, self).__init__()
        self.in_channels = in_channels
        self.hm_head = HMHead(in_channels, hm_head_planes, num_classes,
                              hm_head_conv_num, dcn_head, lite_head, norm_type)
        self.wh_head = WHHead(in_channels, wh_head_planes, wh_head_conv_num,
                              dcn_head, lite_head, norm_type)
        self.hm_loss = hm_loss
        self.wh_loss = wh_loss
        self.wh_offset_base = wh_offset_base
        self.down_ratio = down_ratio
        self.ags_module = ags_module
    @classmethod
    def from_config(cls, cfg, input_shape):
        if isinstance(input_shape, (list, tuple)):
            input_shape = input_shape[0]
        return {'in_channels': input_shape.channels, }
    def forward(self, feats):
        hm = self.hm_head(feats)
        wh = self.wh_head(feats) * self.wh_offset_base
        return hm, wh
    def filter_box_by_weight(self, pred, target, weight):
        """
        Filter out boxes where ttf_reg_weight is 0, only keep positive samples.
        """
        index = paddle.nonzero(weight > 0)
        index.stop_gradient = True
        weight = paddle.gather_nd(weight, index)
        pred = paddle.gather_nd(pred, index)
        target = paddle.gather_nd(target, index)
        return pred, target, weight
    def filter_loc_by_weight(self, score, weight):
        index = paddle.nonzero(weight > 0)
        index.stop_gradient = True
        score = paddle.gather_nd(score, index)
        return score
    def get_loss(self, pred_hm, pred_wh, target_hm, box_target, target_weight):
        pred_hm = paddle.clip(F.sigmoid(pred_hm), 1e-4, 1 - 1e-4)
        hm_loss = self.hm_loss(pred_hm, target_hm)
        H, W = target_hm.shape[2:]
        mask = paddle.reshape(target_weight, [-1, H, W])
        avg_factor = paddle.sum(mask) + 1e-4
        base_step = self.down_ratio
        shifts_x = paddle.arange(0, W * base_step, base_step, dtype='int32')
        shifts_y = paddle.arange(0, H * base_step, base_step, dtype='int32')
        shift_y, shift_x = paddle.tensor.meshgrid([shifts_y, shifts_x])
        base_loc = paddle.stack([shift_x, shift_y], axis=0)
        base_loc.stop_gradient = True
        pred_boxes = paddle.concat(
            [0 - pred_wh[:, 0:2, :, :] + base_loc, pred_wh[:, 2:4] + base_loc],
            axis=1)
        pred_boxes = paddle.transpose(pred_boxes, [0, 2, 3, 1])
        boxes = paddle.transpose(box_target, [0, 2, 3, 1])
        boxes.stop_gradient = True
        if self.ags_module:
            pred_hm_max = paddle.max(pred_hm, axis=1, keepdim=True)
            pred_hm_max_softmax = F.softmax(pred_hm_max, axis=1)
            pred_hm_max_softmax = paddle.transpose(pred_hm_max_softmax,
                                                   [0, 2, 3, 1])
            pred_hm_max_softmax = self.filter_loc_by_weight(pred_hm_max_softmax,
                                                            mask)
        else:
            pred_hm_max_softmax = None
        pred_boxes, boxes, mask = self.filter_box_by_weight(pred_boxes, boxes,
                                                            mask)
        mask.stop_gradient = True
        wh_loss = self.wh_loss(
            pred_boxes,
            boxes,
            iou_weight=mask.unsqueeze(1),
            loc_reweight=pred_hm_max_softmax)
        wh_loss = wh_loss / avg_factor
        ttf_loss = {'hm_loss': hm_loss, 'wh_loss': wh_loss}
        return ttf_loss
--- a/build/lib/ppdet/modeling/heads/yolo_head.py
+++ b/build/lib/ppdet/modeling/heads/yolo_head.py
@ -0,0 +1,124 @@
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
 from paddle import ParamAttr
 from paddle.regularizer import L2Decay
 from ppdet.core.workspace import register
 def _de_sigmoid(x, eps=1e-7):
    x = paddle.clip(x, eps, 1. / eps)
    x = paddle.clip(1. / x - 1., eps, 1. / eps)
    x = -paddle.log(x)
    return x
@register
 class YOLOv3Head(nn.Layer):
    __shared__ = ['num_classes', 'data_format']
    __inject__ = ['loss']
    def __init__(self,
                 in_channels=[1024, 512, 256],
                 anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
                          [59, 119], [116, 90], [156, 198], [373, 326]],
                 anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]],
                 num_classes=80,
                 loss='YOLOv3Loss',
                 iou_aware=False,
                 iou_aware_factor=0.4,
                 data_format='NCHW'):
        """
        Head for YOLOv3 network
        Args:
            num_classes (int): number of foreground classes
            anchors (list): anchors
            anchor_masks (list): anchor masks
            loss (object): YOLOv3Loss instance
            iou_aware (bool): whether to use iou_aware
            iou_aware_factor (float): iou aware factor
            data_format (str): data format, NCHW or NHWC
        """
        super(YOLOv3Head, self).__init__()
        assert len(in_channels) > 0, "in_channels length should > 0"
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.loss = loss
        self.iou_aware = iou_aware
        self.iou_aware_factor = iou_aware_factor
        self.parse_anchor(anchors, anchor_masks)
        self.num_outputs = len(self.anchors)
        self.data_format = data_format
        self.yolo_outputs = []
        for i in range(len(self.anchors)):
            if self.iou_aware:
                num_filters = len(self.anchors[i]) * (self.num_classes + 6)
            else:
                num_filters = len(self.anchors[i]) * (self.num_classes + 5)
            name = 'yolo_output.{}'.format(i)
            conv = nn.Conv2D(
                in_channels=self.in_channels[i],
                out_channels=num_filters,
                kernel_size=1,
                stride=1,
                padding=0,
                data_format=data_format,
                bias_attr=ParamAttr(regularizer=L2Decay(0.)))
            conv.skip_quant = True
            yolo_output = self.add_sublayer(name, conv)
            self.yolo_outputs.append(yolo_output)
    def parse_anchor(self, anchors, anchor_masks):
        self.anchors = [[anchors[i] for i in mask] for mask in anchor_masks]
        self.mask_anchors = []
        anchor_num = len(anchors)
        for masks in anchor_masks:
            self.mask_anchors.append([])
            for mask in masks:
                assert mask < anchor_num, "anchor mask index overflow"
                self.mask_anchors[-1].extend(anchors[mask])
    def forward(self, feats, targets=None):
        assert len(feats) == len(self.anchors)
        yolo_outputs = []
        for i, feat in enumerate(feats):
            yolo_output = self.yolo_outputs[i](feat)
            if self.data_format == 'NHWC':
                yolo_output = paddle.transpose(yolo_output, [0, 3, 1, 2])
            yolo_outputs.append(yolo_output)
        if self.training:
            return self.loss(yolo_outputs, targets, self.anchors)
        else:
            if self.iou_aware:
                y = []
                for i, out in enumerate(yolo_outputs):
                    na = len(self.anchors[i])
                    ioup, x = out[:, 0:na, :, :], out[:, na:, :, :]
                    b, c, h, w = x.shape
                    no = c // na
                    x = x.reshape((b, na, no, h * w))
                    ioup = ioup.reshape((b, na, 1, h * w))
                    obj = x[:, :, 4:5, :]
                    ioup = F.sigmoid(ioup)
                    obj = F.sigmoid(obj)
                    obj_t = (obj**(1 - self.iou_aware_factor)) * (
                        ioup**self.iou_aware_factor)
                    obj_t = _de_sigmoid(obj_t)
                    loc_t = x[:, :, :4, :]
                    cls_t = x[:, :, 5:, :]
                    y_t = paddle.concat([loc_t, obj_t, cls_t], axis=2)
                    y_t = y_t.reshape((b, c, h, w))
                    y.append(y_t)
                return y
            else:
                return yolo_outputs
    @classmethod
    def from_config(cls, cfg, input_shape):
        return {'in_channels': [i.channels for i in input_shape], }
--- a/build/lib/ppdet/modeling/keypoint_utils.py
+++ b/build/lib/ppdet/modeling/keypoint_utils.py
@ -0,0 +1,302 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import cv2
 import numpy as np
 def get_affine_mat_kernel(h, w, s, inv=False):
    if w < h:
        w_ = s
        h_ = int(np.ceil((s / w * h) / 64.) * 64)
        scale_w = w
        scale_h = h_ / w_ * w
    else:
        h_ = s
        w_ = int(np.ceil((s / h * w) / 64.) * 64)
        scale_h = h
        scale_w = w_ / h_ * h
    center = np.array([np.round(w / 2.), np.round(h / 2.)])
    size_resized = (w_, h_)
    trans = get_affine_transform(
        center, np.array([scale_w, scale_h]), 0, size_resized, inv=inv)
    return trans, size_resized
 def get_affine_transform(center,
                         input_size,
                         rot,
                         output_size,
                         shift=(0., 0.),
                         inv=False):
    """Get the affine transform matrix, given the center/scale/rot/output_size.
    Args:
        center (np.ndarray[2, ]): Center of the bounding box (x, y).
        scale (np.ndarray[2, ]): Scale of the bounding box
            wrt [width, height].
        rot (float): Rotation angle (degree).
        output_size (np.ndarray[2, ]): Size of the destination heatmaps.
        shift (0-100%): Shift translation ratio wrt the width/height.
            Default (0., 0.).
        inv (bool): Option to inverse the affine transform direction.
            (inv=False: src->dst or inv=True: dst->src)
    Returns:
        np.ndarray: The transform matrix.
    """
    assert len(center) == 2
    assert len(input_size) == 2
    assert len(output_size) == 2
    assert len(shift) == 2
    scale_tmp = input_size
    shift = np.array(shift)
    src_w = scale_tmp[0]
    dst_w = output_size[0]
    dst_h = output_size[1]
    rot_rad = np.pi * rot / 180
    src_dir = rotate_point([0., src_w * -0.5], rot_rad)
    dst_dir = np.array([0., dst_w * -0.5])
    src = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center + scale_tmp * shift
    src[1, :] = center + src_dir + scale_tmp * shift
    src[2, :] = _get_3rd_point(src[0, :], src[1, :])
    dst = np.zeros((3, 2), dtype=np.float32)
    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
    dst[2, :] = _get_3rd_point(dst[0, :], dst[1, :])
    if inv:
        trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
    else:
        trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
    return trans
 def _get_3rd_point(a, b):
    """To calculate the affine matrix, three pairs of points are required. This
    function is used to get the 3rd point, given 2D points a & b.
    The 3rd point is defined by rotating vector `a - b` by 90 degrees
    anticlockwise, using b as the rotation center.
    Args:
        a (np.ndarray): point(x,y)
        b (np.ndarray): point(x,y)
    Returns:
        np.ndarray: The 3rd point.
    """
    assert len(a) == 2
    assert len(b) == 2
    direction = a - b
    third_pt = b + np.array([-direction[1], direction[0]], dtype=np.float32)
    return third_pt
 def rotate_point(pt, angle_rad):
    """Rotate a point by an angle.
    Args:
        pt (list[float]): 2 dimensional point to be rotated
        angle_rad (float): rotation angle by radian
    Returns:
        list[float]: Rotated point.
    """
    assert len(pt) == 2
    sn, cs = np.sin(angle_rad), np.cos(angle_rad)
    new_x = pt[0] * cs - pt[1] * sn
    new_y = pt[0] * sn + pt[1] * cs
    rotated_pt = [new_x, new_y]
    return rotated_pt
 def transpred(kpts, h, w, s):
    trans, _ = get_affine_mat_kernel(h, w, s, inv=True)
    return warp_affine_joints(kpts[..., :2].copy(), trans)
 def warp_affine_joints(joints, mat):
    """Apply affine transformation defined by the transform matrix on the
    joints.
    Args:
        joints (np.ndarray[..., 2]): Origin coordinate of joints.
        mat (np.ndarray[3, 2]): The affine matrix.
    Returns:
        matrix (np.ndarray[..., 2]): Result coordinate of joints.
    """
    joints = np.array(joints)
    shape = joints.shape
    joints = joints.reshape(-1, 2)
    return np.dot(np.concatenate(
        (joints, joints[:, 0:1] * 0 + 1), axis=1),
                  mat.T).reshape(shape)
 def affine_transform(pt, t):
    new_pt = np.array([pt[0], pt[1], 1.]).T
    new_pt = np.dot(t, new_pt)
    return new_pt[:2]
 def transform_preds(coords, center, scale, output_size):
    target_coords = np.zeros(coords.shape)
    trans = get_affine_transform(center, scale * 200, 0, output_size, inv=1)
    for p in range(coords.shape[0]):
        target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans)
    return target_coords
 def oks_iou(g, d, a_g, a_d, sigmas=None, in_vis_thre=None):
    if not isinstance(sigmas, np.ndarray):
        sigmas = np.array([
            .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07,
            .87, .87, .89, .89
        ]) / 10.0
    vars = (sigmas * 2)**2
    xg = g[0::3]
    yg = g[1::3]
    vg = g[2::3]
    ious = np.zeros((d.shape[0]))
    for n_d in range(0, d.shape[0]):
        xd = d[n_d, 0::3]
        yd = d[n_d, 1::3]
        vd = d[n_d, 2::3]
        dx = xd - xg
        dy = yd - yg
        e = (dx**2 + dy**2) / vars / ((a_g + a_d[n_d]) / 2 + np.spacing(1)) / 2
        if in_vis_thre is not None:
            ind = list(vg > in_vis_thre) and list(vd > in_vis_thre)
            e = e[ind]
        ious[n_d] = np.sum(np.exp(-e)) / e.shape[0] if e.shape[0] != 0 else 0.0
    return ious
 def oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None):
    """greedily select boxes with high confidence and overlap with current maximum <= thresh
    rule out overlap >= thresh
    Args:
        kpts_db (list): The predicted keypoints within the image
        thresh (float): The threshold to select the boxes
        sigmas (np.array): The variance to calculate the oks iou
            Default: None
        in_vis_thre (float): The threshold to select the high confidence boxes
            Default: None
    Return:
        keep (list): indexes to keep
    """
    if len(kpts_db) == 0:
        return []
    scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))])
    kpts = np.array(
        [kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))])
    areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))])
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]],
                          sigmas, in_vis_thre)
        inds = np.where(oks_ovr <= thresh)[0]
        order = order[inds + 1]
    return keep
 def rescore(overlap, scores, thresh, type='gaussian'):
    assert overlap.shape[0] == scores.shape[0]
    if type == 'linear':
        inds = np.where(overlap >= thresh)[0]
        scores[inds] = scores[inds] * (1 - overlap[inds])
    else:
        scores = scores * np.exp(-overlap**2 / thresh)
    return scores
 def soft_oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None):
    """greedily select boxes with high confidence and overlap with current maximum <= thresh
    rule out overlap >= thresh
    Args:
        kpts_db (list): The predicted keypoints within the image
        thresh (float): The threshold to select the boxes
        sigmas (np.array): The variance to calculate the oks iou
            Default: None
        in_vis_thre (float): The threshold to select the high confidence boxes
            Default: None
    Return:
        keep (list): indexes to keep
    """
    if len(kpts_db) == 0:
        return []
    scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))])
    kpts = np.array(
        [kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))])
    areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))])
    order = scores.argsort()[::-1]
    scores = scores[order]
    # max_dets = order.size
    max_dets = 20
    keep = np.zeros(max_dets, dtype=np.intp)
    keep_cnt = 0
    while order.size > 0 and keep_cnt < max_dets:
        i = order[0]
        oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]],
                          sigmas, in_vis_thre)
        order = order[1:]
        scores = rescore(oks_ovr, scores[1:], thresh)
        tmp = scores.argsort()[::-1]
        order = order[tmp]
        scores = scores[tmp]
        keep[keep_cnt] = i
        keep_cnt += 1
    keep = keep[:keep_cnt]
    return keep
--- a/build/lib/ppdet/modeling/layers.py
+++ b/build/lib/ppdet/modeling/layers.py
--- a/build/lib/ppdet/modeling/losses/init.py
+++ b/build/lib/ppdet/modeling/losses/init.py
@ -0,0 +1,35 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from . import yolo_loss
 from . import iou_aware_loss
 from . import iou_loss
 from . import ssd_loss
 from . import fcos_loss
 from . import solov2_loss
 from . import ctfocal_loss
 from . import keypoint_loss
 from . import jde_loss
 from . import fairmot_loss
 from .yolo_loss import *
 from .iou_aware_loss import *
 from .iou_loss import *
 from .ssd_loss import *
 from .fcos_loss import *
 from .solov2_loss import *
 from .ctfocal_loss import *
 from .keypoint_loss import *
 from .jde_loss import *
 from .fairmot_loss import *
--- a/Show More
+++ b/Show More