add hardtanh operation

2023-01-29 02:37:15 +00:00 · 2023-01-29 02:37:15 +00:00 · 6b53a50927
parent 8d7150f815
commit 6b53a50927
5 changed files with 136 additions and 0 deletions
--- a/include/core/operator.h
+++ b/include/core/operator.h
@ -93,6 +93,7 @@ enum class OpType {
    Square,
    SquaredDifference,
    Flip,
+    Hardtanh,
    //
    MemBound = 300,
 };
@ -194,6 +195,7 @@ class OpRegistry {
            FOP(Square);
            FOP(SquaredDifference);
            FOP(Flip);
+            FOP(Hardtanh);
            //
            FOP(MemBound);
        default:
--- a/include/operators/unary.h
+++ b/include/operators/unary.h
@ -33,6 +33,23 @@ class ClipObj : public OperatorObj {
    vector<int> getOpAttrVector() const override;
 };

+/**
+ * @brief Unary Hardtanh operator that stores a lower and an upper bound.
+ *
+ * Takes exactly one input tensor and produces exactly one output tensor.
+ * The bounds are fixed at construction and read back through getMin() and
+ * getMax().
+ */
+class HardtanhObj : public OperatorObj {
+  public:
+    /// Builds the operator from its input/output tensors and its two bounds.
+    HardtanhObj(GraphObj *graph, Tensor input, Tensor output, float min,
+                float max);
+
+    optional<vector<Shape>> inferShape(const TensorVec &inputs) const override;
+    std::string toString() const override;
+
+    int numInputs() const override { return 1; }
+    int numOutputs() const override { return 1; }
+
+    /// Lower bound given to the constructor.
+    float getMin() const { return minValue; }
+    /// Upper bound given to the constructor.
+    float getMax() const { return maxValue; }
+
+  private:
+    float minValue;
+    float maxValue;
+
+    vector<int> getWorkloadVector() const override;
+    vector<int> getOpAttrVector() const override;
+};
+
 class FlipObj : public OperatorObj {
  public:
    FlipObj(GraphObj *graph, Tensor input, Tensor output, vector<int> axis);
--- a/src/kernels/bang/hardtanh.cc
+++ b/src/kernels/bang/hardtanh.cc
@ -0,0 +1,42 @@
+#include "bang/bang_kernel_without_config.h"
+#include "bang/bang_runtime.h"
+#include "operators/unary.h"
+
+namespace infini {
+/**
+ * BANG kernel for HardtanhObj, implemented with cnnlHardtanh.
+ *
+ * Only 4-D inputs are handled; any other rank stops at IT_TODO_HALT().
+ * A single NCHW / float tensor descriptor is built from the input dimensions
+ * and used for both the input and the output of the CNNL call.
+ */
+class HardtanhCnnl : public BangKernelWithoutConfig {
+    /**
+     * Runs Hardtanh on the operator's first input and writes its output.
+     *
+     * @param _op      Operator to execute; cast to HardtanhObj.
+     * @param _context Runtime; cast to BangRuntimeObj to obtain the CNNL
+     *                 handle.
+     */
+    void compute(const Operator &_op,
+                 const RuntimeObj *_context) const override {
+        auto op = as<HardtanhObj>(_op);
+        auto context = dynamic_cast<const BangRuntimeObj *>(_context);
+
+        void *const aData = (op->getInputs(0)->getRawDataPtr<void *>());
+        void *const cData = (op->getOutput()->getRawDataPtr<void *>());
+        const float min = op->getMin();
+        const float max = op->getMax();
+
+        // Check the rank before any CNNL object is created, so the halt path
+        // has nothing to release.
+        const auto dim = op->getInputs(0)->getDims();
+        if (dim.size() != 4)
+            IT_TODO_HALT();
+
+        int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
+
+        // Descriptor shared by input and output.
+        cnnlTensorDescriptor_t aDesc;
+        checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
+        checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
+                                               CNNL_DTYPE_FLOAT, 4, dim_array));
+
+        // max is passed before min here; keep this in sync with the
+        // cnnlHardtanh signature.
+        const cnnlStatus_t stat = cnnlHardtanh(context->cnnlHandle(), aDesc,
+                                               aData, max, min, aDesc, cData);
+
+        // Release the descriptor on every path. Previously a failed
+        // cnnlHardtanh returned before this call and leaked the descriptor.
+        // Destroys in BANG do not require sync. But cnnl does not state
+        // whether sync is required before destroys.
+        checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
+
+        // NOTE(review): a failed cnnlHardtanh is still not reported to the
+        // caller; this keeps the original non-fatal behaviour.
+        if (stat != CNNL_STATUS_SUCCESS)
+            return;
+    }
+};
+
+// Registers HardtanhCnnl as the kernel for OpType::Hardtanh on Device::BANG
+// with DataType::Float32; the string literal is the label handed to the
+// registration macro.
+REGISTER_KERNEL(Device::BANG, OpType::Hardtanh, DataType::Float32, HardtanhCnnl,
+                "Hardtanh_cnnl_BANG_Float32");
+
+}; // namespace infini
--- a/src/operators/unary.cc
+++ b/src/operators/unary.cc
@ -65,6 +65,39 @@ vector<int> ClipObj::getOpAttrVector() const {
    return {enum_to_underlying(type)};
 }

+/// Records both bounds, then asserts that the operator is valid for `graph`.
+HardtanhObj::HardtanhObj(GraphObj *graph, Tensor input, Tensor output,
+                         float min, float max)
+    : OperatorObj(OpType::Hardtanh, {input}, {output}),
+      minValue(min),
+      maxValue(max) {
+    IT_ASSERT(checkValid(graph));
+}
+
+/// The single output takes the dimensions of the first input unchanged.
+optional<vector<Shape>> HardtanhObj::inferShape(const TensorVec &inputs) const {
+    const auto &source = inputs[0];
+    return {{source->getDims()}};
+}
+
+/// Renders "<op name>[<guid>](<input dims>,input=<guid>,output=<guid>)".
+std::string HardtanhObj::toString() const {
+    const auto &in = inputs[0];
+    const auto &out = outputs[0];
+
+    std::ostringstream text;
+    text << OpRegistry::getOpName(type) << "[" << getGuid() << "]"
+         << "("
+         << vecToString(in->getDims()) << ","
+         << "input=" << in->getGuid() << ","
+         << "output=" << out->getGuid() << ")";
+    return text.str();
+}
+
+/// Workload key: the operator type followed by every output dimension.
+vector<int> HardtanhObj::getWorkloadVector() const {
+    const auto &dims = outputs[0]->getDims();
+
+    vector<int> workload;
+    workload.reserve(dims.size() + 1);
+    workload.push_back(enum_to_underlying(type));
+    for (const auto extent : dims)
+        workload.push_back(extent);
+    return workload;
+}
+
+/// Attribute key: contains only the operator type.
+/// NOTE(review): minValue / maxValue are not part of this vector.
+vector<int> HardtanhObj::getOpAttrVector() const {
+    vector<int> attrs{enum_to_underlying(type)};
+    return attrs;
+}
+
 FlipObj::FlipObj(GraphObj *graph, Tensor input, Tensor output, vector<int> axis)
    : OperatorObj(OpType::Flip, {input}, {output}), axisValue(axis) {
    IT_ASSERT(checkValid(graph));
--- a/test/kernels/bang/test_bang_hardtanh.cc
+++ b/test/kernels/bang/test_bang_hardtanh.cc
@ -0,0 +1,42 @@
+#include "bang/bang_runtime.h"
+#include "core/graph.h"
+#include "core/kernel.h"
+#include "core/runtime.h"
+#include "operators/unary.h"
+
+#include "test.h"
+
+namespace infini {
+
+/**
+ * Runs operator T on the BANG runtime over a Float32 tensor of `shape`
+ * filled by `generator`, then prints the input and the copied-back output.
+ *
+ * The bounds passed to the operator are fixed at 1.0 and 4.0.
+ */
+template <class T>
+void testHardtanh(const std::function<void(void *, size_t, DataType)> &generator,
+                  const Shape &shape) {
+    // Host-side and BANG-side runtimes.
+    Runtime cpuRuntime = CpuRuntimeObj::getInstance();
+    auto bangRuntime = make_ref<BangRuntimeObj>();
+
+    // Input tensor allocated on the CPU runtime and filled by the generator.
+    Tensor hostInput = make_ref<TensorObj>(shape, DataType::Float32, cpuRuntime);
+    hostInput->dataMalloc();
+    hostInput->setData(generator);
+
+    // Graph on the BANG runtime: clone the input into it and add the operator.
+    Graph bangGraph = make_ref<GraphObj>(bangRuntime);
+    auto deviceInput = bangGraph->cloneTensor(hostInput);
+    float lower = 1.0;
+    float upper = 4.0;
+    auto hardtanhOp = bangGraph->addOp<T>(deviceInput, nullptr, lower, upper);
+    bangGraph->dataMalloc();
+    bangRuntime->run(bangGraph);
+
+    // Copy the result to the CPU runtime and print both tensors.
+    auto hostOutput = hardtanhOp->getOutput()->clone(cpuRuntime);
+    hostInput->printData();
+    hostOutput->printData();
+
+    // NOTE(review): this assertion is always true, so the test only checks
+    // that the run completes; the output values are printed, not verified.
+    EXPECT_TRUE(1);
+}
+
+// Runs HardtanhObj on a 1x2x2x3 tensor filled by IncrementalGenerator.
+TEST(cnnl_Hardtanh, run) {
+    const Shape inputShape{1, 2, 2, 3};
+    testHardtanh<HardtanhObj>(IncrementalGenerator(), inputShape);
+}
+
+} // namespace infini