add ceil operation and floor operation

2022-12-14 02:50:06 +00:00 · 2022-12-14 02:50:06 +00:00 · 58b89dd601
parent 46a1bb2773
commit 58b89dd601
6 changed files with 179 additions and 0 deletions
--- a/include/core/operator.h
+++ b/include/core/operator.h
@ -57,6 +57,8 @@ enum class OpType {
    Resize,
    Arange,
    Copy,
+    Ceil,
+    Floor,
    //
    MemBound = 300,
 };
@ -122,6 +124,8 @@ class OpRegistry {
            FOP(ATanH);
            FOP(Arange);
            FOP(Copy);
+            FOP(Ceil);
+            FOP(Floor);
            //
            FOP(MemBound);
        default:
--- a/include/operators/unary.h
+++ b/include/operators/unary.h
@ -41,5 +41,8 @@ DEFINE_UNARY_OBJ(TanH, OpType::TanH)
 DEFINE_UNARY_OBJ(ASinH, OpType::ASinH)
 DEFINE_UNARY_OBJ(ACosH, OpType::ACosH)
 DEFINE_UNARY_OBJ(ATanH, OpType::ATanH)
+
 DEFINE_UNARY_OBJ(Copy, OpType::Copy)
+DEFINE_UNARY_OBJ(Ceil, OpType::Ceil)
+DEFINE_UNARY_OBJ(Floor, OpType::Floor)
 }; // namespace infini
--- a/src/kernels/bang/ceil.cc
+++ b/src/kernels/bang/ceil.cc
@ -0,0 +1,46 @@
+#include "bang/bang_kernel_without_config.h"
+#include "bang/bang_runtime.h"
+#include "operators/unary.h"
+
+namespace infini {
+/// BANG kernel for the element-wise Ceil operator, implemented with cnnlCeil.
+///
+/// Only 4-D inputs are handled; any other rank ends in IT_TODO_HALT(). The
+/// input and output are both described to cnnl as NCHW float tensors that
+/// share the input's dimensions.
+class CeilCnnl : public BangKernelWithoutConfig {
+    /// Applies cnnlCeil to the operator's first input and writes the result
+    /// to its output.
+    ///
+    /// @param _op      Operator to run; accessed as a UnaryObj.
+    /// @param _context Runtime, cast to BangRuntimeObj to get the cnnl handle.
+    void compute(const Operator &_op,
+                 const RuntimeObj *_context) const override {
+        auto op = as<UnaryObj>(_op);
+        auto context = dynamic_cast<const BangRuntimeObj *>(_context);
+
+        void *const aData = (op->getInputs(0)->getRawDataPtr<void *>());
+        void *const cData = (op->getOutput()->getRawDataPtr<void *>());
+
+        // Validate the rank before any descriptor is created, so the halt
+        // path has nothing to release.
+        auto dim = op->getInputs(0)->getDims();
+        if (dim.size() != 4)
+            IT_TODO_HALT();
+
+        int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
+        cnnlTensorDescriptor_t aDesc, cDesc;
+        // get inputs
+        checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
+        checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
+                                               CNNL_DTYPE_FLOAT, 4, dim_array));
+
+        // get outputs
+        checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
+        checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
+                                               CNNL_DTYPE_FLOAT, 4, dim_array));
+
+        const cnnlStatus_t stat =
+            cnnlCeil(context->cnnlHandle(), aDesc, aData, cDesc, cData);
+
+        // Release both descriptors whether or not cnnlCeil succeeded, so a
+        // failed call does not leak them.
+        // Destroys in BANG do not require sync. But cnnl does not state
+        // whether sync is required before destroys.
+        checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
+        checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
+
+        // A failing status is not escalated; there is nothing left to do.
+        if (stat != CNNL_STATUS_SUCCESS)
+            return;
+    }
+};
+
+// Register CeilCnnl for OpType::Ceil on Device::BANG with Float32 data.
+REGISTER_KERNEL(Device::BANG, OpType::Ceil, DataType::Float32, CeilCnnl,
+                "Ceil_cnnl_BANG_Float32");
+
+}; // namespace infini
--- a/src/kernels/bang/floor.cc
+++ b/src/kernels/bang/floor.cc
@ -0,0 +1,46 @@
+#include "bang/bang_kernel_without_config.h"
+#include "bang/bang_runtime.h"
+#include "operators/unary.h"
+
+namespace infini {
+/// BANG kernel for the element-wise Floor operator, implemented with
+/// cnnlFloor.
+///
+/// Only 4-D inputs are handled; any other rank ends in IT_TODO_HALT(). The
+/// input and output are both described to cnnl as NCHW float tensors that
+/// share the input's dimensions.
+class FloorCnnl : public BangKernelWithoutConfig {
+    /// Applies cnnlFloor to the operator's first input and writes the result
+    /// to its output.
+    ///
+    /// @param _op      Operator to run; accessed as a UnaryObj.
+    /// @param _context Runtime, cast to BangRuntimeObj to get the cnnl handle.
+    void compute(const Operator &_op,
+                 const RuntimeObj *_context) const override {
+        auto op = as<UnaryObj>(_op);
+        auto context = dynamic_cast<const BangRuntimeObj *>(_context);
+
+        void *const aData = (op->getInputs(0)->getRawDataPtr<void *>());
+        void *const cData = (op->getOutput()->getRawDataPtr<void *>());
+
+        // Validate the rank before any descriptor is created, so the halt
+        // path has nothing to release.
+        auto dim = op->getInputs(0)->getDims();
+        if (dim.size() != 4)
+            IT_TODO_HALT();
+
+        int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
+        cnnlTensorDescriptor_t aDesc, cDesc;
+        // get inputs
+        checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
+        checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
+                                               CNNL_DTYPE_FLOAT, 4, dim_array));
+
+        // get outputs
+        checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
+        checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
+                                               CNNL_DTYPE_FLOAT, 4, dim_array));
+
+        const cnnlStatus_t stat =
+            cnnlFloor(context->cnnlHandle(), aDesc, aData, cDesc, cData);
+
+        // Release both descriptors whether or not cnnlFloor succeeded, so a
+        // failed call does not leak them.
+        // Destroys in BANG do not require sync. But cnnl does not state
+        // whether sync is required before destroys.
+        checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
+        checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
+
+        // A failing status is not escalated; there is nothing left to do.
+        if (stat != CNNL_STATUS_SUCCESS)
+            return;
+    }
+};
+
+// Register FloorCnnl for OpType::Floor on Device::BANG with Float32 data.
+REGISTER_KERNEL(Device::BANG, OpType::Floor, DataType::Float32, FloorCnnl,
+                "Floor_cnnl_BANG_Float32");
+
+}; // namespace infini
--- a/test/kernels/bang/test_bang_ceil.cc
+++ b/test/kernels/bang/test_bang_ceil.cc
@ -0,0 +1,40 @@
+#include "bang/bang_runtime.h"
+#include "core/graph.h"
+#include "core/kernel.h"
+#include "core/runtime.h"
+#include "operators/unary.h"
+
+#include "test.h"
+
+namespace infini {
+
+/// Builds a one-operator BANG graph around an operator of type T, runs it on
+/// a tensor filled by `generator`, and prints the input and the result.
+///
+/// @tparam T        Operator object type added to the graph (e.g. CeilObj).
+/// @param generator Fills the CPU-side input tensor via setData.
+/// @param shape     Shape of the Float32 input tensor.
+///
+/// NOTE(review): the final EXPECT_TRUE is always satisfied, so the result is
+/// only printed and never compared against expected values.
+template <class T>
+void testCeil(const std::function<void(void *, size_t, DataType)> &generator,
+              const Shape &shape) {
+    // Host and device runtimes
+    Runtime hostRuntime = CpuRuntimeObj::getInstance();
+    auto deviceRuntime = make_ref<BangRuntimeObj>();
+
+    // The input tensor is allocated on the CPU and filled by the generator
+    Tensor hostInput =
+        make_ref<TensorObj>(shape, DataType::Float32, hostRuntime);
+    hostInput->dataMalloc();
+    hostInput->setData(generator);
+
+    // Single-operator graph on the BANG device
+    Graph graph = make_ref<GraphObj>(deviceRuntime);
+    auto deviceInput = graph->cloneTensor(hostInput);
+    auto unaryOp = graph->addOp<T>(deviceInput, nullptr);
+    graph->dataMalloc();
+    deviceRuntime->run(graph);
+
+    // Bring the result back to the CPU and dump both tensors
+    auto hostOutput = unaryOp->getOutput()->clone(hostRuntime);
+    hostInput->printData();
+    hostOutput->printData();
+    EXPECT_TRUE(true);
+}
+
+// Runs CeilObj on a 1x2x2x3 tensor filled by IncrementalGenerator.
+TEST(cnnl_Ceil, run) {
+    const Shape inputShape{1, 2, 2, 3};
+    testCeil<CeilObj>(IncrementalGenerator(), inputShape);
+}
+
+} // namespace infini
--- a/test/kernels/bang/test_bang_floor.cc
+++ b/test/kernels/bang/test_bang_floor.cc
@ -0,0 +1,40 @@
+#include "bang/bang_runtime.h"
+#include "core/graph.h"
+#include "core/kernel.h"
+#include "core/runtime.h"
+#include "operators/unary.h"
+
+#include "test.h"
+
+namespace infini {
+
+/// Builds a one-operator BANG graph around an operator of type T, runs it on
+/// a tensor filled by `generator`, and prints the input and the result.
+///
+/// @tparam T        Operator object type added to the graph (e.g. FloorObj).
+/// @param generator Fills the CPU-side input tensor via setData.
+/// @param shape     Shape of the Float32 input tensor.
+///
+/// NOTE(review): the final EXPECT_TRUE is always satisfied, so the result is
+/// only printed and never compared against expected values.
+template <class T>
+void testFloor(const std::function<void(void *, size_t, DataType)> &generator,
+               const Shape &shape) {
+    // Host and device runtimes
+    Runtime hostRuntime = CpuRuntimeObj::getInstance();
+    auto deviceRuntime = make_ref<BangRuntimeObj>();
+
+    // The input tensor is allocated on the CPU and filled by the generator
+    Tensor hostInput =
+        make_ref<TensorObj>(shape, DataType::Float32, hostRuntime);
+    hostInput->dataMalloc();
+    hostInput->setData(generator);
+
+    // Single-operator graph on the BANG device
+    Graph graph = make_ref<GraphObj>(deviceRuntime);
+    auto deviceInput = graph->cloneTensor(hostInput);
+    auto unaryOp = graph->addOp<T>(deviceInput, nullptr);
+    graph->dataMalloc();
+    deviceRuntime->run(graph);
+
+    // Bring the result back to the CPU and dump both tensors
+    auto hostOutput = unaryOp->getOutput()->clone(hostRuntime);
+    hostInput->printData();
+    hostOutput->printData();
+    EXPECT_TRUE(true);
+}
+
+// Runs FloorObj on a 1x2x2x3 tensor filled by IncrementalGenerator.
+TEST(cnnl_Floor, run) {
+    const Shape inputShape{1, 2, 2, 3};
+    testFloor<FloorObj>(IncrementalGenerator(), inputShape);
+}
+
+} // namespace infini