diff --git a/include/operators/activation_backward.h b/include/operators/activation_backward.h
new file mode 100644
index 00000000..02e51c2d
--- /dev/null
+++ b/include/operators/activation_backward.h
@@ -0,0 +1,29 @@
+#pragma once
+#include "core/operator.h"
+
+namespace infini {
+class ActivationBackwardObj : public OperatorObj {
+  public:
+    ActivationBackwardObj(OpType type, GraphObj *graph, Tensor y,
+                          Tensor diff_y, Tensor x, Tensor diff_x);
+    optional<vector<Shape>> inferShape(const TensorVec &inputs) const override;
+
+    std::string toString() const override;
+    int numInputs() const override { return 3; }
+    int numOutputs() const override { return 1; }
+
+  private:
+    vector<int> getWorkloadVector() const override;
+    vector<int> getOpAttrVector() const override;
+};
+
+#define DEFINE_ACTIVATION_BACKWARD_OBJ(prefix, type)                          \
+    class prefix##Obj : public ActivationBackwardObj {                        \
+      public:                                                                 \
+        prefix##Obj(GraphObj *graph, Tensor y, Tensor diff_y, Tensor x,       \
+                    Tensor diff_x)                                            \
+            : ActivationBackwardObj(type, graph, y, diff_y, x, diff_x) {}     \
+    };
+
+DEFINE_ACTIVATION_BACKWARD_OBJ(ReluBackward, OpType::ReluBackward)
+DEFINE_ACTIVATION_BACKWARD_OBJ(SigmoidBackward, OpType::SigmoidBackward)
+DEFINE_ACTIVATION_BACKWARD_OBJ(TanhBackward, OpType::TanhBackward)
+}; // namespace infini
diff --git a/src/kernels/bang/activation_backward.cc b/src/kernels/bang/activation_backward.cc
new file mode 100644
index 00000000..cb321409
--- /dev/null
+++ b/src/kernels/bang/activation_backward.cc
@@ -0,0 +1,96 @@
+#include "bang/bang_kernel_without_config.h"
+#include "bang/bang_runtime.h"
+#include "operators/activation_backward.h"
+
+namespace infini {
+class ActivationBackwardCnnl : public BangKernelWithoutConfig {
+    virtual cnnlActivationMode_t getOpType() const = 0;
+    virtual float getCoef() const = 0;
+    virtual tuple<float, float> getAlphBeta() const { return {1.f, 0.f}; }
+    void compute(const Operator &_op,
+                 const RuntimeObj *_context) const override {
+        auto op = as<ActivationBackwardObj>(_op);
+        auto context = dynamic_cast<const BangRuntimeObj *>(_context);
+
+        void *const yData = (op->getInputs(0)->getRawDataPtr<void *>());
+        void *const diffYData = (op->getInputs(1)->getRawDataPtr<void *>());
+        void *const xData = (op->getInputs(2)->getRawDataPtr<void *>());
+        void *const diffXData = (op->getOutput()->getRawDataPtr<void *>());
+
+        cnnlTensorDescriptor_t yDesc, diffYDesc, xDesc, diffXDesc;
+        auto dim = op->getInputs(0)->getDims();
+        if (dim.size() != 4)
+            IT_TODO_HALT();
+
+        int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
+        // input y
+        checkCnnlError(cnnlCreateTensorDescriptor(&yDesc));
+        checkCnnlError(cnnlSetTensorDescriptor(yDesc, CNNL_LAYOUT_NCHW,
+                                               CNNL_DTYPE_FLOAT, 4, dim_array));
+        // input diff_y
+        checkCnnlError(cnnlCreateTensorDescriptor(&diffYDesc));
+        checkCnnlError(cnnlSetTensorDescriptor(diffYDesc, CNNL_LAYOUT_NCHW,
+                                               CNNL_DTYPE_FLOAT, 4, dim_array));
+        // input x
+        checkCnnlError(cnnlCreateTensorDescriptor(&xDesc));
+        checkCnnlError(cnnlSetTensorDescriptor(xDesc, CNNL_LAYOUT_NCHW,
+                                               CNNL_DTYPE_FLOAT, 4, dim_array));
+        // output diff_x
+        checkCnnlError(cnnlCreateTensorDescriptor(&diffXDesc));
+        checkCnnlError(cnnlSetTensorDescriptor(diffXDesc, CNNL_LAYOUT_NCHW,
+                                               CNNL_DTYPE_FLOAT, 4, dim_array));
+
+        // activation op descriptor
+        cnnlActivationDescriptor_t opDesc;
+        checkCnnlError(cnnlCreateActivationDescriptor(&opDesc));
+        checkCnnlError(cnnlSetActivationDescriptor(
+            opDesc, getOpType(), CNNL_NOT_PROPAGATE_NAN, getCoef()));
+
+        auto [alpha, beta] = getAlphBeta();
+        cnnlStatus_t stat = cnnlActivationBackward(
+            context->cnnlHandle(), opDesc, &alpha, yDesc, yData, diffYDesc,
+            diffYData, xDesc, xData,
+            &beta, diffXDesc, diffXData);
+        if (stat != CNNL_STATUS_SUCCESS)
+            return;
+
+        // Destroying descriptors on BANG does not require a sync, but cnnl
+        // does not state whether a sync is required before the destroy calls.
+        checkCnnlError(cnnlDestroyTensorDescriptor(yDesc));
+        checkCnnlError(cnnlDestroyTensorDescriptor(diffYDesc));
+        checkCnnlError(cnnlDestroyTensorDescriptor(xDesc));
+        checkCnnlError(cnnlDestroyTensorDescriptor(diffXDesc));
+        checkCnnlError(cnnlDestroyActivationDescriptor(opDesc));
+    }
+};
+
+class ReluBackwardCnnl : public ActivationBackwardCnnl {
+    cnnlActivationMode_t getOpType() const override {
+        return CNNL_ACTIVATION_RELU;
+    }
+    float getCoef() const override { return 0.0; }
+};
+
+class SigmoidBackwardCnnl : public ActivationBackwardCnnl {
+    cnnlActivationMode_t getOpType() const override {
+        return CNNL_ACTIVATION_SIGMOID;
+    }
+    float getCoef() const override { return 0.0; }
+};
+
+class TanhBackwardCnnl : public ActivationBackwardCnnl {
+    cnnlActivationMode_t getOpType() const override {
+        return CNNL_ACTIVATION_TANH;
+    }
+    float getCoef() const override { return 0.0; }
+};
+
+REGISTER_KERNEL(Device::BANG, OpType::ReluBackward, DataType::Float32,
+                ReluBackwardCnnl, "ReluBackward_cnnl_BANG_Float32");
+REGISTER_KERNEL(Device::BANG, OpType::SigmoidBackward, DataType::Float32,
+                SigmoidBackwardCnnl, "SigmoidBackward_cnnl_BANG_Float32");
+REGISTER_KERNEL(Device::BANG, OpType::TanhBackward, DataType::Float32,
+                TanhBackwardCnnl, "TanhBackward_cnnl_BANG_Float32");
+
+}; // namespace infini
diff --git a/src/operators/activation_backward.cc b/src/operators/activation_backward.cc
new file mode 100644
index 00000000..1ae723aa
--- /dev/null
+++ b/src/operators/activation_backward.cc
@@ -0,0 +1,40 @@
+#include "operators/activation_backward.h"
+
+namespace infini {
+ActivationBackwardObj::ActivationBackwardObj(OpType type, GraphObj *graph,
+                                             Tensor y, Tensor diff_y, Tensor x,
+                                             Tensor diff_x)
+    : OperatorObj(type, {y, diff_y, x}, {diff_x}) {
+    IT_ASSERT(checkValid(graph));
+}
+
+optional<vector<Shape>>
+ActivationBackwardObj::inferShape(const TensorVec &inputs) const {
+    // diff_x has the same shape as the first input (y)
+    const auto A = inputs[0];
+    return {{A->getDims()}};
+}
+
+std::string ActivationBackwardObj::toString() const {
+    std::ostringstream os;
+    os << OpRegistry::getOpName(type) << "[" << getGuid() << "]";
+    os << "(";
+    os << vecToString(inputs[0]->getDims()) << ",";
+    os << "input=" << inputs[0]->getGuid() << ",";
+    os << "output=" << outputs[0]->getGuid() << ")";
+    return os.str();
+}
+
+vector<int> ActivationBackwardObj::getWorkloadVector() const {
+    vector<int> ret{enum_to_underlying(type)};
+    const Shape shape = outputs[0]->getDims();
+    ret.insert(ret.end(), shape.begin(), shape.end());
+    return ret;
+}
+
+vector<int> ActivationBackwardObj::getOpAttrVector() const {
+    return {enum_to_underlying(type)};
+}
+
+}; // namespace infini
diff --git a/test/kernels/bang/test_bang_activation_backward.cc b/test/kernels/bang/test_bang_activation_backward.cc
new file mode 100644
index 00000000..2d23d8b4
--- /dev/null
+++ b/test/kernels/bang/test_bang_activation_backward.cc
@@ -0,0 +1,52 @@
+#include "bang/bang_runtime.h"
+#include "core/graph.h"
+#include "core/kernel.h"
+#include "core/runtime.h"
+#include "operators/activation_backward.h"
+
+#include "test.h"
+
+namespace infini {
+
+template <class T>
+void testActivationBackward(
+    const std::function<void(void *, size_t, DataType)> &generator,
+    const Shape &shape) {
+    // Runtime
+    Runtime cpuRuntime = CpuRuntimeObj::getInstance();
+    auto bangRuntime = make_ref<BangRuntimeObj>();
+
+    // Build input data on CPU
+    Tensor yCpu = make_ref<TensorObj>(shape, DataType::Float32, cpuRuntime);
+    Tensor diffYCpu = make_ref<TensorObj>(shape, DataType::Float32, cpuRuntime);
+    Tensor xCpu = make_ref<TensorObj>(shape, DataType::Float32, cpuRuntime);
+
+    yCpu->dataMalloc();
+    diffYCpu->dataMalloc();
+    xCpu->dataMalloc();
+
+    yCpu->setData(generator);
+    diffYCpu->setData(generator);
+    xCpu->setData(generator);
+
+    // Build and run the graph on the BANG device
+    Graph bangGraph = make_ref<GraphObj>(bangRuntime);
+    auto yGpu = bangGraph->cloneTensor(yCpu);
+    auto diffYGpu = bangGraph->cloneTensor(diffYCpu);
+    auto xGpu = bangGraph->cloneTensor(xCpu);
+    auto gpuOp = bangGraph->addOp<T>(yGpu, diffYGpu, xGpu, nullptr);
+    bangGraph->dataMalloc();
+    bangRuntime->run(bangGraph);
+    auto diffXGpu = gpuOp->getOutput();
+    auto diffXGpu2Cpu = diffXGpu->clone(cpuRuntime);
+    // Check: only prints the result; no reference values are compared yet
+    diffXGpu2Cpu->print();
+    EXPECT_TRUE(1);
+}
+
+TEST(cnnl_ActivationBackward, run) {
+    testActivationBackward<ReluBackwardObj>(IncrementalGenerator(),
+                                            Shape{1, 2, 2, 3});
+    testActivationBackward<SigmoidBackwardObj>(IncrementalGenerator(),
+                                               Shape{1, 2, 2, 3});
+    testActivationBackward<TanhBackwardObj>(IncrementalGenerator(),
+                                            Shape{1, 2, 2, 3});
+}
+
+} // namespace infini
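
Note: the DEFINE_ACTIVATION_BACKWARD_OBJ macro in the new header generates one thin subclass per (prefix, type) pair. For illustration only (this expansion is not part of the patch), DEFINE_ACTIVATION_BACKWARD_OBJ(ReluBackward, OpType::ReluBackward) expands to roughly:

    // sketch of the macro expansion for ReluBackward
    class ReluBackwardObj : public ActivationBackwardObj {
      public:
        ReluBackwardObj(GraphObj *graph, Tensor y, Tensor diff_y, Tensor x,
                        Tensor diff_x)
            : ActivationBackwardObj(OpType::ReluBackward, graph, y, diff_y, x,
                                    diff_x) {}
    };

This is the class the test above instantiates via bangGraph->addOp<ReluBackwardObj>(yGpu, diffYGpu, xGpu, nullptr); SigmoidBackwardObj and TanhBackwardObj are generated the same way.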