forked from jiuyuan/InfiniTensor
add activation backward operation
parent 468ed541af
commit db9069f1b7
@ -0,0 +1,29 @@
#pragma once
#include "core/operator.h"

namespace infini {
class ActivationBackwardObj : public OperatorObj {
  public:
    ActivationBackwardObj(OpType type, GraphObj *graph, Tensor y,
                          Tensor diff_y, Tensor x, Tensor diff_x);
    optional<vector<Shape>> inferShape(const TensorVec &inputs) const override;

    std::string toString() const override;
    int numInputs() const override { return 3; }
    int numOutputs() const override { return 1; }

  private:
    vector<int> getWorkloadVector() const override;
    vector<int> getOpAttrVector() const override;
};

#define DEFINE_ACTIVATION_BACKWARD_OBJ(prefix, type)                          \
    class prefix##Obj : public ActivationBackwardObj {                        \
      public:                                                                 \
        prefix##Obj(GraphObj *graph, Tensor y, Tensor diff_y, Tensor x,       \
                    Tensor diff_x)                                            \
            : ActivationBackwardObj(type, graph, y, diff_y, x, diff_x) {}     \
    };

DEFINE_ACTIVATION_BACKWARD_OBJ(ReluBackward, OpType::ReluBackward)
DEFINE_ACTIVATION_BACKWARD_OBJ(SigmoidBackward, OpType::SigmoidBackward)
DEFINE_ACTIVATION_BACKWARD_OBJ(TanhBackward, OpType::TanhBackward)
} // namespace infini
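For reference, the gradients these three operators compute are the standard activation derivatives, expressed in terms of the saved forward output y = f(x). A minimal CPU sketch follows (illustrative only, not part of this commit; the CNNL kernel below performs the equivalent computation on-device):

// CPU reference for the three backward ops (illustrative sketch).
// y = f(x) is the forward output, diffY = dL/dy, diffX = dL/dx.
#include <cstddef>

void reluBackward(const float *y, const float *diffY, float *diffX,
                  std::size_t n) {
    for (std::size_t i = 0; i < n; ++i)
        diffX[i] = y[i] > 0 ? diffY[i] : 0.f; // relu'(x) = 1 iff x > 0
}
void sigmoidBackward(const float *y, const float *diffY, float *diffX,
                     std::size_t n) {
    for (std::size_t i = 0; i < n; ++i)
        diffX[i] = diffY[i] * y[i] * (1.f - y[i]); // sigmoid' = y * (1 - y)
}
void tanhBackward(const float *y, const float *diffY, float *diffX,
                  std::size_t n) {
    for (std::size_t i = 0; i < n; ++i)
        diffX[i] = diffY[i] * (1.f - y[i] * y[i]); // tanh' = 1 - y^2
}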
@ -0,0 +1,96 @@
#include "bang/bang_kernel_without_config.h"
#include "bang/bang_runtime.h"
#include "operators/activation_backward.h"

namespace infini {
class ActivationBackwardCnnl : public BangKernelWithoutConfig {
    virtual cnnlActivationMode_t getOpType() const = 0;
    virtual float getCoef() const = 0;
    virtual tuple<float, float> getAlphBeta() const { return {1.f, 0.f}; }

    void compute(const Operator &_op,
                 const RuntimeObj *_context) const override {
        auto op = as<ActivationBackwardObj>(_op);
        auto context = dynamic_cast<const BangRuntimeObj *>(_context);

        void *const yData = (op->getInputs(0)->getRawDataPtr<void *>());
        void *const diffYData = (op->getInputs(1)->getRawDataPtr<void *>());
        void *const xData = (op->getInputs(2)->getRawDataPtr<void *>());
        void *const diffXData = (op->getOutput()->getRawDataPtr<void *>());

        cnnlTensorDescriptor_t yDesc, diffYDesc, xDesc, diffXDesc;
        auto dim = op->getInputs(0)->getDims();
        if (dim.size() != 4)
            IT_TODO_HALT();

        int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
        // input descriptors
        checkCnnlError(cnnlCreateTensorDescriptor(&yDesc));
        checkCnnlError(cnnlSetTensorDescriptor(yDesc, CNNL_LAYOUT_NCHW,
                                               CNNL_DTYPE_FLOAT, 4, dim_array));
        checkCnnlError(cnnlCreateTensorDescriptor(&diffYDesc));
        checkCnnlError(cnnlSetTensorDescriptor(diffYDesc, CNNL_LAYOUT_NCHW,
                                               CNNL_DTYPE_FLOAT, 4, dim_array));
        checkCnnlError(cnnlCreateTensorDescriptor(&xDesc));
        checkCnnlError(cnnlSetTensorDescriptor(xDesc, CNNL_LAYOUT_NCHW,
                                               CNNL_DTYPE_FLOAT, 4, dim_array));
        // output descriptor
        checkCnnlError(cnnlCreateTensorDescriptor(&diffXDesc));
        checkCnnlError(cnnlSetTensorDescriptor(diffXDesc, CNNL_LAYOUT_NCHW,
                                               CNNL_DTYPE_FLOAT, 4, dim_array));

        // op descriptor
        cnnlActivationDescriptor_t opDesc;
        checkCnnlError(cnnlCreateActivationDescriptor(&opDesc));
        checkCnnlError(cnnlSetActivationDescriptor(
            opDesc, getOpType(), CNNL_NOT_PROPAGATE_NAN, getCoef()));

        auto [alpha, beta] = getAlphBeta();
        cnnlStatus_t stat = cnnlActivationBackward(
            context->cnnlHandle(), opDesc, &alpha, yDesc, yData, diffYDesc,
            diffYData, xDesc, xData, &beta, diffXDesc, diffXData);
        if (stat != CNNL_STATUS_SUCCESS)
            return; // NOTE: this early return leaks the descriptors above

        // Destroying descriptors on BANG does not require a sync, but CNNL
        // does not state whether a sync is required before destruction.
        checkCnnlError(cnnlDestroyTensorDescriptor(yDesc));
        checkCnnlError(cnnlDestroyTensorDescriptor(diffYDesc));
        checkCnnlError(cnnlDestroyTensorDescriptor(xDesc));
        checkCnnlError(cnnlDestroyTensorDescriptor(diffXDesc));
        checkCnnlError(cnnlDestroyActivationDescriptor(opDesc));
    }
};

class ReluBackwardCnnl : public ActivationBackwardCnnl {
    cnnlActivationMode_t getOpType() const override {
        return CNNL_ACTIVATION_RELU;
    }
    float getCoef() const override { return 0.0; }
};

class SigmoidBackwardCnnl : public ActivationBackwardCnnl {
    cnnlActivationMode_t getOpType() const override {
        return CNNL_ACTIVATION_SIGMOID;
    }
    float getCoef() const override { return 0.0; }
};

class TanhBackwardCnnl : public ActivationBackwardCnnl {
    cnnlActivationMode_t getOpType() const override {
        return CNNL_ACTIVATION_TANH;
    }
    float getCoef() const override { return 0.0; }
};

REGISTER_KERNEL(Device::BANG, OpType::ReluBackward, DataType::Float32,
                ReluBackwardCnnl, "ReluBackward_cnnl_BANG_Float32");
REGISTER_KERNEL(Device::BANG, OpType::SigmoidBackward, DataType::Float32,
                SigmoidBackwardCnnl, "SigmoidBackward_cnnl_BANG_Float32");
REGISTER_KERNEL(Device::BANG, OpType::TanhBackward, DataType::Float32,
                TanhBackwardCnnl, "TanhBackward_cnnl_BANG_Float32");

} // namespace infini
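The alpha/beta pair follows the usual CNNL/cuDNN scaling convention, diff_x = alpha * gradient + beta * diff_x, so the default {1.f, 0.f} simply overwrites the output buffer. A hypothetical variant that accumulates into an existing gradient buffer could override getAlphBeta (a sketch only; the class name is invented for illustration):

// Hypothetical accumulating kernel (sketch, not part of this commit):
// with beta = 1.f the computed gradient is added to the current
// contents of diff_x instead of overwriting them.
class ReluBackwardAccumulateCnnl : public ReluBackwardCnnl {
    tuple<float, float> getAlphBeta() const override { return {1.f, 1.f}; }
};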
@ -0,0 +1,40 @@
#include "operators/activation_backward.h"

namespace infini {
ActivationBackwardObj::ActivationBackwardObj(OpType type, GraphObj *graph,
                                             Tensor y, Tensor diff_y, Tensor x,
                                             Tensor diff_x)
    : OperatorObj(type, {y, diff_y, x}, {diff_x}) {
    IT_ASSERT(checkValid(graph));
}

optional<vector<Shape>>
ActivationBackwardObj::inferShape(const TensorVec &inputs) const {
    // The gradient diff_x has the same shape as the forward output y.
    const auto A = inputs[0];
    return {{A->getDims()}};
}

std::string ActivationBackwardObj::toString() const {
    std::ostringstream os;
    os << OpRegistry::getOpName(type) << "[" << getGuid() << "]";
    os << "(";
    os << vecToString(inputs[0]->getDims()) << ",";
    os << "input=" << inputs[0]->getGuid() << ",";
    os << "output=" << outputs[0]->getGuid() << ")";
    return os.str();
}

vector<int> ActivationBackwardObj::getWorkloadVector() const {
    vector<int> ret{enum_to_underlying(type)};
    const Shape shape = outputs[0]->getDims();
    ret.insert(ret.end(), shape.begin(), shape.end());
    return ret;
}

vector<int> ActivationBackwardObj::getOpAttrVector() const {
    return {enum_to_underlying(type)};
}

} // namespace infini
@ -0,0 +1,52 @@
#include "bang/bang_runtime.h"
#include "core/graph.h"
#include "core/kernel.h"
#include "core/runtime.h"
#include "operators/activation_backward.h"

#include "test.h"

namespace infini {

template <class T>
void testActivationBackward(
    const std::function<void(void *, size_t, DataType)> &generator,
    const Shape &shape) {
    // Runtimes
    Runtime cpuRuntime = CpuRuntimeObj::getInstance();
    auto bangRuntime = make_ref<BangRuntimeObj>();

    // Build input data on CPU
    Tensor yCpu = make_ref<TensorObj>(shape, DataType::Float32, cpuRuntime);
    Tensor diffYCpu = make_ref<TensorObj>(shape, DataType::Float32, cpuRuntime);
    Tensor xCpu = make_ref<TensorObj>(shape, DataType::Float32, cpuRuntime);

    yCpu->dataMalloc();
    diffYCpu->dataMalloc();
    xCpu->dataMalloc();

    yCpu->setData(generator);
    diffYCpu->setData(generator);
    xCpu->setData(generator);

    // MLU
    Graph bangGraph = make_ref<GraphObj>(bangRuntime);
    auto yGpu = bangGraph->cloneTensor(yCpu);
    auto diffYGpu = bangGraph->cloneTensor(diffYCpu);
    auto xGpu = bangGraph->cloneTensor(xCpu);
    auto gpuOp = bangGraph->addOp<T>(yGpu, diffYGpu, xGpu, nullptr);
    bangGraph->dataMalloc();
    bangRuntime->run(bangGraph);
    auto diffXGpu = gpuOp->getOutput();
    auto diffXGpu2Cpu = diffXGpu->clone(cpuRuntime);
    // Check (execution only; a stronger elementwise check is sketched
    // after this test)
    diffXGpu2Cpu->print();
    EXPECT_TRUE(1);
}

TEST(cnnl_ActivationBackward, run) {
    testActivationBackward<ReluBackwardObj>(IncrementalGenerator(),
                                            Shape{1, 2, 2, 3});
    testActivationBackward<SigmoidBackwardObj>(IncrementalGenerator(),
                                               Shape{1, 2, 2, 3});
    testActivationBackward<TanhBackwardObj>(IncrementalGenerator(),
                                            Shape{1, 2, 2, 3});
}
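EXPECT_TRUE(1) only verifies that the kernel runs to completion. A stronger test would compare the device result against a CPU reference, for example for ReluBackward (a sketch; it assumes TensorObj::size() returns the element count, that getRawDataPtr<float *>() behaves as used in the kernel above, and that CNNL treats the ReLU subgradient at exactly 0 as 0):

// Sketch of an elementwise check for ReluBackward (not part of the commit).
// diffXCpu is the device output after cloning back to the CPU runtime.
void checkReluBackward(const Tensor &yCpu, const Tensor &diffYCpu,
                       const Tensor &diffXCpu) {
    auto y = yCpu->getRawDataPtr<float *>();
    auto dy = diffYCpu->getRawDataPtr<float *>();
    auto dx = diffXCpu->getRawDataPtr<float *>();
    for (size_t i = 0; i < yCpu->size(); ++i)
        EXPECT_FLOAT_EQ(dx[i], y[i] > 0 ? dy[i] : 0.f);
}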

} // namespace infini