forked from jiuyuan/InfiniTensor
add hardtanh operation
This commit is contained in:
parent
8d7150f815
commit
6b53a50927
|
@ -93,6 +93,7 @@ enum class OpType {
|
|||
Square,
|
||||
SquaredDifference,
|
||||
Flip,
|
||||
Hardtanh,
|
||||
//
|
||||
MemBound = 300,
|
||||
};
|
||||
|
@ -194,6 +195,7 @@ class OpRegistry {
|
|||
FOP(Square);
|
||||
FOP(SquaredDifference);
|
||||
FOP(Flip);
|
||||
FOP(Hardtanh);
|
||||
//
|
||||
FOP(MemBound);
|
||||
default:
|
||||
|
|
|
@ -33,6 +33,23 @@ class ClipObj : public OperatorObj {
|
|||
vector<int> getOpAttrVector() const override;
|
||||
};
|
||||
|
||||
class HardtanhObj : public OperatorObj {
|
||||
public:
|
||||
HardtanhObj(GraphObj *graph, Tensor input, Tensor output, float min, float max);
|
||||
optional<vector<Shape>> inferShape(const TensorVec &inputs) const override;
|
||||
|
||||
std::string toString() const override;
|
||||
float getMin() const { return minValue; };
|
||||
float getMax() const { return maxValue; };
|
||||
int numInputs() const override { return 1; }
|
||||
int numOutputs() const override { return 1; }
|
||||
|
||||
private:
|
||||
float minValue, maxValue;
|
||||
vector<int> getWorkloadVector() const override;
|
||||
vector<int> getOpAttrVector() const override;
|
||||
};
|
||||
|
||||
class FlipObj : public OperatorObj {
|
||||
public:
|
||||
FlipObj(GraphObj *graph, Tensor input, Tensor output, vector<int> axis);
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
#include "bang/bang_kernel_without_config.h"
|
||||
#include "bang/bang_runtime.h"
|
||||
#include "operators/unary.h"
|
||||
|
||||
namespace infini {
|
||||
class HardtanhCnnl : public BangKernelWithoutConfig {
|
||||
void compute(const Operator &_op,
|
||||
const RuntimeObj *_context) const override {
|
||||
auto op = as<HardtanhObj>(_op);
|
||||
auto context = dynamic_cast<const BangRuntimeObj *>(_context);
|
||||
|
||||
void *const aData = (op->getInputs(0)->getRawDataPtr<void *>());
|
||||
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
||||
float min = op->getMin();
|
||||
float max = op->getMax();
|
||||
|
||||
cnnlTensorDescriptor_t aDesc;
|
||||
auto dim = op->getInputs(0)->getDims();
|
||||
if (dim.size() != 4)
|
||||
IT_TODO_HALT();
|
||||
|
||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
||||
// get inputs
|
||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
||||
|
||||
cnnlStatus_t stat =
|
||||
cnnlHardtanh(context->cnnlHandle(), aDesc, aData, max, min, aDesc, cData);
|
||||
if (stat != CNNL_STATUS_SUCCESS)
|
||||
return;
|
||||
|
||||
// Destories in BANG does not require sync. But cnnl does not state
|
||||
// whether sync is required before destories.
|
||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||
}
|
||||
};
|
||||
|
||||
// Kernel registration entry: associates HardtanhCnnl with the key
// (Device::BANG, OpType::Hardtanh, DataType::Float32) and the given name.
// REGISTER_KERNEL itself is defined outside this file.
REGISTER_KERNEL(Device::BANG, OpType::Hardtanh, DataType::Float32,
                HardtanhCnnl, "Hardtanh_cnnl_BANG_Float32");
|
||||
|
||||
}; // namespace infini
|
|
@ -65,6 +65,39 @@ vector<int> ClipObj::getOpAttrVector() const {
|
|||
return {enum_to_underlying(type)};
|
||||
}
|
||||
|
||||
HardtanhObj::HardtanhObj(GraphObj *graph, Tensor input, Tensor output, float min,
|
||||
float max)
|
||||
: OperatorObj(OpType::Hardtanh, {input}, {output}), minValue(min),
|
||||
maxValue(max) {
|
||||
IT_ASSERT(checkValid(graph));
|
||||
}
|
||||
|
||||
/// Reports a single output shape equal to the dims of the first input.
optional<vector<Shape>> HardtanhObj::inferShape(const TensorVec &inputs) const {
    const auto source = inputs[0];
    const auto sourceDims = source->getDims();
    return {{sourceDims}};
}
|
||||
|
||||
/// Formats the operator as
/// `<OpName>[<guid>](<input dims>,input=<guid>,output=<guid>)`.
/// The min/max bounds are not part of the text.
std::string HardtanhObj::toString() const {
    std::ostringstream text;
    text << OpRegistry::getOpName(type) << "[" << getGuid() << "]"
         << "("
         << vecToString(inputs[0]->getDims()) << ","
         << "input=" << inputs[0]->getGuid() << ","
         << "output=" << outputs[0]->getGuid() << ")";
    return text.str();
}
|
||||
|
||||
/// Workload key: the operator type id followed by every extent of the
/// output tensor's shape, in order.
vector<int> HardtanhObj::getWorkloadVector() const {
    const Shape outDims = outputs[0]->getDims();

    vector<int> workload;
    workload.reserve(outDims.size() + 1);
    workload.push_back(enum_to_underlying(type));
    for (const auto extent : outDims)
        workload.push_back(extent);
    return workload;
}
|
||||
|
||||
/// Attribute key: contains only the operator type id. The min/max bounds
/// are not included.
vector<int> HardtanhObj::getOpAttrVector() const {
    vector<int> attrs;
    attrs.push_back(enum_to_underlying(type));
    return attrs;
}
|
||||
|
||||
FlipObj::FlipObj(GraphObj *graph, Tensor input, Tensor output, vector<int> axis)
|
||||
: OperatorObj(OpType::Flip, {input}, {output}), axisValue(axis) {
|
||||
IT_ASSERT(checkValid(graph));
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
#include "bang/bang_runtime.h"
|
||||
#include "core/graph.h"
|
||||
#include "core/kernel.h"
|
||||
#include "core/runtime.h"
|
||||
#include "operators/unary.h"
|
||||
|
||||
#include "test.h"
|
||||
|
||||
namespace infini {
|
||||
|
||||
template <class T>
|
||||
void testHardtanh(const std::function<void(void *, size_t, DataType)> &generator,
|
||||
const Shape &shape) {
|
||||
// Runtime
|
||||
Runtime cpuRuntime = CpuRuntimeObj::getInstance();
|
||||
auto bangRuntime = make_ref<BangRuntimeObj>();
|
||||
|
||||
// Build input data on CPU
|
||||
Tensor inputCpu = make_ref<TensorObj>(shape, DataType::Float32, cpuRuntime);
|
||||
inputCpu->dataMalloc();
|
||||
inputCpu->setData(generator);
|
||||
|
||||
// GPU
|
||||
Graph bangGraph = make_ref<GraphObj>(bangRuntime);
|
||||
auto inputGpu = bangGraph->cloneTensor(inputCpu);
|
||||
float min = 1.0;
|
||||
float max = 4.0;
|
||||
auto gpuOp = bangGraph->addOp<T>(inputGpu, nullptr, min, max);
|
||||
bangGraph->dataMalloc();
|
||||
bangRuntime->run(bangGraph);
|
||||
auto outputGpu = gpuOp->getOutput();
|
||||
auto outputGpu2Cpu = outputGpu->clone(cpuRuntime);
|
||||
inputCpu->printData();
|
||||
outputGpu2Cpu->printData();
|
||||
EXPECT_TRUE(1);
|
||||
}
|
||||
|
||||
// Runs the Hardtanh smoke test on a 1x2x2x3 tensor filled by
// IncrementalGenerator.
TEST(cnnl_Hardtanh, run) {
    const Shape inputShape{1, 2, 2, 3};
    testHardtanh<HardtanhObj>(IncrementalGenerator(), inputShape);
}
|
||||
|
||||
} // namespace infini
|
Loading…
Reference in New Issue