forked from jiuyuan/InfiniTensor
Compare commits
10 Commits
Author | SHA1 | Date |
---|---|---|
![]() |
e33131ce5c | |
![]() |
886c5e5bd2 | |
![]() |
9b63a62b70 | |
![]() |
8baa34a1d2 | |
![]() |
19d3e831f9 | |
|
b3b1d3c2bf | |
![]() |
689f8c6a5d | |
![]() |
e3a2e65c47 | |
![]() |
06f5e82d8b | |
![]() |
cada8ec6c8 |
|
@ -109,6 +109,8 @@ class GraphHandlerObj {
|
|||
std::string mode);
|
||||
/// NOTE(review): presumably registers a Local Response Normalization
/// operator on the graph; the definition is not fully visible here, so the
/// meaning of alpha/beta/bias/size should be confirmed against it.
Tensor lrn(Tensor input, Tensor output, float alpha, float beta, float bias,
|
||||
int size);
|
||||
/// Adds a DropoutObj operator to the graph and returns its tensors.
/// Per the definition of GraphHandlerObj::dropout: when `output` is non-null
/// the result is {output, mask}; otherwise it is the new operator's
/// getOutputs(). `ratio` and `training_mode` are forwarded to DropoutObj
/// unchanged.
TensorVec dropout(Tensor input, Tensor output, Tensor mask, float ratio,
|
||||
bool training_mode);
|
||||
|
||||
//------ modifiers
|
||||
|
||||
|
|
|
@ -814,7 +814,6 @@ class OnnxStub:
|
|||
)
|
||||
!= 0
|
||||
)
|
||||
|
||||
tensors[node.output[0]] = self.handler.reduceSum(
|
||||
tensors[node.input[0]],
|
||||
tensors.get(node.output[0]),
|
||||
|
@ -951,7 +950,27 @@ class OnnxStub:
|
|||
beta,
|
||||
bias,
|
||||
size,
|
||||
)
|
||||
)
|
||||
elif node.op_type == "Dropout":
|
||||
# HACK: Currently we only support model inference,
|
||||
# so training_mode is set to false.
|
||||
attributes = _parse_attribute(
|
||||
node, {"ratio": 0.5, "training_mode": 0})
|
||||
(ratio, training_mode) = (
|
||||
attributes[name]
|
||||
for name in ["ratio", "training_mode"]
|
||||
)
|
||||
for name, tensor in zip(
|
||||
node.output,
|
||||
self.handler.dropout(
|
||||
tensors[node.input[0]],
|
||||
tensors.get(node.output[0]),
|
||||
tensors.get(node.output[1]),
|
||||
ratio,
|
||||
training_mode,
|
||||
),
|
||||
):
|
||||
tensors[name] = tensor
|
||||
else:
|
||||
raise Exception('Unsupported operator "{}"'.format(node.op_type))
|
||||
|
||||
|
@ -1328,6 +1347,18 @@ class OnnxStub:
|
|||
size,
|
||||
)
|
||||
)
|
||||
elif ty == backend.OpTypeId.Dropout:
|
||||
ratio, training_mode = backend.dropout_attrs_of(op)
|
||||
ctx.push_node(
|
||||
make_node(
|
||||
ty.name,
|
||||
inputs,
|
||||
outputs,
|
||||
name,
|
||||
ratio,
|
||||
training_mode,
|
||||
)
|
||||
)
|
||||
else:
|
||||
raise Exception("Unsupported OpType", ty)
|
||||
|
||||
|
|
|
@ -463,13 +463,16 @@ class TestStringMethods(unittest.TestCase):
|
|||
def test_split(self):
|
||||
input = make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 2, 4])
|
||||
split = make_node("Split", ["input"], ["output"], name="split", axis=0)
|
||||
make_and_import_model(make_graph([split], "split", [input], []))
|
||||
output = make_tensor_value_info("output", TensorProto.FLOAT, [1, 3, 2, 4])
|
||||
make_and_import_model(make_graph([split], "split", [input], [output]))
|
||||
|
||||
def test_split1(self):
|
||||
input = make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 2, 4])
|
||||
splitAttr = make_tensor_value_info("split", TensorProto.INT64, [2, 1])
|
||||
split = make_node("Split", ["input", "split"], ["output"], name="split", axis=1)
|
||||
make_and_import_model(make_graph([split], "split", [input, splitAttr], []))
|
||||
splitAttr = make_tensor("split", TensorProto.INT64, [2], [2, 1])
|
||||
output1 = make_tensor_value_info("output1", TensorProto.FLOAT, [1, 2, 2, 4])
|
||||
output2 = make_tensor_value_info("output2", TensorProto.FLOAT, [1, 1, 2, 4])
|
||||
split = make_node("Split", ["input", "split"], ["output1", "output2"], name="split", axis=1)
|
||||
make_and_import_model(make_graph([split], "split", [input], [output1, output2], [splitAttr]))
|
||||
|
||||
def test_allBroadcast(self):
|
||||
input = make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 2, 4])
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include "operators/broadcast.h"
|
||||
#include "operators/concat.h"
|
||||
#include "operators/conv.h"
|
||||
#include "operators/dropout.h"
|
||||
#include "operators/element_wise.h"
|
||||
#include "operators/expand.h"
|
||||
#include "operators/gather.h"
|
||||
|
@ -610,6 +611,20 @@ Tensor GraphHandlerObj::lrn(Tensor input, Tensor output, float alpha,
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Adds a Dropout operator to the graph.
 *
 * @param input          tensor fed to the operator (moved into the op).
 * @param output         optional pre-existing output tensor.
 * @param mask           optional pre-existing mask tensor.
 * @param ratio          forwarded unchanged to DropoutObj.
 * @param training_mode  forwarded unchanged to DropoutObj.
 * @return {output, mask} when `output` is non-null; otherwise the outputs
 *         reported by the newly added operator.
 */
TensorVec GraphHandlerObj::dropout(Tensor input, Tensor output, Tensor mask,
                                   float ratio, bool training_mode) {
    // Without a caller-supplied output, go through addOp and hand back
    // whatever outputs the new operator exposes.
    if (!output) {
        auto added = g->addOp<DropoutObj>(std::move(input), output, mask,
                                          ratio, training_mode);
        return added->getOutputs();
    }

    // Caller supplied the output tensor: bind the operator to the given
    // tensors and return them as-is.
    g->addOpWithOutputs<DropoutObj>(std::move(input), output, mask, ratio,
                                    training_mode);
    return {output, mask};
}
|
||||
|
||||
Tensor GraphHandlerObj::squeeze(Tensor input, Tensor output, Shape axes) {
|
||||
if (output) {
|
||||
g->addOpWithOutputs<SqueezeObj>(std::move(input), output,
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
#include "operators/batch_norm.h"
|
||||
#include "operators/concat.h"
|
||||
#include "operators/conv.h"
|
||||
#include "operators/dropout.h"
|
||||
#include "operators/expand.h"
|
||||
#include "operators/gather.h"
|
||||
#include "operators/lrn.h"
|
||||
|
@ -328,6 +329,14 @@ static std::tuple<float, float, float, int> lrn_attrs_of(Operator op) {
|
|||
return std::make_tuple(alpha, beta, bias, size);
|
||||
}
|
||||
|
||||
/// Returns the (ratio, training_mode) pair stored on a Dropout operator.
/// Asserts that `op` has OpType::Dropout before reading the attributes.
static std::tuple<float, bool> dropout_attrs_of(Operator op) {
    IT_ASSERT(op->getOpType() == OpType::Dropout);
    const auto *node = dynamic_cast<const DropoutObj *>(op.get());
    // Braced initialisation keeps the getters evaluated left to right,
    // i.e. ratio first, then training mode.
    return std::tuple<float, bool>{node->getRatio(), node->getTrainingMode()};
}
|
||||
|
||||
void export_functions(py::module &m) {
|
||||
#define FUNCTION(NAME) def(#NAME, &NAME)
|
||||
m.def("cpu_runtime", &NativeCpuRuntimeObj::getInstance)
|
||||
|
@ -365,6 +374,7 @@ void export_functions(py::module &m) {
|
|||
.FUNCTION(flatten_axis_of)
|
||||
.FUNCTION(cast_to_of)
|
||||
.FUNCTION(depth_to_space_attrs_of)
|
||||
.FUNCTION(dropout_attrs_of)
|
||||
.FUNCTION(squeeze_axes_of)
|
||||
.FUNCTION(unsqueeze_axes_of)
|
||||
.FUNCTION(lrn_attrs_of);
|
||||
|
@ -558,6 +568,7 @@ void init_graph_builder(py::module &m) {
|
|||
.def("erf", &Handler::erf, policy::move)
|
||||
.def("where", &Handler::where, policy::move)
|
||||
.def("lrn", &Handler::lrn, policy::move)
|
||||
.def("dropout", &Handler::dropout, policy::move)
|
||||
.def("topo_sort", &Handler::topo_sort, policy::automatic)
|
||||
.def("optimize", &Handler::optimize, policy::automatic)
|
||||
.def("operators", &Handler::operators, policy::move)
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
#include "operators/dropout.h"
|
||||
#include "bang/bang_kernel_without_config.h"
|
||||
#include "bang/bang_runtime.h"
|
||||
|
||||
namespace infini {
|
||||
class DropoutCnnl : public BangKernelWithoutConfig {
|
||||
void compute(const Operator &_op,
|
||||
const RuntimeObj *_context) const override {
|
||||
auto op = as<DropoutObj>(_op);
|
||||
IT_ASSERT(op->getDType() == DataType::Float32);
|
||||
auto context = dynamic_cast<const BangRuntimeObj *>(_context);
|
||||
|
||||
void *const iData = (op->getInputs(0)->getRawDataPtr<void *>());
|
||||
void *const oData = (op->getOutput(0)->getRawDataPtr<void *>());
|
||||
void *const mData = (op->getOutput(1)->getRawDataPtr<void *>());
|
||||
|
||||
cnnlRandGenerator_t generator;
|
||||
cnnlRandCreateGenerator(&generator, CNNL_RAND_RNG_FAST);
|
||||
cnnlRandSetPseudoRandomGeneratorSeed(generator, 233);
|
||||
cnnlRandSetMTGP32Period(generator, CNNL_RAND_MTGP32_P11213);
|
||||
|
||||
cnnlTensorDescriptor_t oDesc;
|
||||
auto oDim = op->getOutput(0)->getDims();
|
||||
checkCnnlError(cnnlCreateTensorDescriptor(&oDesc));
|
||||
checkCnnlError(cnnlSetTensorDescriptor(oDesc, CNNL_LAYOUT_ARRAY,
|
||||
CNNL_DTYPE_FLOAT, oDim.size(),
|
||||
oDim.data()));
|
||||
cnnlTensorDescriptor_t mDesc;
|
||||
checkCnnlError(cnnlCreateTensorDescriptor(&mDesc));
|
||||
checkCnnlError(cnnlSetTensorDescriptor(mDesc, CNNL_LAYOUT_ARRAY,
|
||||
CNNL_DTYPE_UINT8, oDim.size(),
|
||||
oDim.data()));
|
||||
|
||||
auto ratio = op->getRatio();
|
||||
// auto train = op->getTrainingMode();
|
||||
|
||||
cnnlStatus_t stat =
|
||||
cnnlFusedDropout_v2(context->cnnlHandle(), generator, oDesc, iData,
|
||||
ratio, NULL, mDesc, mData, oDesc, oData);
|
||||
|
||||
if (stat != CNNL_STATUS_SUCCESS)
|
||||
return;
|
||||
|
||||
checkCnnlError(cnnlDestroyTensorDescriptor(oDesc));
|
||||
checkCnnlError(cnnlDestroyTensorDescriptor(mDesc));
|
||||
checkCnnlError(cnnlRandDestroyGenerator(generator));
|
||||
}
|
||||
};
|
||||
|
||||
REGISTER_KERNEL(Device::BANG, OpType::Dropout, DropoutCnnl,
|
||||
"Dropout_cnnl_BANG_Float32");
|
||||
|
||||
}; // namespace infini
|
|
@ -7,6 +7,7 @@ class SliceCnnl : public BangKernelWithoutConfig {
|
|||
void compute(const Operator &_op,
|
||||
const RuntimeObj *_context) const override {
|
||||
auto op = as<SliceObj>(_op);
|
||||
IT_ASSERT(op->getDType() == DataType::Float32);
|
||||
auto context = dynamic_cast<const BangRuntimeObj *>(_context);
|
||||
|
||||
auto starts = op->getStarts();
|
||||
|
@ -59,6 +60,6 @@ class SliceCnnl : public BangKernelWithoutConfig {
|
|||
}
|
||||
};
|
||||
|
||||
REGISTER_KERNEL(Device::BANG, OpType::Slice, DataType::Float32, SliceCnnl,
|
||||
REGISTER_KERNEL(Device::BANG, OpType::Slice, SliceCnnl,
|
||||
"Slice_cnnl_BANG_Float32");
|
||||
}; // namespace infini
|
||||
|
|
Loading…
Reference in New Issue