forked from jiuyuan/InfiniTensor
Add ReduceSum op and kernel (#160)
* Add reduceSum op and kernel
* fix merge and format
* Reduce: reuse cat macro, add doc string

---------

Co-authored-by: Haojie Wang <haojie0429@gmail.com>
This commit is contained in:
parent
595a9906d2
commit
6ece3f4a77
|
@ -73,6 +73,8 @@ class GraphHandlerObj {
|
||||||
Tensor gatherElements(Tensor data, Tensor indices, Tensor output, int axis);
|
Tensor gatherElements(Tensor data, Tensor indices, Tensor output, int axis);
|
||||||
Tensor reduceMean(Tensor data, Tensor reduced,
|
Tensor reduceMean(Tensor data, Tensor reduced,
|
||||||
const optional<vector<int>> &axes, bool keepdims);
|
const optional<vector<int>> &axes, bool keepdims);
|
||||||
|
Tensor reduceSum(Tensor data, Tensor reduced,
|
||||||
|
const optional<vector<int>> &axes, bool keepdims);
|
||||||
Tensor slice(Tensor input, Tensor output, const vector<int> &starts,
|
Tensor slice(Tensor input, Tensor output, const vector<int> &starts,
|
||||||
const vector<int> &ends, const optional<vector<int>> &axes,
|
const vector<int> &ends, const optional<vector<int>> &axes,
|
||||||
const optional<vector<int>> &steps);
|
const optional<vector<int>> &steps);
|
||||||
|
|
|
@ -3,26 +3,29 @@
|
||||||
|
|
||||||
namespace infini {
|
namespace infini {
|
||||||
/**
|
/**
|
||||||
* @brief Compute the mean of input tensor's elements along certain axes.
|
* @brief Compute the reduction of input tensor's elements along certain axes.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
class ReduceMeanObj : public OperatorObj {
|
class ReduceBaseObj : public OperatorObj {
|
||||||
|
protected:
|
||||||
set<int> axes; // axis to reduce
|
set<int> axes; // axis to reduce
|
||||||
bool keepDims;
|
bool keepDims;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
/**
|
/**
|
||||||
* @brief Construct a new ReduceMean object.
|
* @brief Construct a new Reduce object.
|
||||||
*
|
*
|
||||||
* @param graph The computation graph that this operator belongs to.
|
* @param graph The computation graph that this operator belongs to.
|
||||||
|
* @param opType The operation type. Should be a Reduce operation.
|
||||||
* @param input The input tensor.
|
* @param input The input tensor.
|
||||||
* @param output The output tensor.
|
* @param output The output tensor.
|
||||||
* @param axes Axes to reduce.
|
* @param axes Axes to reduce.
|
||||||
* @param keepDims Keep the reduced dimensions or not.
|
* @param keepDims Keep the reduced dimensions or not.
|
||||||
*/
|
*/
|
||||||
ReduceMeanObj(GraphObj *graph, Tensor input, Tensor output,
|
ReduceBaseObj(GraphObj *graph, OpType opType, Tensor input, Tensor output,
|
||||||
const optional<vector<int>> &axes, bool keepDims = true);
|
const optional<vector<int>> &axes, bool keepDims);
|
||||||
OP_CLONE(ReduceMeanObj);
|
virtual ~ReduceBaseObj() {}
|
||||||
|
OP_CLONE(ReduceBaseObj);
|
||||||
optional<vector<Shape>> inferShape(const TensorVec &inputs) override;
|
optional<vector<Shape>> inferShape(const TensorVec &inputs) override;
|
||||||
|
|
||||||
std::string toString() const override;
|
std::string toString() const override;
|
||||||
|
@ -38,4 +41,15 @@ class ReduceMeanObj : public OperatorObj {
|
||||||
vector<int> getOpAttrVector() const override;
|
vector<int> getOpAttrVector() const override;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class ReduceMeanObj : public ReduceBaseObj {
|
||||||
|
public:
|
||||||
|
ReduceMeanObj(GraphObj *graph, Tensor input, Tensor output,
|
||||||
|
const optional<vector<int>> &axes, bool keepDims = true);
|
||||||
|
};
|
||||||
|
|
||||||
|
class ReduceSumObj : public ReduceBaseObj {
|
||||||
|
public:
|
||||||
|
ReduceSumObj(GraphObj *graph, Tensor input, Tensor output,
|
||||||
|
const optional<vector<int>> &axes, bool keepDims = true);
|
||||||
|
};
|
||||||
} // namespace infini
|
} // namespace infini
|
|
@ -604,7 +604,7 @@ class OnnxStub:
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
elif node.op_type == "ReduceMean":
|
elif node.op_type == "ReduceMean":
|
||||||
tensors[node.output[0]] = self.handler.reduce_mean(
|
tensors[node.output[0]] = self.handler.reduceMean(
|
||||||
tensors[node.input[0]],
|
tensors[node.input[0]],
|
||||||
tensors.get(node.output[0]),
|
tensors.get(node.output[0]),
|
||||||
# NOTE(constroy): `axes` is an attribute until opset version 13.
|
# NOTE(constroy): `axes` is an attribute until opset version 13.
|
||||||
|
@ -678,12 +678,40 @@ class OnnxStub:
|
||||||
next((attr.i for attr in node.attribute if attr.name == "to")),
|
next((attr.i for attr in node.attribute if attr.name == "to")),
|
||||||
)
|
)
|
||||||
elif node.op_type == "ReduceSum":
|
elif node.op_type == "ReduceSum":
|
||||||
# ReduceSum is only implemented as allReduceSum.
|
if any(attr.name == "communicator" for attr in node.attribute):
|
||||||
assert any(attr.name == "communicator" for attr in node.attribute)
|
# ReduceSum with communicator is treated as allReduceSum.
|
||||||
tensors[node.output[0]] = self.handler.allReduceSum(
|
tensors[node.output[0]] = self.handler.allReduceSum(
|
||||||
tensors[node.input[0]],
|
tensors[node.input[0]],
|
||||||
tensors.get(node.output[0]),
|
tensors.get(node.output[0]),
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
# NOTE: `axes` is an attribute until opset version 13.
|
||||||
|
if len(node.input) > 1:
|
||||||
|
axis = _parse_data(data[node.input[1]])
|
||||||
|
else:
|
||||||
|
axis = next(
|
||||||
|
(
|
||||||
|
attr.ints
|
||||||
|
for attr in node.attribute
|
||||||
|
if attr.name == "axes"
|
||||||
|
),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
keepdims = next(
|
||||||
|
(
|
||||||
|
attr.i
|
||||||
|
for attr in node.attribute
|
||||||
|
if attr.name == "keepdims"
|
||||||
|
),
|
||||||
|
1,
|
||||||
|
) != 0
|
||||||
|
|
||||||
|
tensors[node.output[0]] = self.handler.reduceSum(
|
||||||
|
tensors[node.input[0]],
|
||||||
|
tensors.get(node.output[0]),
|
||||||
|
axis,
|
||||||
|
keepdims,
|
||||||
|
)
|
||||||
elif node.op_type == "AllReduceSum":
|
elif node.op_type == "AllReduceSum":
|
||||||
tensors[node.output[0]] = self.handler.allReduceSum(
|
tensors[node.output[0]] = self.handler.allReduceSum(
|
||||||
tensors[node.input[0]],
|
tensors[node.input[0]],
|
||||||
|
@ -1044,8 +1072,11 @@ class OnnxStub:
|
||||||
elif ty == backend.OpTypeId.Gather:
|
elif ty == backend.OpTypeId.Gather:
|
||||||
axis = backend.gather_axis_of(op)
|
axis = backend.gather_axis_of(op)
|
||||||
ctx.push_node(make_node(ty.name, inputs, outputs, name, axis=axis))
|
ctx.push_node(make_node(ty.name, inputs, outputs, name, axis=axis))
|
||||||
elif ty == backend.OpTypeId.ReduceMean:
|
elif ty in [
|
||||||
axes, keepdims = backend.reduce_mean_attrs_of(op)
|
backend.OpTypeId.ReduceMean,
|
||||||
|
backend.OpTypeId.ReduceSum
|
||||||
|
]:
|
||||||
|
axes, keepdims = backend.reduce_attrs_of(op)
|
||||||
inputs.append(
|
inputs.append(
|
||||||
ctx.push_data_input(
|
ctx.push_data_input(
|
||||||
name, "axes", TensorProto.INT64, [len(axes)], axes
|
name, "axes", TensorProto.INT64, [len(axes)], axes
|
||||||
|
|
|
@ -337,6 +337,14 @@ class TestStringMethods(unittest.TestCase):
|
||||||
"ReduceMean", ["data"], ["reduced"], keepdims=1, name="reduceMean"
|
"ReduceMean", ["data"], ["reduced"], keepdims=1, name="reduceMean"
|
||||||
)
|
)
|
||||||
make_and_import_model(make_graph([reduceMean], "reduceMean", [data], [reduced]))
|
make_and_import_model(make_graph([reduceMean], "reduceMean", [data], [reduced]))
|
||||||
|
|
||||||
|
def test_reduce_sum(self):
|
||||||
|
data = make_tensor_value_info("data", TensorProto.FLOAT, [2, 3, 3, 4])
|
||||||
|
reduced = make_tensor_value_info("reduced", TensorProto.FLOAT, [1, 1, 1, 1])
|
||||||
|
reduceSum = make_node(
|
||||||
|
"ReduceSum", ["data"], ["reduced"], keepdims=1, name="reduceSum"
|
||||||
|
)
|
||||||
|
make_and_import_model(make_graph([reduceSum], "reduceSum", [data], [reduced]))
|
||||||
|
|
||||||
def test_slice(self):
|
def test_slice(self):
|
||||||
data = make_tensor_value_info("data", TensorProto.UINT32, [10, 64, 162, 162])
|
data = make_tensor_value_info("data", TensorProto.UINT32, [10, 64, 162, 162])
|
||||||
|
|
|
@ -12,7 +12,7 @@
|
||||||
#include "operators/matmul.h"
|
#include "operators/matmul.h"
|
||||||
#include "operators/pad.h"
|
#include "operators/pad.h"
|
||||||
#include "operators/pooling.h"
|
#include "operators/pooling.h"
|
||||||
#include "operators/reduce_mean.h"
|
#include "operators/reduce.h"
|
||||||
#include "operators/reshape.h"
|
#include "operators/reshape.h"
|
||||||
#include "operators/slice.h"
|
#include "operators/slice.h"
|
||||||
#include "operators/softmax.h"
|
#include "operators/softmax.h"
|
||||||
|
@ -302,18 +302,23 @@ Tensor GraphHandlerObj::gatherElements(Tensor data, Tensor indices,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Tensor GraphHandlerObj::reduceMean(Tensor data, Tensor reduced,
|
#define DEFINE_REDUCE_METHOD(name, obj) \
|
||||||
const optional<vector<int>> &axes,
|
Tensor GraphHandlerObj::name(Tensor data, Tensor reduced, \
|
||||||
bool keepdims) {
|
const optional<vector<int>> &axes, \
|
||||||
if (reduced) {
|
bool keepdims) { \
|
||||||
g->addOpWithOutputs<ReduceMeanObj>(std::move(data), reduced, axes,
|
if (reduced) { \
|
||||||
keepdims);
|
g->addOpWithOutputs<_CAT(obj, Obj)>(std::move(data), reduced, \
|
||||||
return reduced;
|
axes, keepdims); \
|
||||||
} else {
|
return reduced; \
|
||||||
return g->addOp<ReduceMeanObj>(std::move(data), reduced, axes, keepdims)
|
} else { \
|
||||||
->getOutput();
|
return g \
|
||||||
|
->addOp<_CAT(obj, Obj)>(std::move(data), reduced, axes, \
|
||||||
|
keepdims) \
|
||||||
|
->getOutput(); \
|
||||||
|
} \
|
||||||
}
|
}
|
||||||
}
|
DEFINE_REDUCE_METHOD(reduceMean, ReduceMean)
|
||||||
|
DEFINE_REDUCE_METHOD(reduceSum, ReduceSum)
|
||||||
|
|
||||||
Tensor GraphHandlerObj::slice(Tensor input, Tensor output,
|
Tensor GraphHandlerObj::slice(Tensor input, Tensor output,
|
||||||
const vector<int> &starts,
|
const vector<int> &starts,
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
#include "operators/matmul.h"
|
#include "operators/matmul.h"
|
||||||
#include "operators/pad.h"
|
#include "operators/pad.h"
|
||||||
#include "operators/pooling.h"
|
#include "operators/pooling.h"
|
||||||
#include "operators/reduce_mean.h"
|
#include "operators/reduce.h"
|
||||||
#include "operators/reshape.h"
|
#include "operators/reshape.h"
|
||||||
#include "operators/split.h"
|
#include "operators/split.h"
|
||||||
#include "operators/transpose.h"
|
#include "operators/transpose.h"
|
||||||
|
@ -90,6 +90,7 @@ void export_values(py::module &m) {
|
||||||
.VALUE(OpType, Gather)
|
.VALUE(OpType, Gather)
|
||||||
.VALUE(OpType, GatherElements)
|
.VALUE(OpType, GatherElements)
|
||||||
.VALUE(OpType, ReduceMean)
|
.VALUE(OpType, ReduceMean)
|
||||||
|
.VALUE(OpType, ReduceSum)
|
||||||
.VALUE(OpType, Reshape)
|
.VALUE(OpType, Reshape)
|
||||||
.VALUE(OpType, Flatten)
|
.VALUE(OpType, Flatten)
|
||||||
.VALUE(OpType, Identity)
|
.VALUE(OpType, Identity)
|
||||||
|
@ -219,12 +220,13 @@ clip_attrs_of(Operator op) {
|
||||||
return std::make_tuple(clip->getMin(), clip->getMax());
|
return std::make_tuple(clip->getMin(), clip->getMax());
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::tuple<vector<int>, bool> reduce_mean_attrs_of(Operator op) {
|
static std::tuple<vector<int>, bool> reduce_attrs_of(Operator op) {
|
||||||
IT_ASSERT(op->getOpType() == OpType::ReduceMean);
|
IT_ASSERT(op->getOpType() == OpType::ReduceMean ||
|
||||||
auto reduce_mean = dynamic_cast<const ReduceMeanObj *>(op.get());
|
op->getOpType() == OpType::ReduceSum);
|
||||||
auto &set = reduce_mean->getAxes();
|
auto reduce = dynamic_cast<const ReduceBaseObj *>(op.get());
|
||||||
|
auto &set = reduce->getAxes();
|
||||||
return std::make_tuple(vector(set.begin(), set.end()),
|
return std::make_tuple(vector(set.begin(), set.end()),
|
||||||
reduce_mean->getKeepDims());
|
reduce->getKeepDims());
|
||||||
}
|
}
|
||||||
|
|
||||||
static int concat_axis_of(Operator op) {
|
static int concat_axis_of(Operator op) {
|
||||||
|
@ -319,7 +321,7 @@ void export_functions(py::module &m) {
|
||||||
.FUNCTION(batch_norm_attrs_of)
|
.FUNCTION(batch_norm_attrs_of)
|
||||||
.FUNCTION(pool_attrs_of)
|
.FUNCTION(pool_attrs_of)
|
||||||
.FUNCTION(clip_attrs_of)
|
.FUNCTION(clip_attrs_of)
|
||||||
.FUNCTION(reduce_mean_attrs_of)
|
.FUNCTION(reduce_attrs_of)
|
||||||
.FUNCTION(tensor_dtype)
|
.FUNCTION(tensor_dtype)
|
||||||
.FUNCTION(reshape_shape_of)
|
.FUNCTION(reshape_shape_of)
|
||||||
.FUNCTION(expand_shape_of)
|
.FUNCTION(expand_shape_of)
|
||||||
|
@ -497,7 +499,8 @@ void init_graph_builder(py::module &m) {
|
||||||
.def("split", &Handler::split, policy::move)
|
.def("split", &Handler::split, policy::move)
|
||||||
.def("gather", &Handler::gather, policy::move)
|
.def("gather", &Handler::gather, policy::move)
|
||||||
.def("gatherElements", &Handler::gatherElements, policy::move)
|
.def("gatherElements", &Handler::gatherElements, policy::move)
|
||||||
.def("reduce_mean", &Handler::reduceMean, policy::move)
|
.def("reduceMean", &Handler::reduceMean, policy::move)
|
||||||
|
.def("reduceSum", &Handler::reduceSum, policy::move)
|
||||||
.def("slice", &Handler::slice, policy::move)
|
.def("slice", &Handler::slice, policy::move)
|
||||||
.def("pad", &Handler::pad, policy::move)
|
.def("pad", &Handler::pad, policy::move)
|
||||||
.def("allReduceSum", &Handler::allReduceSum, policy::move)
|
.def("allReduceSum", &Handler::allReduceSum, policy::move)
|
||||||
|
|
|
@ -1,12 +1,14 @@
|
||||||
#include "operators/reduce_mean.h"
|
#include "operators/reduce.h"
|
||||||
#include "cuda/cuda_kernel_wihtout_config.h"
|
#include "cuda/cuda_kernel_wihtout_config.h"
|
||||||
#include "cuda/cuda_runtime.h"
|
#include "cuda/cuda_runtime.h"
|
||||||
|
|
||||||
namespace infini {
|
namespace infini {
|
||||||
class ReduceMeanCudnn : public CudaKernelWithoutConfig {
|
class ReduceCudnnBase : public CudaKernelWithoutConfig {
|
||||||
|
virtual cudnnReduceTensorOp_t getReduceOp() const = 0;
|
||||||
|
|
||||||
void compute(const Operator &_op,
|
void compute(const Operator &_op,
|
||||||
const RuntimeObj *_context) const override {
|
const RuntimeObj *_context) const override {
|
||||||
auto op = as<ReduceMeanObj>(_op);
|
auto op = as<ReduceBaseObj>(_op);
|
||||||
auto input = op->getInputs(0);
|
auto input = op->getInputs(0);
|
||||||
auto output = op->getOutput();
|
auto output = op->getOutput();
|
||||||
auto context = dynamic_cast<const CudaRuntimeObj *>(_context);
|
auto context = dynamic_cast<const CudaRuntimeObj *>(_context);
|
||||||
|
@ -71,7 +73,7 @@ class ReduceMeanCudnn : public CudaKernelWithoutConfig {
|
||||||
cudnnReduceTensorDescriptor_t reduceDesc;
|
cudnnReduceTensorDescriptor_t reduceDesc;
|
||||||
checkCudnnError(cudnnCreateReduceTensorDescriptor(&reduceDesc));
|
checkCudnnError(cudnnCreateReduceTensorDescriptor(&reduceDesc));
|
||||||
checkCudnnError(cudnnSetReduceTensorDescriptor(
|
checkCudnnError(cudnnSetReduceTensorDescriptor(
|
||||||
reduceDesc, CUDNN_REDUCE_TENSOR_AVG, CUDNN_DATA_FLOAT,
|
reduceDesc, getReduceOp(), CUDNN_DATA_FLOAT,
|
||||||
CUDNN_NOT_PROPAGATE_NAN, CUDNN_REDUCE_TENSOR_NO_INDICES,
|
CUDNN_NOT_PROPAGATE_NAN, CUDNN_REDUCE_TENSOR_NO_INDICES,
|
||||||
CUDNN_32BIT_INDICES));
|
CUDNN_32BIT_INDICES));
|
||||||
|
|
||||||
|
@ -106,6 +108,20 @@ class ReduceMeanCudnn : public CudaKernelWithoutConfig {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class ReduceMeanCudnn : public ReduceCudnnBase {
|
||||||
|
cudnnReduceTensorOp_t getReduceOp() const override {
|
||||||
|
return CUDNN_REDUCE_TENSOR_AVG;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class ReduceSumCudnn : public ReduceCudnnBase {
|
||||||
|
cudnnReduceTensorOp_t getReduceOp() const override {
|
||||||
|
return CUDNN_REDUCE_TENSOR_ADD;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
REGISTER_KERNEL(Device::CUDA, OpType::ReduceMean, DataType::Float32,
|
REGISTER_KERNEL(Device::CUDA, OpType::ReduceMean, DataType::Float32,
|
||||||
ReduceMeanCudnn, "ReduceMean_cuDNN_CUDA_Float32");
|
ReduceMeanCudnn, "ReduceMean_cuDNN_CUDA_Float32");
|
||||||
|
REGISTER_KERNEL(Device::CUDA, OpType::ReduceSum, DataType::Float32,
|
||||||
|
ReduceSumCudnn, "ReduceSum_cuDNN_CUDA_Float32");
|
||||||
}; // namespace infini
|
}; // namespace infini
|
|
@ -1,10 +1,11 @@
|
||||||
#include "operators/reduce_mean.h"
|
#include "operators/reduce.h"
|
||||||
#include "utils/operator_utils.h"
|
#include "utils/operator_utils.h"
|
||||||
|
|
||||||
namespace infini {
|
namespace infini {
|
||||||
ReduceMeanObj::ReduceMeanObj(GraphObj *graph, Tensor input, Tensor output,
|
ReduceBaseObj::ReduceBaseObj(GraphObj *graph, OpType opType, Tensor input,
|
||||||
const optional<vector<int>> &_axes, bool keepDims)
|
Tensor output, const optional<vector<int>> &_axes,
|
||||||
: OperatorObj(OpType::ReduceMean, {input}, {output}), keepDims(keepDims) {
|
bool keepDims)
|
||||||
|
: OperatorObj(opType, {input}, {output}), keepDims(keepDims) {
|
||||||
const auto size = input->getRank();
|
const auto size = input->getRank();
|
||||||
if (_axes) {
|
if (_axes) {
|
||||||
for (auto idx : *_axes) {
|
for (auto idx : *_axes) {
|
||||||
|
@ -17,11 +18,11 @@ ReduceMeanObj::ReduceMeanObj(GraphObj *graph, Tensor input, Tensor output,
|
||||||
IT_ASSERT(checkValid(graph));
|
IT_ASSERT(checkValid(graph));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ReduceMeanObj::isReduced(int idx) const {
|
bool ReduceBaseObj::isReduced(int idx) const {
|
||||||
return axes.find(idx) != axes.end();
|
return axes.find(idx) != axes.end();
|
||||||
}
|
}
|
||||||
|
|
||||||
optional<vector<Shape>> ReduceMeanObj::inferShape(const TensorVec &inputs) {
|
optional<vector<Shape>> ReduceBaseObj::inferShape(const TensorVec &inputs) {
|
||||||
auto dims = inputs[0]->getDims();
|
auto dims = inputs[0]->getDims();
|
||||||
auto rank = inputs[0]->getRank();
|
auto rank = inputs[0]->getRank();
|
||||||
|
|
||||||
|
@ -43,10 +44,9 @@ optional<vector<Shape>> ReduceMeanObj::inferShape(const TensorVec &inputs) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string ReduceMeanObj::toString() const {
|
std::string ReduceBaseObj::toString() const {
|
||||||
std::ostringstream os;
|
std::ostringstream os;
|
||||||
os << "ReduceMean"
|
os << type.toString() << "[" << getGuid() << "]";
|
||||||
<< "[" << getGuid() << "]";
|
|
||||||
os << "(";
|
os << "(";
|
||||||
os << vecToString(inputs[0]->getDims()) << ",";
|
os << vecToString(inputs[0]->getDims()) << ",";
|
||||||
|
|
||||||
|
@ -66,7 +66,7 @@ std::string ReduceMeanObj::toString() const {
|
||||||
return os.str();
|
return os.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
vector<int> ReduceMeanObj::getWorkloadVector() const {
|
vector<int> ReduceBaseObj::getWorkloadVector() const {
|
||||||
vector<int> ret = inputs[0]->getDims();
|
vector<int> ret = inputs[0]->getDims();
|
||||||
ret.emplace(ret.begin(), type.underlying());
|
ret.emplace(ret.begin(), type.underlying());
|
||||||
ret.emplace_back((int)keepDims);
|
ret.emplace_back((int)keepDims);
|
||||||
|
@ -74,9 +74,18 @@ vector<int> ReduceMeanObj::getWorkloadVector() const {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
vector<int> ReduceMeanObj::getOpAttrVector() const {
|
vector<int> ReduceBaseObj::getOpAttrVector() const {
|
||||||
vector<int> ret = {type.underlying(), (int)keepDims};
|
vector<int> ret = {type.underlying(), (int)keepDims};
|
||||||
ret.insert(ret.end(), axes.begin(), axes.end());
|
ret.insert(ret.end(), axes.begin(), axes.end());
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ReduceMeanObj::ReduceMeanObj(GraphObj *graph, Tensor input, Tensor output,
|
||||||
|
const optional<vector<int>> &_axes, bool keepDims)
|
||||||
|
: ReduceBaseObj(graph, OpType::ReduceMean, input, output, _axes, keepDims) {
|
||||||
|
}
|
||||||
|
|
||||||
|
ReduceSumObj::ReduceSumObj(GraphObj *graph, Tensor input, Tensor output,
|
||||||
|
const optional<vector<int>> &_axes, bool keepDims)
|
||||||
|
: ReduceBaseObj(graph, OpType::ReduceSum, input, output, _axes, keepDims) {}
|
||||||
} // namespace infini
|
} // namespace infini
|
|
@ -7,7 +7,7 @@
|
||||||
#include "operators/extend.h"
|
#include "operators/extend.h"
|
||||||
#include "operators/pad.h"
|
#include "operators/pad.h"
|
||||||
#include "operators/pooling.h"
|
#include "operators/pooling.h"
|
||||||
#include "operators/reduce_mean.h"
|
#include "operators/reduce.h"
|
||||||
#include "operators/slice.h"
|
#include "operators/slice.h"
|
||||||
#include "operators/split.h"
|
#include "operators/split.h"
|
||||||
#include "operators/unary.h"
|
#include "operators/unary.h"
|
||||||
|
|
|
@ -0,0 +1,83 @@
|
||||||
|
#include "core/graph.h"
|
||||||
|
#include "core/kernel.h"
|
||||||
|
#include "core/runtime.h"
|
||||||
|
#include "cuda/cuda_runtime.h"
|
||||||
|
#include "cuda/cuda_utility.h"
|
||||||
|
#include "operators/reduce.h"
|
||||||
|
|
||||||
|
#include "test.h"
|
||||||
|
|
||||||
|
namespace infini {
|
||||||
|
|
||||||
|
template <typename ReduceObjT>
|
||||||
|
void test_reduce(const Shape &shape, const vector<float> &data,
|
||||||
|
const optional<const vector<int>> &axis, bool keepDims,
|
||||||
|
const vector<float> &ExpectData) {
|
||||||
|
Runtime cpuRuntime = NativeCpuRuntimeObj::getInstance();
|
||||||
|
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
||||||
|
|
||||||
|
// Build input data on CPU
|
||||||
|
Tensor icpu = make_ref<TensorObj>(shape, DataType::Float32, cpuRuntime);
|
||||||
|
|
||||||
|
// Build CUDA graph
|
||||||
|
Graph g = make_ref<GraphObj>(cudaRuntime);
|
||||||
|
auto i = g->cloneTensor(icpu);
|
||||||
|
auto op = g->addOp<ReduceObjT>(i, nullptr, axis, keepDims);
|
||||||
|
|
||||||
|
// allocate CUDA memory
|
||||||
|
g->dataMalloc();
|
||||||
|
i->copyin(data);
|
||||||
|
|
||||||
|
// Execute on CUDA
|
||||||
|
cudaRuntime->run(g);
|
||||||
|
|
||||||
|
// clone CUDA output to CPU
|
||||||
|
auto o = op->getOutput();
|
||||||
|
auto ocpu = o->clone(cpuRuntime);
|
||||||
|
|
||||||
|
// check results on CPU
|
||||||
|
EXPECT_TRUE(ocpu->equalData(ExpectData));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(CUDA_ReduceMean, run) {
|
||||||
|
test_reduce<ReduceMeanObj>(
|
||||||
|
Shape{3, 2, 2}, vector<float>{5, 1, 20, 2, 30, 1, 40, 2, 55, 1, 60, 2},
|
||||||
|
std::nullopt, true, vector<float>{18.25});
|
||||||
|
test_reduce<ReduceMeanObj>(
|
||||||
|
Shape{1, 3, 2, 2, 1},
|
||||||
|
vector<float>{5, 1, 20, 2, 30, 1, 40, 2, 55, 1, 60, 2}, std::nullopt,
|
||||||
|
false, vector<float>{18.25});
|
||||||
|
|
||||||
|
test_reduce<ReduceMeanObj>(
|
||||||
|
Shape{2, 3, 2, 2},
|
||||||
|
vector<float>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
|
||||||
|
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23},
|
||||||
|
vector<int>{1, 2}, false, vector<float>{5, 6, 17, 18});
|
||||||
|
test_reduce<ReduceMeanObj>(
|
||||||
|
Shape{2, 3, 2, 2, 1},
|
||||||
|
vector<float>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
|
||||||
|
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23},
|
||||||
|
vector<int>{1, 2}, true, vector<float>{5, 6, 17, 18});
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(CUDA_ReduceSum, run) {
|
||||||
|
test_reduce<ReduceSumObj>(Shape{3, 2, 2},
|
||||||
|
vector<float>{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
|
||||||
|
std::nullopt, true, vector<float>{12});
|
||||||
|
test_reduce<ReduceSumObj>(Shape{1, 3, 2, 2, 1},
|
||||||
|
vector<float>{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
|
||||||
|
std::nullopt, false, vector<float>{12});
|
||||||
|
|
||||||
|
test_reduce<ReduceSumObj>(
|
||||||
|
Shape{2, 3, 2, 2},
|
||||||
|
vector<float>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
|
||||||
|
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23},
|
||||||
|
vector<int>{1, 2}, false, vector<float>{30, 36, 102, 108});
|
||||||
|
test_reduce<ReduceSumObj>(
|
||||||
|
Shape{2, 3, 2, 2, 1},
|
||||||
|
vector<float>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
|
||||||
|
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23},
|
||||||
|
vector<int>{1, 2}, true, vector<float>{30, 36, 102, 108});
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace infini
|
|
@ -1,61 +0,0 @@
|
||||||
#include "core/graph.h"
|
|
||||||
#include "core/kernel.h"
|
|
||||||
#include "core/runtime.h"
|
|
||||||
#include "cuda/cuda_runtime.h"
|
|
||||||
#include "cuda/cuda_utility.h"
|
|
||||||
#include "operators/reduce_mean.h"
|
|
||||||
|
|
||||||
#include "test.h"
|
|
||||||
|
|
||||||
namespace infini {
|
|
||||||
|
|
||||||
void test_reducemean(const Shape &shape, const vector<float> &data,
|
|
||||||
const optional<const vector<int>> &axis, bool keepDims,
|
|
||||||
const vector<float> &ExpectData) {
|
|
||||||
Runtime cpuRuntime = NativeCpuRuntimeObj::getInstance();
|
|
||||||
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
|
||||||
|
|
||||||
// Build input data on CPU
|
|
||||||
Tensor icpu = make_ref<TensorObj>(shape, DataType::Float32, cpuRuntime);
|
|
||||||
|
|
||||||
// Build CUDA graph
|
|
||||||
Graph g = make_ref<GraphObj>(cudaRuntime);
|
|
||||||
auto i = g->cloneTensor(icpu);
|
|
||||||
auto op = g->addOp<ReduceMeanObj>(i, nullptr, axis, keepDims);
|
|
||||||
|
|
||||||
// allocate CUDA memory
|
|
||||||
g->dataMalloc();
|
|
||||||
i->copyin(data);
|
|
||||||
|
|
||||||
// Execute on CUDA
|
|
||||||
cudaRuntime->run(g);
|
|
||||||
|
|
||||||
// clone CUDA output to CPU
|
|
||||||
auto o = op->getOutput();
|
|
||||||
auto ocpu = o->clone(cpuRuntime);
|
|
||||||
|
|
||||||
// check results on CPU
|
|
||||||
EXPECT_TRUE(ocpu->equalData(ExpectData));
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST(CUDA_ReduceMean, run) {
|
|
||||||
test_reducemean(Shape{3, 2, 2},
|
|
||||||
vector<float>{5, 1, 20, 2, 30, 1, 40, 2, 55, 1, 60, 2},
|
|
||||||
std::nullopt, true, vector<float>{18.25});
|
|
||||||
test_reducemean(Shape{1, 3, 2, 2, 1},
|
|
||||||
vector<float>{5, 1, 20, 2, 30, 1, 40, 2, 55, 1, 60, 2},
|
|
||||||
std::nullopt, false, vector<float>{18.25});
|
|
||||||
|
|
||||||
test_reducemean(Shape{2, 3, 2, 2},
|
|
||||||
vector<float>{0, 1, 2, 3, 4, 5, 6, 7,
|
|
||||||
8, 9, 10, 11, 12, 13, 14, 15,
|
|
||||||
16, 17, 18, 19, 20, 21, 22, 23},
|
|
||||||
vector<int>{1, 2}, false, vector<float>{5, 6, 17, 18});
|
|
||||||
test_reducemean(Shape{2, 3, 2, 2, 1},
|
|
||||||
vector<float>{0, 1, 2, 3, 4, 5, 6, 7,
|
|
||||||
8, 9, 10, 11, 12, 13, 14, 15,
|
|
||||||
16, 17, 18, 19, 20, 21, 22, 23},
|
|
||||||
vector<int>{1, 2}, true, vector<float>{5, 6, 17, 18});
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace infini
|
|
|
@ -1,51 +1,55 @@
|
||||||
#include "core/graph.h"
|
#include "core/graph.h"
|
||||||
#include "core/kernel.h"
|
#include "core/kernel.h"
|
||||||
#include "core/runtime.h"
|
#include "core/runtime.h"
|
||||||
#include "operators/reduce_mean.h"
|
#include "operators/reduce.h"
|
||||||
|
|
||||||
#include "test.h"
|
#include "test.h"
|
||||||
|
|
||||||
namespace infini {
|
namespace infini {
|
||||||
|
|
||||||
TEST(ReduceMean, ShapeInference) {
|
template <typename ReduceObjT> void testShapeInference() {
|
||||||
Runtime runtime = NativeCpuRuntimeObj::getInstance();
|
Runtime runtime = NativeCpuRuntimeObj::getInstance();
|
||||||
{
|
{
|
||||||
Graph g = make_ref<GraphObj>(runtime);
|
Graph g = make_ref<GraphObj>(runtime);
|
||||||
Tensor i = g->addTensor({2, 3, 3, 4}, DataType::Float32);
|
Tensor i = g->addTensor({2, 3, 3, 4}, DataType::Float32);
|
||||||
auto op = g->addOp<ReduceMeanObj>(i, nullptr, std::nullopt, true);
|
auto op = g->addOp<ReduceObjT>(i, nullptr, std::nullopt, true);
|
||||||
EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 1, 1, 1}));
|
EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 1, 1, 1}));
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
Graph g = make_ref<GraphObj>(runtime);
|
Graph g = make_ref<GraphObj>(runtime);
|
||||||
Tensor i = g->addTensor({2, 3, 3, 4}, DataType::Float32);
|
Tensor i = g->addTensor({2, 3, 3, 4}, DataType::Float32);
|
||||||
auto op = g->addOp<ReduceMeanObj>(i, nullptr, vector<int>{1, 3}, true);
|
auto op = g->addOp<ReduceObjT>(i, nullptr, vector<int>{1, 3}, true);
|
||||||
EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 1, 3, 1}));
|
EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 1, 3, 1}));
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
Graph g = make_ref<GraphObj>(runtime);
|
Graph g = make_ref<GraphObj>(runtime);
|
||||||
Tensor i = g->addTensor({2, 3, 3, 4}, DataType::Float32);
|
Tensor i = g->addTensor({2, 3, 3, 4}, DataType::Float32);
|
||||||
auto op = g->addOp<ReduceMeanObj>(i, nullptr, vector<int>{-3, 3}, true);
|
auto op = g->addOp<ReduceObjT>(i, nullptr, vector<int>{-3, 3}, true);
|
||||||
EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 1, 3, 1}));
|
EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 1, 3, 1}));
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
Graph g = make_ref<GraphObj>(runtime);
|
Graph g = make_ref<GraphObj>(runtime);
|
||||||
Tensor i = g->addTensor({2, 3, 3, 4}, DataType::Float32);
|
Tensor i = g->addTensor({2, 3, 3, 4}, DataType::Float32);
|
||||||
auto op = g->addOp<ReduceMeanObj>(i, nullptr, std::nullopt, false);
|
auto op = g->addOp<ReduceObjT>(i, nullptr, std::nullopt, false);
|
||||||
EXPECT_EQ(op->getOutput()->getDims(), (Shape{1}));
|
EXPECT_EQ(op->getOutput()->getDims(), (Shape{1}));
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
Graph g = make_ref<GraphObj>(runtime);
|
Graph g = make_ref<GraphObj>(runtime);
|
||||||
Tensor i = g->addTensor({2, 3, 3, 4}, DataType::Float32);
|
Tensor i = g->addTensor({2, 3, 3, 4}, DataType::Float32);
|
||||||
auto op = g->addOp<ReduceMeanObj>(i, nullptr, vector<int>{1, 3}, false);
|
auto op = g->addOp<ReduceObjT>(i, nullptr, vector<int>{1, 3}, false);
|
||||||
EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 3}));
|
EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 3}));
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
Graph g = make_ref<GraphObj>(runtime);
|
Graph g = make_ref<GraphObj>(runtime);
|
||||||
Tensor i = g->addTensor({2, 3, 3, 4}, DataType::Float32);
|
Tensor i = g->addTensor({2, 3, 3, 4}, DataType::Float32);
|
||||||
auto op =
|
auto op = g->addOp<ReduceObjT>(i, nullptr, vector<int>{-3, 3}, false);
|
||||||
g->addOp<ReduceMeanObj>(i, nullptr, vector<int>{-3, 3}, false);
|
|
||||||
EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 3}));
|
EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 3}));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(ReduceMean, ShapeInference) {
|
||||||
|
testShapeInference<ReduceMeanObj>();
|
||||||
|
testShapeInference<ReduceSumObj>();
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace infini
|
} // namespace infini
|
Loading…
Reference in New Issue