support mixed dtype (#102)

* feat: support mixed dtype

* feat: support cast op

* test: add test for cast op

* feat: support datatype BFloat16

* feat: support data convert fp32 <-> bfp16

* fix: fix all op's infershape func

* fix as review comment
This commit is contained in:
zhangyunze 2023-08-16 21:49:43 +08:00 committed by GitHub
parent 0dc5347089
commit ef672894d0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
55 changed files with 992 additions and 712 deletions

View File

@ -19,6 +19,7 @@ class DataType {
static const DataType Double;
static const DataType UInt32;
static const DataType UInt64;
static const DataType BFloat16;
// "sizePerElement" show the DType to cpu_type
// DataType::Bool -> int8_t DataType::Float16 -> uint16_t
static constexpr size_t sizePerElement[]{0,
@ -34,14 +35,19 @@ class DataType {
sizeof(uint16_t),
sizeof(double),
sizeof(uint32_t),
sizeof(uint64_t)};
sizeof(uint64_t),
0,
0,
sizeof(uint16_t)};
static constexpr std::string_view names[]{
"Undefine", "Float32", "UInt8", "Int8", "UInt16",
"Int16", "Int32", "Int64", "String", "Bool",
"Float16", "Double", "UInt32", "UInt64"};
"Float16", "Double", "UInt32", "UInt64", "PlaceHolder",
"PlaceHolder", "BFloat16"};
static constexpr int cpuType[]{-1, 0, 2, 3, 4, 5, 6, 7, -1, 3, 4, 9, 1, 8};
static constexpr int cpuType[]{-1, 0, 2, 3, 4, 5, 6, 7, -1,
3, 4, 9, 1, 8, -1, -1, 4};
private:
int index;
@ -79,6 +85,7 @@ inline const DataType DataType::Float16(10);
inline const DataType DataType::Double(11);
inline const DataType DataType::UInt32(12);
inline const DataType DataType::UInt64(13);
inline const DataType DataType::BFloat16(16);
// Method definitions are out of the declaration due to GCC bug:
// https://stackoverflow.com/questions/49707184/explicit-specialization-in-non-namespace-scope-does-not-compile-in-gcc
template <> inline int DataType::get<float>() { return 0; }
@ -107,5 +114,6 @@ template <> struct DT<10> { using t = uint16_t; };
template <> struct DT<11> { using t = double; };
template <> struct DT<12> { using t = uint32_t; };
template <> struct DT<13> { using t = uint64_t; };
template <> struct DT<16> { using t = uint16_t; };
} // namespace infini

View File

@ -66,6 +66,7 @@ class GraphHandlerObj {
const optional<vector<int>> &steps);
Tensor pad(Tensor input, Tensor output, const vector<int> &pads,
const optional<vector<int>> &axes);
Tensor cast(Tensor input, Tensor output, int to);
//------ modifiers

View File

@ -36,6 +36,7 @@ class TensorObj : public TensorBaseObj {
size_t getBytes() const { return _size * dtype.getSize(); }
Shape getDims() const { return shape; }
size_t getRank() const { return shape.size(); }
vector<size_t> getStride() const;
size_t getOffset(const vector<int> &ds) const;
void dataMalloc();
@ -330,7 +331,7 @@ class TensorObj : public TensorBaseObj {
// }
// void initSplittingPoints() {
// splittingPoints.resize(getDims().size()); }
// splittingPoints.resize(getRank()); }
// void printShape();
};

View File

@ -15,7 +15,7 @@ class TransposeObj : public OperatorObj {
std::vector<int> getPermute() const { return transposePermute; }
private:
vector<int> transposePermute = {1, 1, 1, 1};
vector<int> transposePermute;
vector<int> getWorkloadVector() const override;
vector<int> getOpAttrVector() const override;
};

View File

@ -134,17 +134,17 @@ class TransformObj : public OperatorObj {
vector<int> getOpAttrVector() const override;
};
class CastObj : public OperatorObj {
public:
enum CastType {
Float2Half = 0,
enum class CastType {
Float2Float16 = 0,
Float2Int64,
Float2Int32,
Float2Int16,
Float2Int8,
Float2BFloat16,
Int322Float,
Int322Int8,
Int322Int16,
Int322Int64,
Int162Float,
Int162Int32,
Int82Float,
@ -153,12 +153,16 @@ class CastObj : public OperatorObj {
Uint82Float,
Uint82Int32,
Uint82Int64,
Int322Int64,
Int642Int32,
Int642Uint32,
Int642Float,
Uint322Int64,
Float162Float,
BFloat162Float,
};
class CastObj : public OperatorObj {
public:
CastObj(GraphObj *graph, Tensor input, Tensor output, CastType type);
OP_CLONE(CastObj);
optional<vector<Shape>> inferShape(const TensorVec &inputs) const override;

View File

@ -8,4 +8,6 @@ union Uf32 {
};
uint16_t float_to_fp16(const float x);
float fp16_to_float(const uint16_t x);
uint16_t float_to_bfp16(const float x);
float bfp16_to_float(const uint16_t x);
} // namespace infini

View File

@ -0,0 +1,15 @@
#pragma once
#ifndef OPERATOR_UTIL_H
#define OPERATOR_UTIL_H
#include "core/tensor.h"
namespace infini {
// Launch a broadcast shape based on the shape of input A and B
Shape infer_broadcast(const Shape &A, const Shape &B);
// Launch the real axis based on rank and current axis
int get_real_axis(const int &axis, const int &rank);
} // namespace infini
#endif

View File

@ -62,7 +62,17 @@ class OnnxStub:
tensors[initializer.name] = self.handler.tensor(dims, initializer.data_type)
data[initializer.name] = initializer
node_name = []
new_node_name = []
for node in model.graph.node:
node_name.append(node.name)
node_list = model.graph.node
while len(node_list) != 0:
for node in model.graph.node:
if node.name not in node_list:
continue
if _analyse_node(node, tensors):
continue
if node.op_type == "Conv":
attributes = _parse_attribute(
node,
@ -170,7 +180,8 @@ class OnnxStub:
node, {"alpha": 1.0, "beta": 1.0, "transA": 0, "transB": 0}
)
(alpha, beta, transA, transB) = (
attributes[name] for name in ["alpha", "beta", "transA", "transB"]
attributes[name]
for name in ["alpha", "beta", "transA", "transB"]
)
# FIXME unsupport attributes: `alpha` `beta`
assert alpha == 1.0
@ -197,7 +208,15 @@ class OnnxStub:
for name in ["momentum", "epsilon", "training_mode"]
)
tensors[node.output[0]] = self.handler.batchNormalization(
input, output, mean, var, scale, bias, momentum, eps, training != 0
input,
output,
mean,
var,
scale,
bias,
momentum,
eps,
training != 0,
)
elif node.op_type == "MaxPool":
attributes = _parse_attribute(
@ -349,7 +368,8 @@ class OnnxStub:
tensors[node.input[0]],
tensors.get(node.output[0]),
next(
(attr.i for attr in node.attribute if attr.name == "axis"), -1
(attr.i for attr in node.attribute if attr.name == "axis"),
-1,
),
)
elif node.op_type == "Abs":
@ -371,7 +391,9 @@ class OnnxStub:
tensors[node.output[0]] = self.handler.flatten(
tensors[node.input[0]],
tensors.get(node.output[0]),
next((attr.i for attr in node.attribute if attr.name == "axis")),
next(
(attr.i for attr in node.attribute if attr.name == "axis")
),
)
elif node.op_type == "PRelu":
tensors[node.output[0]] = self.handler.pRelu(
@ -392,7 +414,8 @@ class OnnxStub:
)
elif node.op_type == "Transpose":
perm = next(
(attr.ints for attr in node.attribute if attr.name == "perm"), None
(attr.ints for attr in node.attribute if attr.name == "perm"),
None,
)
tensors[node.output[0]] = self.handler.transpose(
tensors[node.input[0]],
@ -449,7 +472,9 @@ class OnnxStub:
tensors[node.output[0]] = self.handler.concat(
[tensors[name] for name in node.input],
tensors.get(node.output[0]),
next((attr.i for attr in node.attribute if attr.name == "axis")),
next(
(attr.i for attr in node.attribute if attr.name == "axis")
),
)
elif node.op_type == "Split":
for name, tensor in zip(
@ -458,7 +483,11 @@ class OnnxStub:
tensors[node.input[0]],
None,
next(
(attr.i for attr in node.attribute if attr.name == "axis"),
(
attr.i
for attr in node.attribute
if attr.name == "axis"
),
0,
),
len(node.output),
@ -470,7 +499,9 @@ class OnnxStub:
tensors[node.input[0]],
tensors[node.input[1]],
tensors.get(node.output[0]),
next((attr.i for attr in node.attribute if attr.name == "axis")),
next(
(attr.i for attr in node.attribute if attr.name == "axis")
),
)
elif node.op_type == "ReduceMean":
tensors[node.output[0]] = self.handler.reduce_mean(
@ -478,10 +509,20 @@ class OnnxStub:
tensors.get(node.output[0]),
# NOTE(constroy): `axes` is an attribute until opset version 13.
next(
(attr.ints for attr in node.attribute if attr.name == "axes"),
(
attr.ints
for attr in node.attribute
if attr.name == "axes"
),
None,
),
next((attr.i for attr in node.attribute if attr.name == "keepdims"))
next(
(
attr.i
for attr in node.attribute
if attr.name == "keepdims"
)
)
!= 0,
)
elif node.op_type == "Slice":
@ -490,15 +531,21 @@ class OnnxStub:
tensors.get(node.output[0]),
_parse_data(data[node.input[1]]),
_parse_data(data[node.input[2]]),
_parse_data(data[node.input[3]]) if len(node.input) > 3 else None,
_parse_data(data[node.input[4]]) if len(node.input) > 4 else None,
_parse_data(data[node.input[3]])
if len(node.input) > 3
else None,
_parse_data(data[node.input[4]])
if len(node.input) > 4
else None,
)
elif node.op_type == "Pad":
tensors[node.output[0]] = self.handler.pad(
tensors[node.input[0]],
tensors.get(node.output[0]),
_parse_data(data[node.input[1]]),
_parse_data(data[node.input[3]]) if len(node.input) > 3 else None,
_parse_data(data[node.input[3]])
if len(node.input) > 3
else None,
)
elif node.op_type == "Dropout":
for name, tensor in zip(
@ -506,7 +553,9 @@ class OnnxStub:
self.handler.dropout(
tensors[node.input[0]],
tensors.get(node.output[0]),
tensors.get(node.output[1]) if len(node.output) > 1 else None,
tensors.get(node.output[1])
if len(node.output) > 1
else None,
_parse_data(data[node.input[1]])[0]
if len(node.input) > 1
else 0.5,
@ -516,8 +565,17 @@ class OnnxStub:
),
):
tensors[name] = tensor
elif node.op_type == "Cast":
tensors[node.output[0]] = self.handler.cast(
tensors[node.input[0]],
tensors.get(node.output[0]),
next((attr.i for attr in node.attribute if attr.name == "to")),
)
else:
raise Exception('Unsupported operator "{}"'.format(node.op_type))
new_node_name.append(node.name)
# update the node_list
node_list = list(set(node_name) - set(new_node_name))
self.handler.data_malloc()
@ -540,6 +598,8 @@ class OnnxStub:
obj.copyin_float16(_parse_data_fp16(tensor))
elif tensor.data_type == TensorProto.INT8:
obj.copyin_uint8(_parse_data(tensor))
elif tensor.data_type == TensorProto.BFLOAT16:
obj.copyin_float16(_parse_data_fp16(tensor))
else:
assert False, "Unsupported Tensor Type: {}".format(tensor.data_type)
@ -823,6 +883,9 @@ class OnnxStub:
ctx.push_data_input(name, "max", TensorProto.FLOAT, [], [])
)
ctx.push_node(make_node(ty.name, inputs, outputs, name))
elif ty == backend.OpTypeId.Cast:
to = backend.cast_to_of(op)
ctx.push_node(make_node(ty.name, inputs, outputs, name, to=to))
else:
raise Exception("Unsupported OpType", ty)
@ -922,3 +985,10 @@ def _parse_data_fp16(tensor: TensorProto):
def _take_shape_dim(shape: TensorShapeProto) -> List[int]:
return [(d.dim_value if d.dim_value > 0 else 1) for d in shape.dim]
def _analyse_node(node: NodeProto, tensors) -> bool:
for i in node.input:
if i not in tensors:
return True
return False

View File

@ -79,6 +79,21 @@ class TestStringMethods(unittest.TestCase):
)
make_and_import_model(make_graph([conv], "conv_fp16", [i, w], [o]))
def test_conv_bfp16(self):
i = make_tensor_value_info("i", TensorProto.BFLOAT16, [1, 3, 4, 4])
w = make_tensor_value_info("w", TensorProto.BFLOAT16, [2, 3, 3, 3])
o = make_tensor_value_info("o", TensorProto.BFLOAT16, [1, 2, 2, 2])
conv = make_node(
"Conv",
["i", "w"],
["o"],
"conv",
pads=[1, 1, 1, 1],
strides=[2, 1],
dilations=[1, 2],
)
make_and_import_model(make_graph([conv], "conv_bfp16", [i, w], [o]))
def test_matmul(self):
x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 2, 3])
a = make_tensor_value_info("a", TensorProto.FLOAT, [1, 3, 4])
@ -226,9 +241,7 @@ class TestStringMethods(unittest.TestCase):
x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 3, 5, 7])
y = make_tensor_value_info("y", TensorProto.FLOAT, [1 * 3, 5 * 7])
flatten = make_node("Flatten", ["x"], ["y"], axis=2, name="flatten")
make_and_import_model(
make_graph([flatten], "flatten", [x], [y])
)
make_and_import_model(make_graph([flatten], "flatten", [x], [y]))
def test_reshape(self):
data = make_tensor_value_info("data", TensorProto.FLOAT, [2, 3, 4, 5])
@ -331,6 +344,14 @@ class TestStringMethods(unittest.TestCase):
y = handler.tensor([3, 2, 1], 12)
handler.reshape(x, y, [3, 2, 1])
def test_cast(self):
input1 = make_tensor_value_info("input1", TensorProto.FLOAT, [1, 3, 2, 4])
output = make_tensor_value_info("output", TensorProto.FLOAT16, [1, 3, 2, 4])
cast = make_node(
"Cast", ["input1"], ["output"], to=TensorProto.FLOAT16, name="cast"
)
make_and_import_model(make_graph([cast], "cast", [input1], [output]))
if __name__ == "__main__":
unittest.main()

View File

@ -18,6 +18,7 @@
namespace infini {
static DataType dtype_repr_convert(int);
static CastType inferCastType(Tensor input, int to);
Tensor GraphHandlerObj::tensor(Shape dims, int dtype) {
return g->addTensor(std::move(dims), dtype_repr_convert(dtype));
@ -293,6 +294,76 @@ Tensor GraphHandlerObj::pad(Tensor input, Tensor output,
}
}
Tensor GraphHandlerObj::cast(Tensor input, Tensor output, int to) {
if (output) {
g->addOpWithOutputs<CastObj>(std::move(input), output,
inferCastType(input, to));
return output;
} else {
return g
->addOp<CastObj>(std::move(input), output, inferCastType(input, to))
->getOutput();
}
}
static CastType inferCastType(Tensor input, int to) {
auto iType = input->getDType();
auto oType = DataType(to);
if (iType == DataType::Float32 && oType == DataType::Float16) {
return CastType::Float2Float16;
} else if (iType == DataType::Float32 && oType == DataType::Int64) {
return CastType::Float2Int64;
} else if (iType == DataType::Float32 && oType == DataType::Int32) {
return CastType::Float2Int32;
} else if (iType == DataType::Float32 && oType == DataType::Int16) {
return CastType::Float2Int16;
} else if (iType == DataType::Float32 && oType == DataType::Int8) {
return CastType::Float2Int8;
} else if (iType == DataType::Float32 && oType == DataType::BFloat16) {
return CastType::Float2BFloat16;
} else if (iType == DataType::Int32 && oType == DataType::Float32) {
return CastType::Int322Float;
} else if (iType == DataType::Int32 && oType == DataType::Int8) {
return CastType::Int322Int8;
} else if (iType == DataType::Int32 && oType == DataType::Int16) {
return CastType::Int322Int16;
} else if (iType == DataType::Int32 && oType == DataType::Int64) {
return CastType::Int322Int64;
} else if (iType == DataType::Int16 && oType == DataType::Int32) {
return CastType::Int162Int32;
} else if (iType == DataType::Int16 && oType == DataType::Float32) {
return CastType::Int162Float;
} else if (iType == DataType::Int8 && oType == DataType::Float32) {
return CastType::Int82Float;
} else if (iType == DataType::Int8 && oType == DataType::Int16) {
return CastType::Int82Int16;
} else if (iType == DataType::Int8 && oType == DataType::Int32) {
return CastType::Int82Int32;
} else if (iType == DataType::UInt8 && oType == DataType::Int32) {
return CastType::Uint82Int32;
} else if (iType == DataType::UInt8 && oType == DataType::Float32) {
return CastType::Uint82Float;
} else if (iType == DataType::UInt8 && oType == DataType::Int64) {
return CastType::Uint82Int64;
} else if (iType == DataType::Int64 && oType == DataType::Float32) {
return CastType::Int642Float;
} else if (iType == DataType::Int64 && oType == DataType::UInt32) {
return CastType::Int642Uint32;
} else if (iType == DataType::Int64 && oType == DataType::Int32) {
return CastType::Int642Int32;
} else if (iType == DataType::UInt32 && oType == DataType::Int64) {
return CastType::Uint322Int64;
} else if (iType == DataType::Float16 && oType == DataType::Float32) {
return CastType::Float162Float;
} else if (iType == DataType::BFloat16 && oType == DataType::Float32) {
return CastType::BFloat162Float;
} else {
IT_TODO_HALT_MSG("Unsupported CastType : input_type is " +
iType.toString() + " output_type is " +
oType.toString());
}
}
static DataType dtype_repr_convert(int dtype) {
switch (dtype) {
case 0:
@ -323,6 +394,8 @@ static DataType dtype_repr_convert(int dtype) {
return DataType::UInt32;
case 13:
return DataType::UInt64;
case 16:
return DataType::BFloat16;
default:
IT_ASSERT(false, "Unsupported data type");
}

View File

@ -85,6 +85,7 @@ void TensorObj::printData() const {
else TRY_PRINT(11) //
else TRY_PRINT(12) //
else TRY_PRINT(13) //
else TRY_PRINT(16) //
else IT_TODO_HALT();
#undef TRY_PRINT
@ -118,6 +119,7 @@ bool TensorObj::equalData(const Tensor &rhs, double relativeError) const {
else TEST_EQUAL(11) //
else TEST_EQUAL(12) //
else TEST_EQUAL(13) //
else TEST_EQUAL(16) //
else IT_TODO_HALT();
#undef TEST_EQUAL

View File

@ -95,6 +95,7 @@ void export_values(py::module &m) {
.VALUE(OpType, Abs)
.VALUE(OpType, Resize)
.VALUE(OpType, Dropout)
.VALUE(OpType, Cast)
.export_values();
#undef VALUE
@ -129,6 +130,8 @@ static int tensor_dtype(Tensor t) {
return 12;
if (t->getDType() == DataType::UInt64)
return 13;
if (t->getDType() == DataType::BFloat16)
return 16;
IT_ASSERT(false, "Unsupported data type");
}
@ -242,6 +245,13 @@ static int flatten_axis_of(Operator op) {
return dynamic_cast<const FlattenObj *>(op.get())->getAxis();
}
static int cast_to_of(Operator op) {
IT_ASSERT(op->getOpType() == OpType::Cast);
auto castOutputDtype =
dynamic_cast<const CastObj *>(op.get())->getOutputDataType();
return castOutputDtype.getIndex();
}
void export_functions(py::module &m) {
#define FUNCTION(NAME) def(#NAME, &NAME)
m.def("cpu_runtime", &NativeCpuRuntimeObj::getInstance)
@ -271,7 +281,8 @@ void export_functions(py::module &m) {
.FUNCTION(concat_axis_of)
.FUNCTION(split_axis_of)
.FUNCTION(gather_axis_of)
.FUNCTION(flatten_axis_of);
.FUNCTION(flatten_axis_of)
.FUNCTION(cast_to_of);
#undef FUNCTION
}
@ -346,6 +357,7 @@ void init_graph_builder(py::module &m) {
.def("reduce_mean", &Handler::reduceMean, policy::move)
.def("slice", &Handler::slice, policy::move)
.def("pad", &Handler::pad, policy::move)
.def("cast", &Handler::cast, policy::move)
.def("topo_sort", &Handler::topo_sort, policy::automatic)
.def("optimize", &Handler::optimize, policy::automatic)
.def("operators", &Handler::operators, policy::move)

View File

@ -13,7 +13,6 @@ template <typename T> class NaiveMatmul : public CpuKernelWithoutConfig {
T *C = op->getOutput()->getRawDataPtr<T *>();
IT_ASSERT(op->getTransA() == false && op->getTransB() == false);
IT_ASSERT(op->getAct() == ActType::None);
IT_ASSERT(op->getB() == 1);
const int M = op->getM(), N = op->getN(), K = op->getK();
for (int i = 0; i < M; i++) {
for (int j = 0; j < N; j++) {

View File

@ -14,9 +14,9 @@ class GatherCuda : public CudaKernelWithoutConfig {
auto out = op->getOutput();
metaData.indexValue = index->getRawDataPtr<int *>();
metaData.axis = op->getAxis();
metaData.inNDim = in->getDims().size();
metaData.outNDim = out->getDims().size();
metaData.idxNDim = index->getDims().size();
metaData.inNDim = in->getRank();
metaData.outNDim = out->getRank();
metaData.idxNDim = index->getRank();
for (int i = 0; i < metaData.outNDim; ++i)
metaData.outDim[i] = out->getDims()[i];
for (int i = 0; i < metaData.idxNDim; ++i) {

View File

@ -51,8 +51,8 @@ class matmulCublas : public Kernel {
cublasStatus_t stat;
if (b > 1) {
// Support batch broadcast with zero stride
int dimA = op->getInputs(0)->getDims().size();
int dimB = op->getInputs(1)->getDims().size();
int dimA = op->getInputs(0)->getRank();
int dimB = op->getInputs(1)->getRank();
long long strideA =
(dimA == 2 ||
(dimA == 3 && op->getInputs(0)->getDims()[0] == 1))

View File

@ -7,7 +7,7 @@ class PadSliceCudaCompute {
public:
void do_compute(Tensor partTensor, Tensor wholeTensor, const Shape &begNos,
bool isPad) const {
int nDims = partTensor->getDims().size();
int nDims = partTensor->getRank();
IT_ASSERT(MAX_DIM >= nDims);
TransMetaData metadata;
for (int i = 0; i < nDims; i++) {

View File

@ -14,7 +14,7 @@ class ReduceMeanCudnn : public CudaKernelWithoutConfig {
// Each dimension of the output tensor C must match the corresponding
// dimension of the input tensor A or must be equal to 1. The dimensions
// equal to 1 indicate the dimensions of A to be reduced.
int nInDims = input->getDims().size();
int nInDims = input->getRank();
IT_ASSERT(CUDNN_DIM_MAX >= nInDims);
int inDimArray[CUDNN_DIM_MAX], outDimArray[CUDNN_DIM_MAX],
inStrideArray[CUDNN_DIM_MAX], outStrideArray[CUDNN_DIM_MAX];

View File

@ -9,7 +9,7 @@ class ResizeCuda : public CudaKernelWithoutConfig {
auto in = op->getInputs(0);
auto out = op->getOutputs()[0];
int nDims = in->getDims().size();
int nDims = in->getRank();
if (nDims > 4)
IT_TODO_HALT();

View File

@ -9,7 +9,7 @@ namespace infini {
class CudaCompute {
void initComposedTensorMetadata(ComposedTensorMetadata &metadata,
Tensor tensor) const {
int nDims = tensor->getDims().size();
int nDims = tensor->getRank();
auto strides = tensor->getStride();
IT_ASSERT(strides.size() == (size_t)nDims);
for (int i = 0; i < nDims; ++i) {
@ -60,8 +60,8 @@ class ConcatCuda : private CudaCompute, public CudaKernelWithoutConfig {
void compute(const Operator &_op,
const RuntimeObj *_context) const override {
do_compute(_op->getOutput(), _op->getInputs(),
as<ConcatObj>(_op)->getDim(),
_op->getOutput()->getDims().size(), false);
as<ConcatObj>(_op)->getDim(), _op->getOutput()->getRank(),
false);
}
};
@ -69,8 +69,8 @@ class SplitCuda : private CudaCompute, public CudaKernelWithoutConfig {
void compute(const Operator &_op,
const RuntimeObj *_context) const override {
do_compute(_op->getInputs(0), _op->getOutputs(),
as<SplitObj>(_op)->getDim(),
_op->getInputs(0)->getDims().size(), true);
as<SplitObj>(_op)->getDim(), _op->getInputs(0)->getRank(),
true);
}
};

View File

@ -14,7 +14,7 @@ class MklBatchNorm : public MklKernelWithoutConfig {
// create user memory that describes data layout in the buffers
std::vector<dnnl_dim_t> dims;
for (size_t i = 0; i < op->getInputs(0)->getDims().size(); ++i)
for (size_t i = 0; i < op->getInputs(0)->getRank(); ++i)
dims.push_back(op->getInputs(0)->getDims()[i]);
auto srcMd = dnnl::memory::desc(dims, dnnl::memory::data_type::f32,
@ -25,7 +25,7 @@ class MklBatchNorm : public MklKernelWithoutConfig {
getUserFormatTag(dims.size()));
auto output = dnnl::memory(dstMd, context->getEngine(), dstData);
std::vector<dnnl_dim_t> meanDims(op->getInputs(0)->getDims().size(), 1);
std::vector<dnnl_dim_t> meanDims(op->getInputs(0)->getRank(), 1);
meanDims[1] = op->getInputs(0)->getDims()[1];
auto meanMd = dnnl::memory::desc(meanDims, dnnl::memory::data_type::f32,
getUserFormatTag(meanDims.size()));

View File

@ -34,7 +34,7 @@ class MklBinary : public MklKernelWithoutConfig {
// create user memory that describes data layout in the buffers
std::vector<dnnl_dim_t> dims;
for (size_t i = 0; i < op->getInputs(0)->getDims().size(); ++i)
for (size_t i = 0; i < op->getInputs(0)->getRank(); ++i)
dims.push_back(op->getInputs(0)->getDims()[i]);
auto srcMd1 = dnnl::memory::desc(dims, dnnl::memory::data_type::f32,
@ -89,7 +89,7 @@ class MklUnary : public MklKernelWithoutConfig {
// create user memory that describes data layout in the buffers
std::vector<dnnl_dim_t> dims;
for (size_t i = 0; i < op->getInputs(0)->getDims().size(); ++i)
for (size_t i = 0; i < op->getInputs(0)->getRank(); ++i)
dims.push_back(op->getInputs(0)->getDims()[i]);
auto srcMd = dnnl::memory::desc(dims, dnnl::memory::data_type::f32,

View File

@ -17,9 +17,9 @@ class MklGather : public MklKernelWithoutConfig {
int oSize = out->size();
int idxSize = index->size();
int inNDim = in->getDims().size();
int oNDim = out->getDims().size();
int idxNDim = index->getDims().size();
int inNDim = in->getRank();
int oNDim = out->getRank();
int idxNDim = index->getRank();
int axis = op->getAxis();
int outDim[4] = {0};

View File

@ -10,7 +10,7 @@ class MklPad : public MklKernelWithoutConfig {
auto context = dynamic_cast<const MklRuntimeObj *>(_context);
std::vector<dnnl_dim_t> dims;
for (size_t i = 0; i < op->getInputs(0)->getDims().size(); ++i) {
for (size_t i = 0; i < op->getInputs(0)->getRank(); ++i) {
dims.push_back(op->getInputs(0)->getDims()[i]);
}
auto paddedMd = dnnl::memory::desc(dims, dnnl::memory::data_type::f32,

View File

@ -17,7 +17,7 @@ class MklPooling : public MklKernelWithoutConfig {
// create user memory that describes data layout in the buffers
auto [n, c, h, w, r, s] = op->getNCHWRS();
auto [ph, pw, sh, sw, dh, dw] = op->getPadStrideDilation();
auto nDim = op->getOutput()->getDims().size();
auto nDim = op->getOutput()->getRank();
auto oh = op->getOutput()->getDims()[nDim - 2];
auto ow = op->getOutput()->getDims()[nDim - 1];

View File

@ -18,16 +18,16 @@ class MklReduce : public MklKernelWithoutConfig {
// create user memory that describes data layout in the buffers
std::vector<dnnl_dim_t> inDims, inStrides;
for (size_t i = 0; i < op->getInputs(0)->getDims().size(); ++i) {
for (size_t i = 0; i < op->getInputs(0)->getRank(); ++i) {
inDims.push_back(op->getInputs(0)->getDims()[i]);
inStrides.push_back(op->getInputs(0)->getStride()[i]);
}
std::vector<dnnl_dim_t> oDims(op->getInputs(0)->getDims().size(), 0),
oStrides(op->getInputs(0)->getDims().size(), 1);
std::vector<dnnl_dim_t> oDims(op->getInputs(0)->getRank(), 0),
oStrides(op->getInputs(0)->getRank(), 1);
if (!op->getKeepDims()) {
oDims = inDims;
for (size_t i = 0; i < op->getInputs(0)->getDims().size(); ++i) {
for (size_t i = 0; i < op->getInputs(0)->getRank(); ++i) {
if (op->isReduced(i)) {
oDims[i] = 1;
}
@ -38,7 +38,7 @@ class MklReduce : public MklKernelWithoutConfig {
stride *= oDims[i];
}
} else {
for (size_t i = 0; i < op->getOutput(0)->getDims().size(); ++i) {
for (size_t i = 0; i < op->getOutput(0)->getRank(); ++i) {
oDims[i] = op->getOutput(0)->getDims()[i];
oStrides[i] = op->getOutput(0)->getStride()[i];
}

View File

@ -10,7 +10,7 @@ class MklReshape : public MklKernelWithoutConfig {
auto context = dynamic_cast<const MklRuntimeObj *>(_context);
std::vector<dnnl_dim_t> dims;
for (size_t i = 0; i < op->getInputs(0)->getDims().size(); ++i)
for (size_t i = 0; i < op->getInputs(0)->getRank(); ++i)
dims.push_back(op->getInputs(0)->getDims()[i]);
// create src md and src memory

View File

@ -30,7 +30,7 @@ class MklResize : public MklKernelWithoutConfig {
enum_to_underlying(ResizeObj::ECoordinateTransMode::halfPixel))
IT_TODO_HALT();
int nDim = op->getInputs(0)->getDims().size();
int nDim = op->getInputs(0)->getRank();
IT_ASSERT(nDim == 3 || nDim == 4 ||
nDim == 5 &&
(op->getInputs(0)->getDims()[0] == 1 &&
@ -44,7 +44,7 @@ class MklResize : public MklKernelWithoutConfig {
// create user memory that describes data layout in the buffers
std::vector<dnnl_dim_t> idims, odims;
for (size_t i = 0; i < op->getInputs(0)->getDims().size(); ++i) {
for (size_t i = 0; i < op->getInputs(0)->getRank(); ++i) {
idims.push_back(op->getInputs(0)->getDims()[i]);
odims.push_back(op->getOutput(0)->getDims()[i]);
}

View File

@ -10,7 +10,7 @@ class MklSlice : public MklKernelWithoutConfig {
auto context = dynamic_cast<const MklRuntimeObj *>(_context);
std::vector<dnnl_dim_t> dims;
for (size_t i = 0; i < op->getInputs(0)->getDims().size(); ++i)
for (size_t i = 0; i < op->getInputs(0)->getRank(); ++i)
dims.push_back(op->getInputs(0)->getDims()[i]);
// create src md

View File

@ -14,7 +14,7 @@ class MklSoftmax : public MklKernelWithoutConfig {
// create user memory that describes data layout in the buffers
std::vector<dnnl_dim_t> dims;
for (size_t i = 0; i < op->getInputs(0)->getDims().size(); ++i)
for (size_t i = 0; i < op->getInputs(0)->getRank(); ++i)
dims.push_back(op->getInputs(0)->getDims()[i]);
auto srcMd = dnnl::memory::desc(dims, dnnl::memory::data_type::f32,

View File

@ -10,7 +10,7 @@ class MklSplit : public MklKernelWithoutConfig {
auto context = dynamic_cast<const MklRuntimeObj *>(_context);
std::vector<dnnl_dim_t> dims;
for (size_t i = 0; i < op->getInputs(0)->getDims().size(); ++i)
for (size_t i = 0; i < op->getInputs(0)->getRank(); ++i)
dims.push_back(op->getInputs(0)->getDims()[i]);
// create src md

View File

@ -23,16 +23,11 @@ string G2BMMObj::toString() const {
optional<vector<Shape>> G2BMMObj::inferShape(const TensorVec &inputs) const {
auto A = inputs[0], B = inputs[1];
if (!(A->getDims().size() == 3 && B->getDims().size() == 3))
return {};
if (!(A->getDims()[0] == B->getDims()[0]))
return {};
if (!(A->getDims()[1] == B->getDims()[1]))
return {};
if (!(A->getDims()[2] == B->getDims()[2]))
return {};
if (width < 0)
return {};
IT_ASSERT(A->getRank() == 3 && B->getRank() == 3);
IT_ASSERT(A->getDims()[0] == B->getDims()[0]);
IT_ASSERT(A->getDims()[1] == B->getDims()[1]);
IT_ASSERT(A->getDims()[2] == B->getDims()[2]);
IT_ASSERT(width >= 0);
int b(A->getDims()[0]), m(A->getDims()[1]), n(2 * width + 1);
return {{{b, m, n}}};
}

View File

@ -24,14 +24,10 @@ string GBMMObj::toString() const {
optional<vector<Shape>> GBMMObj::inferShape(const TensorVec &inputs) const {
auto A = inputs[0], B = inputs[1];
if (!(A->getDims().size() == 3 && B->getDims().size() == 3))
return {};
if (!(A->getDims()[0] == B->getDims()[0]))
return {};
if (!(A->getDims()[1] == B->getDims()[1]))
return {};
if (A->getDims()[2] % 2 == 0)
return {};
IT_ASSERT(A->getRank() == 3 && B->getRank() == 3);
IT_ASSERT(A->getDims()[0] == B->getDims()[0]);
IT_ASSERT(A->getDims()[1] == B->getDims()[1]);
IT_ASSERT(A->getDims()[2] % 2 != 0);
int b(A->getDims()[0]), m(A->getDims()[1]), k(B->getDims()[2]);
return {{{b, m, k}}};
}

View File

@ -21,9 +21,10 @@ BatchNormObj::inferShape(const TensorVec &inputs) const {
auto scale = inputs[3];
auto bias = inputs[4];
auto c = std::vector<int>{input->getDims()[1]};
if (mean->getDims() != c || var->getDims() != c || scale->getDims() != c ||
bias->getDims() != c)
return {};
IT_ASSERT(mean->getRank() == 1 && mean->getDims() == c);
IT_ASSERT(var->getRank() == 1 && var->getDims() == c);
IT_ASSERT(scale->getRank() == 1 && scale->getDims() == c);
IT_ASSERT(bias->getRank() == 1 && bias->getDims() == c);
return {{input->getDims()}};
}

View File

@ -1,28 +1,29 @@
#include "operators/concat.h"
#include "utils/operator_utils.h"
namespace infini {
ConcatObj::ConcatObj(GraphObj *graph, TensorVec inputs, Tensor output, int dim)
: OperatorObj(OpType::Concat, inputs, {output}), dim(dim) {
int rank = inputs[0]->getRank();
dim = get_real_axis(dim, rank);
IT_ASSERT(checkValid(graph));
}
optional<vector<Shape>> ConcatObj::inferShape(const TensorVec &inputs) const {
IT_ASSERT(inputs.size() > 1);
Shape dims = inputs[0]->getDims();
auto rank = inputs[0]->getRank();
ShapeElem n = dims.at(dim);
for (auto itr = inputs.begin() + 1; itr != inputs.end(); ++itr) {
auto input = *itr;
auto iDims = input->getDims();
if (dims.size() != iDims.size())
return {};
int nDims = dims.size();
for (auto i = 0; i < nDims; i++) {
IT_ASSERT(rank == input->getRank());
for (auto i = 0; i < (int)rank; i++) {
if (i == dim) {
n += iDims.at(i);
continue;
}
if (iDims.at(i) != dims.at(i))
return {};
IT_ASSERT(iDims.at(i) == dims.at(i));
}
}
dims[dim] = n;

View File

@ -93,8 +93,7 @@ optional<vector<Shape>> ConvObj::inferShape(const TensorVec &inputs) const {
int on = n, oc = f;
int oh = 0, ow = 0;
// For NCHW+FCRS layout, C of input is divisable by C of weight
if (input->getDims()[1] % weight->getDims()[1] != 0)
return {};
IT_ASSERT(input->getDims()[1] % weight->getDims()[1] == 0);
// Set padding size
if (padding == PaddingMode::Other) {
oh = (h - (r - sh) * dh + ph * 2) / sh;
@ -151,8 +150,7 @@ ConvTransposed2dObj::inferShape(const TensorVec &inputs) const {
auto c = weight->getDims()[1];
auto r = weight->getDims()[2];
auto s = weight->getDims()[3];
if (f != weight->getDims()[0])
return {};
IT_ASSERT(f == weight->getDims()[0]);
int on = n, oc = c * group;
int oh = 0, ow = 0;
@ -232,8 +230,7 @@ ConvBackwardFilterObj::inferShape(const TensorVec &inputs) const {
int on = n, oc = f;
int oh = 0, ow = 0;
// For NCHW+FCRS layout, C of input is divisable by C of weight
if (inputX->getDims()[1] % diffY->getDims()[1] != 0)
return {};
IT_ASSERT(inputX->getDims()[1] % diffY->getDims()[1] == 0);
// Set padding size
if (padding == PaddingMode::Other) {
oh = (h - (r - sh) * dh + ph * 2) / sh;

View File

@ -9,8 +9,8 @@ DetObj::DetObj(GraphObj *graph, Tensor input, Tensor output, Mode mode)
optional<vector<Shape>> DetObj::inferShape(const TensorVec &inputs) const {
const auto A = inputs[0];
auto input = A->getDims();
int length = input.size();
if (length == 2) {
int rank = A->getRank();
if (rank == 2) {
std::vector<int> output = {1};
return {{output}};
} else {

View File

@ -1,4 +1,5 @@
#include "operators/element_wise.h"
#include "utils/operator_utils.h"
namespace infini {
ElementWiseObj::ElementWiseObj(OpType type, GraphObj *graph, Tensor input0,
@ -9,31 +10,8 @@ ElementWiseObj::ElementWiseObj(OpType type, GraphObj *graph, Tensor input0,
optional<vector<Shape>>
ElementWiseObj::inferShape(const TensorVec &inputs) const {
// For now,we only process the same dims here, broardcast will be considered
// in the opt layer.
const auto A = inputs[0], B = inputs[1];
int max_len = std::max(A->getDims().size(), B->getDims().size());
std::vector<int> A_(max_len, 1);
std::vector<int> B_(max_len, 1);
std::vector<int> res(max_len, 1);
memcpy(A_.data() + max_len - A->getDims().size(), A->getDims().data(),
A->getDims().size() * sizeof(int));
memcpy(B_.data() + max_len - B->getDims().size(), B->getDims().data(),
B->getDims().size() * sizeof(int));
// std::copy(A->getDims().begin(), A->getDims().end(), A_.begin() + (max_len
// - A->getDims().size())); std::copy(B->getDims().begin(),
// B->getDims().end(), B_.begin() + (max_len - B->getDims().size()));
// std::copy(A->getDims().rbegin(), A->getDims().rend(), A_.rbegin());
// std::copy(B->getDims().rbegin(), B->getDims().rend(), B_.rbegin());
for (int i = 0; i < max_len; ++i) {
if (A_[i] == B_[i] || (A_[i] == 1 || B_[i] == 1)) {
res[i] = std::max(A_[i], B_[i]);
} else {
return {};
}
}
auto res = infer_broadcast(A->getDims(), B->getDims());
return {{res}};
}
@ -69,9 +47,8 @@ MSELossObj::MSELossObj(GraphObj *graph, Tensor input0, Tensor input1,
optional<vector<Shape>> MSELossObj::inferShape(const TensorVec &inputs) const {
const auto A = inputs[0], B = inputs[1];
if (A->getDims().size() != B->getDims().size() ||
A->getDims() != B->getDims())
return {};
IT_ASSERT(A->getRank() == B->getRank());
IT_ASSERT(A->getDims() == B->getDims());
if (reductionMode == None) {
return {{A->getDims()}};

View File

@ -1,16 +1,18 @@
#include "operators/extend.h"
#include "utils/operator_utils.h"
namespace infini {
ExtendObj::ExtendObj(GraphObj *graph, Tensor input, Tensor output, int dim,
int num)
: OperatorObj(OpType::Extend, {input}, {output}), dim(dim), num(num) {
int rank = input->getRank();
dim = get_real_axis(dim, rank);
IT_ASSERT(checkValid(graph));
}
optional<vector<Shape>> ExtendObj::inferShape(const TensorVec &inputs) const {
auto ret = inputs[0]->getDims();
IT_ASSERT((size_t)dim < ret.size());
ret[dim] = ret[dim] * (num + 1);
return {{ret}};
}

View File

@ -1,9 +1,12 @@
#include "operators/gather.h"
#include "utils/operator_utils.h"
namespace infini {
GatherObj::GatherObj(GraphObj *graph, Tensor input, Tensor indices,
Tensor output, int axis)
: OperatorObj(OpType::Gather, {input, indices}, {output}), axis(axis) {
int rank = input->getRank();
axis = get_real_axis(axis, rank);
IT_ASSERT(checkValid(graph));
}
@ -11,12 +14,6 @@ optional<vector<Shape>> GatherObj::inferShape(const TensorVec &inputs) const {
auto dims0 = inputs[0]->getDims();
auto dims1 = inputs[1]->getDims();
if (axis < 0)
IT_TODO_HALT();
if ((size_t)axis >= dims0.size())
return {};
IT_ASSERT(CheckIndexValid());
Shape dim = dims0;

View File

@ -1,4 +1,6 @@
#include "operators/matmul.h"
#include "utils/operator_utils.h"
#include <numeric>
namespace infini {
@ -9,25 +11,23 @@ MatmulObj::MatmulObj(GraphObj *graph, Tensor A, Tensor B, Tensor C, bool transA,
transA(transA), transB(transB), act(act), b(1) {
auto shape_a = A->getDims();
auto shape_b = B->getDims();
int dimA = shape_a.size(), dimB = shape_b.size();
IT_ASSERT(dimA >= 2 && dimB >= 2);
int rankA = A->getRank();
int rankB = B->getRank();
IT_ASSERT(rankA >= 2 && rankB >= 2);
Shape shape_a1(shape_a.begin(), shape_a.begin() + (rankA - 2));
Shape shape_b1(shape_b.begin(), shape_b.begin() + (rankB - 2));
auto ret = infer_broadcast(shape_a1, shape_b1);
if (ret.empty()) {
b = 1;
if (dimA <= 3 && dimB <= 3) {
int b1 = dimA == 2 ? 1 : A->getDims()[0];
int b2 = dimB == 2 ? 1 : B->getDims()[0];
b = std::max(b1, b2);
} else {
IT_ASSERT_TODO(dimA == dimB);
for (size_t i = 0; i < shape_a.size() - 2; ++i) {
IT_ASSERT_TODO(shape_a[i] == shape_b[i]);
b *= shape_a[i];
}
b = std::accumulate(ret.begin(), ret.end(), 1);
}
auto kA = *(transA ? shape_a.rbegin() + 1 : shape_a.rbegin());
auto kB = *(transB ? shape_b.rbegin() : shape_b.rbegin() + 1);
IT_ASSERT(kA == kB);
m = *(transA ? shape_a.rbegin() : shape_a.rbegin() + 1);
n = *(transB ? shape_b.rbegin() + 1 : shape_b.rbegin());
k = *(transA ? shape_a.rbegin() + 1 : shape_a.rbegin());
k = kA;
IT_ASSERT(checkValid(graph));
}
@ -42,43 +42,16 @@ string MatmulObj::toString() const {
optional<vector<Shape>> MatmulObj::inferShape(const TensorVec &inputs) const {
auto A = inputs[0], B = inputs[1];
int dimA = A->getDims().size(), dimB = B->getDims().size();
if (dimA > 3 || dimB > 3) {
// no broadcast
auto shape_a = inputs[0]->getDims();
auto it = shape_a.rbegin();
*it++ = n;
*it++ = m;
return {{std::move(shape_a)}};
}
int b1 = dimA == 2 ? 1 : A->getDims()[0];
int b2 = dimB == 2 ? 1 : B->getDims()[0];
int b = std::max(b1, b2);
int m = transA ? A->getDims()[dimA - 1] : A->getDims()[dimA - 2];
int n = transB ? B->getDims()[dimB - 2] : B->getDims()[dimB - 1];
int kA = transA ? A->getDims()[dimA - 2] : A->getDims()[dimA - 1];
int kB = transB ? B->getDims()[dimB - 1] : B->getDims()[dimB - 2];
if ((dimA != 2 && dimA != 3) || (dimB != 2 && dimB != 3)) {
printf("Bad input dim: dimA = %d, dimB = %d\n", dimA, dimB);
return {};
}
if (b1 != 1 && b2 != 1 && b1 != b2) {
printf("Bad batch size b1 = %d, b2 = %d\n", b1, b2);
return {};
}
if (kA != kB) {
printf("Bad K: kA = %d, kB = %d\n", kA, kB);
return {};
}
if (dimA == 2 && dimB == 2) {
return {{{m, n}}};
} else {
return {{{b, m, n}}};
}
auto shapeA = A->getDims();
auto shapeB = B->getDims();
int rankA = A->getRank();
int rankB = B->getRank();
Shape shapeA1(shapeA.begin(), shapeA.begin() + (rankA - 2));
Shape shapeB1(shapeB.begin(), shapeB.begin() + (rankB - 2));
Shape ret = infer_broadcast(shapeA1, shapeB1);
ret.emplace_back(m);
ret.emplace_back(n);
return {{ret}};
}
vector<int> MatmulObj::getWorkloadVector() const {

View File

@ -9,7 +9,7 @@ PadObj::PadObj(GraphObj *graph, Tensor input, Tensor output,
else {
auto nAxis = (*axes).size();
IT_ASSERT(_pads.size() == nAxis * 2);
auto nDims = input->getDims().size();
auto nDims = input->getRank();
pads = vector<int>(nDims * 2, 0);
for (size_t i = 0; i < nAxis; ++i) {
@ -24,13 +24,11 @@ PadObj::PadObj(GraphObj *graph, Tensor input, Tensor output,
optional<vector<Shape>> PadObj::inferShape(const TensorVec &inputs) const {
auto dims = inputs[0]->getDims();
int nDims = dims.size();
if (nDims * 2 != (int)pads.size())
return {};
for (int i = 0; i < nDims; ++i) {
if (pads[i] < 0 || pads[i + nDims] < 0)
return {};
dims[i] += pads[i] + pads[i + nDims];
int rank = inputs[0]->getRank();
IT_ASSERT(rank * 2 == (int)pads.size());
for (int i = 0; i < rank; ++i) {
IT_ASSERT(pads[i] >= 0 && pads[i + rank] >= 0);
dims[i] += pads[i] + pads[i + rank];
}
return {{dims}};

View File

@ -16,13 +16,13 @@ PoolingObj::PoolingObj(GraphObj *graph, OpType optype, Tensor input,
optional<vector<Shape>> PoolingObj::inferShape(const TensorVec &inputs) const {
const auto &input = inputs[0];
auto h = input->getDims()[input->getDims().size() - 2],
w = input->getDims()[input->getDims().size() - 1];
auto h = input->getDims()[input->getRank() - 2],
w = input->getDims()[input->getRank() - 1];
int oh = (h - (kh - sh) + ph * 2) / sh;
int ow = (w - (kw - sw) + pw * 2) / sw;
auto ret = input->getDims();
ret[input->getDims().size() - 2] = oh;
ret[input->getDims().size() - 1] = ow;
ret[input->getRank() - 2] = oh;
ret[input->getRank() - 1] = ow;
return {{ret}};
}

View File

@ -1,15 +1,14 @@
#include "operators/reduce_mean.h"
#include "utils/operator_utils.h"
namespace infini {
ReduceMeanObj::ReduceMeanObj(GraphObj *graph, Tensor input, Tensor output,
const optional<vector<int>> &_axes, bool keepDims)
: OperatorObj(OpType::ReduceMean, {input}, {output}), keepDims(keepDims) {
const auto size = input->getDims().size();
const auto size = input->getRank();
if (_axes) {
for (auto idx : *_axes) {
if (idx < 0)
IT_TODO_HALT();
IT_ASSERT((size_t)idx < size);
idx = get_real_axis(idx, size);
axes.emplace(idx);
}
} else
@ -25,6 +24,7 @@ bool ReduceMeanObj::isReduced(int idx) const {
optional<vector<Shape>>
ReduceMeanObj::inferShape(const TensorVec &inputs) const {
auto dims = inputs[0]->getDims();
auto rank = inputs[0]->getRank();
if (keepDims) {
Shape ret = dims;
@ -33,7 +33,7 @@ ReduceMeanObj::inferShape(const TensorVec &inputs) const {
return {{ret}};
} else {
Shape ret;
for (size_t i = 0; i < dims.size(); ++i) {
for (size_t i = 0; i < rank; ++i) {
if (!isReduced(i))
ret.emplace_back(dims[i]);
}

View File

@ -1,4 +1,5 @@
#include "operators/reshape.h"
#include "utils/operator_utils.h"
namespace infini {
ReshapeObj::ReshapeObj(GraphObj *graph, Tensor input, Tensor output, Shape dims)
@ -8,10 +9,10 @@ ReshapeObj::ReshapeObj(GraphObj *graph, Tensor input, Tensor output, Shape dims)
optional<vector<Shape>> ReshapeObj::inferShape(const TensorVec &inputs) const {
size_t size = 1;
for (size_t i = 0; i < dims.size(); ++i)
for (size_t i = 0; i < dims.size(); ++i) {
size *= dims.at(i);
if (size != inputs[0]->size())
return {};
}
IT_ASSERT(size == inputs[0]->size());
return {{dims}};
}
@ -41,22 +42,18 @@ vector<int> ReshapeObj::getOpAttrVector() const {
FlattenObj::FlattenObj(GraphObj *graph, Tensor input, Tensor output, int _axis)
: OperatorObj(OpType::Flatten, {input}, {output}) {
if (_axis >= 0 && (size_t)_axis < input->getDims().size())
axis = _axis;
else if (_axis <= -1 && (size_t)_axis >= -input->getDims().size())
axis = _axis + input->getDims().size();
else
IT_ASSERT(0);
int rank = input->getRank();
axis = get_real_axis(_axis, rank);
IT_ASSERT(checkValid(graph));
}
optional<vector<Shape>> FlattenObj::inferShape(const TensorVec &inputs) const {
int sizeB = 1, sizeE = 1;
auto dims = getInputs(0)->getDims();
int ndim = dims.size();
for (int i = 0; i < ndim; ++i)
int rank = getInputs(0)->getRank();
for (int i = 0; i < rank; ++i) {
((i < axis) ? sizeB : sizeE) *= dims.at(i);
}
return {{{sizeB, sizeE}}};
}

View File

@ -45,11 +45,11 @@ void ResizeObj::init(const Tensor &input, const Tensor &sizes,
if (ECoordinateTransMode::tfCropAndResize == coMode) {
IT_ASSERT(nullptr != roi);
inputs.push_back(roi);
IT_ASSERT(roi->getDims().size() == 1);
IT_ASSERT(roi->getRank() == 1);
IT_ASSERT((size_t)roi->getDims()[0] == this->axes.size() * 2);
// init roi_start = 0;roi_end =1
size_t nDims = input->getDims().size();
size_t nDims = input->getRank();
for (size_t i = 0; i < nDims; ++i) {
this->roi.emplace_back(0);
}
@ -75,24 +75,26 @@ void ResizeObj::InitBySizes(Tensor input, Tensor sizes,
const std::optional<vector<int>> &axes) {
IT_ASSERT(sizes != nullptr);
size_t size = sizes->getDims()[0];
IT_ASSERT(size == input->getDims().size() ||
IT_ASSERT(size == input->getRank() ||
(axes != std::nullopt && size == (*axes).size()));
if (axes == std::nullopt)
for (size_t i = 0; i < input->getDims().size(); ++i)
if (axes == std::nullopt) {
for (size_t i = 0; i < input->getRank(); ++i) {
this->axes.emplace_back(i);
else
}
} else {
// check axes
for (size_t i = 0; i < (*axes).size(); ++i) {
auto val = (*axes)[i];
if (val < 0)
if (val < 0) {
IT_TODO_HALT();
IT_ASSERT((size_t)val < inputs[0]->getDims().size());
}
IT_ASSERT((size_t)val < inputs[0]->getRank());
this->axes.emplace_back(val);
}
}
// init this->scales
for (size_t i = 0; i < input->getDims().size(); ++i) {
for (size_t i = 0; i < input->getRank(); ++i) {
this->scales.emplace_back(1);
}
@ -109,9 +111,10 @@ void ResizeObj::InitBySizes(Tensor input, Tensor sizes,
int n = this->axes.size();
switch (ratioPolicy) {
case EKeepAspectRatioPolicy::stretch:
for (int i = 0; i < n; ++i)
for (int i = 0; i < n; ++i) {
scales[this->axes[i]] =
(float)data[i] / (float)inDims[this->axes[i]];
}
break;
case EKeepAspectRatioPolicy::notLarger: {
float scale = (float)data[0] / (float)inDims[this->axes[0]];
@ -119,8 +122,9 @@ void ResizeObj::InitBySizes(Tensor input, Tensor sizes,
auto tmp = (float)data[i] / (float)inDims[this->axes[i]];
scale = scale < tmp ? scale : tmp;
}
for (int i = 0; i < n; ++i)
for (int i = 0; i < n; ++i) {
scales[this->axes[i]] = scale;
}
break;
}
case EKeepAspectRatioPolicy::notSmaller: {
@ -129,8 +133,9 @@ void ResizeObj::InitBySizes(Tensor input, Tensor sizes,
auto tmp = (float)data[i] / (float)inDims[this->axes[i]];
scale = scale > tmp ? scale : tmp;
}
for (int i = 0; i < n; ++i)
for (int i = 0; i < n; ++i) {
scales[this->axes[i]] = scale;
}
break;
}
default:
@ -142,7 +147,7 @@ void ResizeObj::InitByScales(Tensor input, Tensor scales,
const std::optional<vector<int>> &axes) {
IT_ASSERT(scales != nullptr);
size_t size = scales->getDims()[0];
IT_ASSERT(size == input->getDims().size() ||
IT_ASSERT(size == input->getRank() ||
(axes != std::nullopt && size == (*axes).size()));
// copy scales data to host.
@ -155,28 +160,30 @@ void ResizeObj::InitByScales(Tensor input, Tensor scales,
(void *)data, scales->getRawDataPtr<void *>(), scales->getBytes());
// init this->scales
for (size_t i = 0; i < input->getDims().size(); ++i) {
for (size_t i = 0; i < input->getRank(); ++i) {
this->scales.emplace_back(1);
}
if (axes == std::nullopt)
for (size_t i = 0; i < input->getDims().size(); ++i) {
if (axes == std::nullopt) {
for (size_t i = 0; i < input->getRank(); ++i) {
this->axes.emplace_back(i);
IT_ASSERT(data[i] > 0);
this->scales[i] = data[i];
}
else
} else {
// check axes
for (size_t i = 0; i < (*axes).size(); ++i) {
auto val = (*axes)[i];
if (val < 0)
if (val < 0) {
IT_TODO_HALT();
IT_ASSERT((size_t)val < inputs[0]->getDims().size());
}
IT_ASSERT((size_t)val < inputs[0]->getRank());
this->axes.emplace_back(val);
IT_ASSERT(data[i] > 0);
this->scales[val] = data[i];
}
}
}
vector<DataType> ResizeObj::inferDataType(const TensorVec &inputs) const {
IT_ASSERT(inputs.size() == 2 || inputs.size() == 3);
@ -202,8 +209,8 @@ float ResizeObj::round_int(float x) const {
optional<vector<Shape>> ResizeObj::inferShape(const TensorVec &inputs) const {
auto inDims = inputs[0]->getDims();
Shape ret = inDims;
int nDim = inDims.size();
for (int i = 0; i < nDim; ++i) {
int rank = inputs[0]->getRank();
for (int i = 0; i < rank; ++i) {
int size = round_int(scales[i] * inDims[i]);
ret[i] = size;
}
@ -217,12 +224,14 @@ std::string ResizeObj::toString() const {
<< "[" << getGuid() << "]";
os << "(";
os << vecToString(inputs[0]->getDims()) << ",";
if (inputs.size() == 3)
if (inputs.size() == 3) {
os << "roi=" << vecToString(inputs[2]->getDims()) << ",";
if (isResizeBySizes())
}
if (isResizeBySizes()) {
os << "sizes=" << vecToString(inputs[1]->getDims()) << ",";
else
} else {
os << "scales=" << vecToString(inputs[1]->getDims()) << ",";
}
os << "axes=" << vecToString(axes) << ",";
os << "coMode=" << enum_to_underlying(coMode) << ",";
os << "nearestMode=" << enum_to_underlying(nearestMode) << ",";
@ -230,16 +239,18 @@ std::string ResizeObj::toString() const {
os << "input=" << inputs[0]->getGuid() << ",";
os << inputs[1]->getGuid() << ",";
if (inputs.size() == 3)
if (inputs.size() == 3) {
os << inputs[2]->getGuid() << ",";
}
os << "output=" << outputs[0]->getGuid() << ")";
return os.str();
}
vector<int> ResizeObj::getWorkloadVector() const {
vector<int> ret = inputs[0]->getDims();
for (size_t i = 0; i < outputs[0]->getDims().size(); ++i)
for (size_t i = 0; i < outputs[0]->getRank(); ++i) {
ret.emplace_back(outputs[0]->getDims()[i]);
}
// ratioPolicy only effects output shape, so did not need
// here.
ret.emplace_back(enum_to_underlying(coMode));

View File

@ -1,15 +1,12 @@
#include "operators/softmax.h"
#include "utils/operator_utils.h"
namespace infini {
SoftmaxObj::SoftmaxObj(GraphObj *graph, Tensor input, Tensor output, int _axis)
: OperatorObj(OpType::Softmax, {input}, {output}) {
if (_axis >= 0 && (size_t)_axis < input->getDims().size())
axis = _axis;
else if (_axis <= -1 && (size_t)_axis >= -input->getDims().size())
axis = _axis + input->getDims().size();
else
IT_ASSERT(0);
int rank = input->getRank();
axis = get_real_axis(_axis, rank);
IT_ASSERT(checkValid(graph));
}

View File

@ -1,4 +1,5 @@
#include "operators/split.h"
#include "utils/operator_utils.h"
#include <numeric>
namespace infini {
@ -7,6 +8,8 @@ SplitObj::SplitObj(GraphObj *graph, Tensor input,
: OperatorObj(OpType::Split, {input},
((!outputs) ? TensorVec(num, nullptr) : std::move(*outputs))),
dim(dim), num(num), ratio({}) {
int rank = input->getRank();
dim = get_real_axis(dim, rank);
int dimSize = input->getDims().at(dim);
int pieceSize = dimSize / num;
int lastSize = dimSize - pieceSize * num;
@ -26,6 +29,8 @@ SplitObj::SplitObj(GraphObj *graph, Tensor input,
: OperatorObj(OpType::Split, {input},
((!outputs) ? TensorVec{nullptr} : (*outputs))),
dim(dim), num(-1), ratio(ratio) {
int rank = input->getRank();
dim = get_real_axis(dim, rank);
num = ratio.size();
if (!outputs) {
TensorVec tmp(num, nullptr);
@ -35,13 +40,11 @@ SplitObj::SplitObj(GraphObj *graph, Tensor input,
}
optional<vector<Shape>> SplitObj::inferShape(const TensorVec &inputs) const {
if (num == -1 || ratio.size() == 0)
return {};
IT_ASSERT(num != -1 && ratio.size() != 0);
auto inputDims = inputs[0]->getDims();
int totalSize = inputDims.at(dim);
int ratioSum = std::accumulate(ratio.begin(), ratio.end(), 0);
if (totalSize % ratioSum != 0)
return {};
IT_ASSERT(totalSize % ratioSum == 0);
int pieceSize = totalSize / ratioSum;

View File

@ -4,26 +4,32 @@ namespace infini {
TransposeObj::TransposeObj(GraphObj *graph, Tensor input, Tensor output,
vector<int> permute)
: OperatorObj(OpType::Transpose, {input}, {output}) {
if (permute.size() != 4) {
IT_TODO_HALT();
auto rank = input->getRank();
if (permute.empty()) {
for (size_t i = 0; i < rank; ++i) {
transposePermute[i] = i;
}
} else {
IT_ASSERT(rank == permute.size());
transposePermute = std::move(permute);
}
transposePermute[0] = permute[0];
transposePermute[1] = permute[1];
transposePermute[2] = permute[2];
transposePermute[3] = permute[3];
IT_ASSERT(checkValid(graph));
}
optional<vector<Shape>>
TransposeObj::inferShape(const TensorVec &inputs) const {
const auto A = inputs[0];
auto input = A->getDims();
auto output = input;
auto input_dim = A->getDims();
auto output_dim = input_dim;
int rank = A->getRank();
for (int i = 0; i < 4; ++i) {
output[i] = input[transposePermute[i]];
for (auto index : transposePermute) {
IT_ASSERT(index < rank);
}
return {{output}};
for (int i = 0; i < rank; ++i) {
output_dim[i] = input_dim[transposePermute[i]];
}
return {{output_dim}};
}
std::string TransposeObj::toString() const {

View File

@ -183,46 +183,54 @@ vector<int> CastObj::getOpAttrVector() const { return {type.underlying()}; }
DataType CastObj::getOutputDataType() const {
switch (castType) {
case CastObj::Float2Int64:
case CastType::Float2Float16:
return DataType::Float16;
case CastType::Float2Int64:
return DataType::Int64;
case CastObj::Float2Int32:
case CastType::Float2Int32:
return DataType::Int32;
case CastObj::Float2Int16:
case CastType::Float2Int16:
return DataType::Int16;
case CastObj::Float2Int8:
case CastType::Float2Int8:
return DataType::Int8;
case CastObj::Int322Float:
case CastType::Int322Float:
return DataType::Float32;
case CastObj::Int322Int8:
case CastType::Int322Int8:
return DataType::Int8;
case CastObj::Int322Int16:
case CastType::Int322Int16:
return DataType::Int16;
case CastObj::Int162Float:
case CastType::Int162Float:
return DataType::Float32;
case CastObj::Int162Int32:
case CastType::Int162Int32:
return DataType::Int32;
case CastObj::Int82Float:
case CastType::Int82Float:
return DataType::Float32;
case CastObj::Int82Int16:
case CastType::Int82Int16:
return DataType::Int16;
case CastObj::Int82Int32:
case CastType::Int82Int32:
return DataType::Int32;
case CastObj::Uint82Float:
case CastType::Uint82Float:
return DataType::Float32;
case CastObj::Uint82Int32:
case CastType::Uint82Int32:
return DataType::Int32;
case CastObj::Uint82Int64:
case CastType::Uint82Int64:
return DataType::Int64;
case CastObj::Int322Int64:
case CastType::Int322Int64:
return DataType::Int64;
case CastObj::Int642Int32:
case CastType::Int642Int32:
return DataType::Int32;
case CastObj::Int642Uint32:
case CastType::Int642Uint32:
return DataType::UInt32;
case CastObj::Int642Float:
case CastType::Int642Float:
return DataType::Float32;
case CastObj::Uint322Int64:
case CastType::Uint322Int64:
return DataType::Int64;
case CastType::Float162Float:
return DataType::Float32;
case CastType::BFloat162Float:
return DataType::Float32;
case CastType::Float2BFloat16:
return DataType::BFloat16;
default:
IT_TODO_HALT();
}
@ -234,7 +242,7 @@ ShapeObj::ShapeObj(GraphObj *graph, Tensor input, Tensor output)
}
optional<vector<Shape>> ShapeObj::inferShape(const TensorVec &inputs) const {
return {{{static_cast<int>(inputs[0]->getDims().size())}}};
return {{{static_cast<int>(inputs[0]->getRank())}}};
}
std::string ShapeObj::toString() const {

View File

@ -27,4 +27,17 @@ float fp16_to_float(const uint16_t x) {
u.u32 = r;
return u.f32;
}
uint16_t float_to_bfp16(const float x) {
Uf32 u;
u.f32 = x;
return u.u32 >> 16;
}
float bfp16_to_fp32(const uint16_t x) {
Uf32 u;
u.u32 = x << 16;
return u.f32;
}
} // namespace infini

View File

@ -12,7 +12,7 @@ void saveTensorData(TensorObj *tensor, std::string file_path) {
#ifdef TENSOR_PROTOBUF
data::Tensor temp;
temp.set_id("tensor_id");
for (size_t i = 0; i < tensor->getDims().size(); ++i) {
for (size_t i = 0; i < tensor->getRank(); ++i) {
temp.add_shape(tensor->getDims()[i]);
}
temp.set_layout(data::LAYOUT_NHWC);

View File

@ -0,0 +1,44 @@
#include "utils/operator_utils.h"
namespace infini {
Shape infer_broadcast(const Shape &A, const Shape &B) {
if (A.empty() && B.empty()) {
return {};
}
auto A_ = A;
auto B_ = B;
int rankA = A.size();
int rankB = B.size();
int rank = std::max(rankA, rankB);
if (rankA < rank) {
for (int i = 0; i < rank - rankA; ++i) {
A_.insert(A_.begin(), 1);
}
}
if (rankB < rank) {
for (int i = 0; i < rank - rankB; ++i) {
B_.insert(B_.begin(), 1);
}
}
Shape ret;
for (int i = 0; i < rank; ++i) {
IT_ASSERT(A_[i] == B_[i] || A_[i] == 1 || B_[i] == 1);
auto shapeEle = std::max(A_[i], B_[i]);
ret.emplace_back(shapeEle);
}
return ret;
}
int get_real_axis(const int &axis, const int &rank) {
IT_ASSERT(rank >= 1);
IT_ASSERT(axis >= -rank && axis <= (rank - 1));
int newAxis;
if (axis < 0) {
newAxis = rank + axis;
} else {
newAxis = axis;
}
return newAxis;
}
} // namespace infini

View File

@ -27,6 +27,30 @@ TEST(Matmul, ShapeInference) {
auto C = matmul->getOutputs()[0];
EXPECT_EQ(C->getDims(), (Shape{3, 4, 2}));
}
{
Graph g = make_ref<GraphObj>(runtime);
auto A = g->addTensor(Shape{1, 2, 3, 5});
auto B = g->addTensor(Shape{1, 1, 5, 2});
auto matmul = g->addOp<MatmulObj>(A, B, nullptr);
auto C = matmul->getOutputs()[0];
EXPECT_EQ(C->getDims(), (Shape{1, 2, 3, 2}));
}
{
Graph g = make_ref<GraphObj>(runtime);
auto A = g->addTensor(Shape{2, 3, 5, 4});
auto B = g->addTensor(Shape{1, 3, 5, 2});
auto matmul = g->addOp<MatmulObj>(A, B, nullptr, true, false);
auto C = matmul->getOutputs()[0];
EXPECT_EQ(C->getDims(), (Shape{2, 3, 4, 2}));
}
{
Graph g = make_ref<GraphObj>(runtime);
auto A = g->addTensor(Shape{2, 3, 5, 4});
auto B = g->addTensor(Shape{1, 3, 2, 5});
auto matmul = g->addOp<MatmulObj>(A, B, nullptr, true, true);
auto C = matmul->getOutputs()[0];
EXPECT_EQ(C->getDims(), (Shape{2, 3, 4, 2}));
}
}
}; // namespace infini

View File

@ -0,0 +1,32 @@
#include "core/graph.h"
#include "core/kernel.h"
#include "core/runtime.h"
#include "operators/transpose.h"
#include "test.h"
namespace infini {
TEST(Transpose, ShapeInference) {
Runtime runtime = NativeCpuRuntimeObj::getInstance();
{
Graph g = make_ref<GraphObj>(runtime);
Tensor i = g->addTensor({1, 2, 3, 4}, DataType::Float32);
auto op = g->addOp<TransposeObj>(i, nullptr, Shape{0, 1, 2, 3});
EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 2, 3, 4}));
}
{
Graph g = make_ref<GraphObj>(runtime);
Tensor i = g->addTensor({1, 2, 3, 4}, DataType::Float32);
auto op = g->addOp<TransposeObj>(i, nullptr, Shape{0, 2, 1, 3});
EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 3, 2, 4}));
}
{
Graph g = make_ref<GraphObj>(runtime);
Tensor i = g->addTensor({2, 3, 4}, DataType::Float32);
auto op = g->addOp<TransposeObj>(i, nullptr, Shape{0, 2, 1});
EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 4, 3}));
}
}
} // namespace infini