diff --git a/include/core/graph_handler.h b/include/core/graph_handler.h
index ac0be526..b8b7bc9d 100644
--- a/include/core/graph_handler.h
+++ b/include/core/graph_handler.h
@@ -27,9 +27,9 @@ class GraphHandlerObj {
                   int opw);
     Tensor matmul(Tensor a, Tensor b, Tensor y, bool transA, bool transB,
                   Tensor bias, ActType act);
-    Tensor batchNorm(Tensor input, Tensor output, Tensor mean, Tensor var,
-                     Tensor scale, Tensor bias, float momentum, float eps,
-                     bool training);
+    Tensor batchNormalization(Tensor input, Tensor output, Tensor mean,
+                              Tensor var, Tensor scale, Tensor bias,
+                              float momentum, float eps, bool training);
     Tensor maxPool(Tensor input, Tensor output, int kh, int kw, int dh, int dw,
                    int ph, int pw, int sh, int sw);
diff --git a/include/core/kernel.h b/include/core/kernel.h
index c4192b66..3ef0d1b9 100644
--- a/include/core/kernel.h
+++ b/include/core/kernel.h
@@ -105,8 +105,8 @@ class KernelRegistry {
         IT_ASSERT(it != kernels.end(),
                   "Kernel not found for key {" +
                       to_string(enum_to_underlying(std::get<0>(kernelAttrs))) +
-                      ", " + OpRegistry::getOpName(std::get<1>(kernelAttrs)) +
-                      ", " + std::get<2>(kernelAttrs).toString() + "}");
+                      ", " + std::to_string(std::get<1>(kernelAttrs)) + ", " +
+                      std::get<2>(kernelAttrs).toString() + "}");
         return std::get<0>(it->second);
     }
     const KernelRecord &getKernelItem(const KernelAttrs &kernelAttrs) const {
diff --git a/include/core/op_type.h b/include/core/op_type.h
new file mode 100644
index 00000000..a5ea2524
--- /dev/null
+++ b/include/core/op_type.h
@@ -0,0 +1,253 @@
+#pragma once
+#ifndef OP_TYPE_H
+#define OP_TYPE_H
+
+#include <cstdint>
+#include
+
+namespace infini {
+
+struct OpType {
+    using underlying_t = uint16_t;
+
+    // Clang-format is ambiguous in formatting of comment alignment.
+    // In order to disambiguate, it is necessary to comment all enum
+    // elements.
+ enum : underlying_t { + Unknown, + Abs, // Unary + Acos, // Unary + Acosh, // Unary + Add, // Binary + And, // Binary + ArgMax, // + Asin, // Binary + Asinh, // Binary + Atan, // Binary + Atanh, // Binary + AveragePool, // Pool + BatchNormalization, // + Bernoulli, // + BitShift, // Binary + BitwiseAnd, // Binary + BitwiseNot, // Binary + BitwiseOr, // Binary + BitwiseXor, // Binary + BlackmanWindow, // + Cast, // Unary + CastLike, // + Ceil, // Unary + Celu, // + CenterCropPad, // + Clip, // Unary + Col2lm, + Compress, + Concat, + ConcatFromSequence, + ConstantOfShape, + Conv, // ComputationIntensive + ConvInteger, // ComputationIntensive + ConvTranspose, // ComputationIntensive + Cos, // Unary + Cosh, // Unary + CumSum, + DFT, + DeformConv, // ComputationIntensive + DepthToSpace, + DequantizeLinear, + Det, + Div, // Binary + Dropout, + DynamicQuantizeLinear, + Einsum, + Elu, + Equal, // Compair + Erf, // Unary + Exp, // Unary + Expand, + EyeLike, + Flatten, + Floor, // Unary + GRU, + Gather, + GatherElements, + GatherND, + Gemm, + GlobalAveragePool, // GlobalPool + GlobalLpPool, // GlobalPool + GlobalMaxPool, // GlobalPool + Greater, // Compair + GreaterOrEqual, // Compair + GridSample, + GroupNormalization, + HammingWindow, + HannWindow, + HardSigmoid, + HardSwish, + Hardmax, + Identity, + If, + InstanceNormalization, + IsInf, + IsNaN, + LRN, + LSTM, + LayerNormalization, + LeakyRelu, + Less, // Compair + LessOrEqual, // Compair + Log, // Unary + LogSoftmax, + Loop, + LpNormalization, + LpPool, + MatMul, // ComputationIntensive + MatMulInteger, // ComputationIntensive + Max, + MaxPool, + MaxRoiPool, + MaxUnpool, + Mean, + MeanVarianceNormalization, + MelWeightMatrix, + Min, + Mish, + Mod, // Binary + Mul, // Binary + Multinomial, // + Neg, // Unary + NegativeLogLikelihoodLoss, + NonMaxSuppression, + NonZero, + Not, // Unary + OneHot, + Optional, + OptionalGetElement, + OptionalHasElement, + Or, // Binary + PRelu, // + Pad, // + Pow, // Binary + QLinearConv, // ComputationIntensive + QLinearMatMul, // ComputationIntensive + QuantizeLinear, + RNN, + RandomNormal, + RandomNormalLike, + RandomUniform, + RandomUniformLike, + Range, + Reciprocal, + ReduceL1, // Reduce + ReduceL2, // Reduce + ReduceLogSum, // Reduce + ReduceLogSumExp, // Reduce + ReduceMax, // Reduce + ReduceMean, // Reduce + ReduceMin, // Reduce + ReduceProd, // Reduce + ReduceSum, // Reduce + ReduceSumSquare, // Reduce + Relu, // Unary + Reshape, + Resize, + ReverseSequence, + RoiAlign, + Round, // Unary + STFT, + Scan, + Scatter, + ScatterElements, + ScatterND, + Selu, + SequenceAt, + SequenceConstruct, + SequenceEmpty, + SequenceErase, + SequenceInsert, + SequenceLength, + SequenceMap, + Shape, + Shrink, + Sigmoid, + Sign, + Sin, // Unary + Sinh, // Unary + Size, + Slice, + Softmax, + SoftmaxCrossEntropyLoss, + Softplus, + Softsign, + SpaceToDepth, + Split, + SplitToSequence, + Sqrt, + Squeeze, + StringNormalizer, + Sub, // Binary + Sum, // + Tan, // Unary + Tanh, // unary + TfIdfVectorizer, + ThresholdedRelu, + Tile, + TopK, + Transpose, + Trilu, + Unique, + Unsqueeze, + Upsample, + Where, + Xor, // Binary + // CUSTOM DEFINED + G2BMM, + GBMM, + MemBound, + // TODO + ConvTransNHWC, + ConvBackwardFilter, + ReluBackward, + SigmoidBackward, + TanhBackward, + + Fill, + Extend, + MSELoss, + Hardtanh, + L2Loss, + Rsqrt, + FloorDiv, + FloorMod, + Square, + SquaredDifference, + } type; + + constexpr OpType(decltype(type) t) : type(t) {} + constexpr explicit OpType(underlying_t val) : type((decltype(type))val) {} + constexpr 
underlying_t underlying() const { return type; } + + bool operator==(OpType others) const { return type == others.type; } + bool operator!=(OpType others) const { return type != others.type; } + bool operator<(OpType others) const { return type < others.type; } + + const char *toString() const; + bool isUnary() const; + bool isBinary() const; + bool isElementWise() const; + bool isCompair() const; + bool isPool() const; + bool isGlobalPool() const; + bool isMatMulOrConv() const; +}; + +enum class ActType { + None, + Relu, + Sigmoid, + Tanh, +}; + +} // namespace infini + +#endif // OP_TYPE_H diff --git a/include/core/operator.h b/include/core/operator.h index cca67297..d7a57633 100644 --- a/include/core/operator.h +++ b/include/core/operator.h @@ -1,231 +1,14 @@ #pragma once + +#include "core/op_type.h" #include "core/tensor.h" + namespace infini { - -enum class OpType { - Unknown = 0, - // linear - Conv = 100, - ConvBackwardFilter, - ConvBackwardData, - Matmul, - ConvTrans, - ConvTransNHWC, - G2BMM, - GBMM, - Pad, - Slice, - Concat, - Split, - Transpose, - Extend, - MaxPool, - AvgPool, - Add, - Sub, - Mul, - Div, - Pow, - Gather, - ReduceMean, - Reshape, - Flatten, - Identity, - // element wise - BatchNorm = 200, - Softmax, - Activation, - Relu, - ReluBackward, - PRelu, - Sigmoid, - SigmoidBackward, - Tanh, - TanhBackward, - Abs, - Sin, - Cos, - Tan, - ASin, - ACos, - ATan, - SinH, - CosH, - TanH, - ASinH, - ACosH, - ATanH, - Resize, - Arange, - Shape, - Copy, - Ceil, - Floor, - Clip, - Erf, - Exp, - Fill, - Log, - L2Loss, - Maximum, - Minimum, - MSELoss, - Neg, - Power, - Reciprocal, - Sqrt, - Rsqrt, - Cast, - FloorDiv, - FloorMod, - Det, - Round, - Square, - SquaredDifference, - Hardtanh, - Equal, - NotEqual, - GreaterThan, - GreaterEqual, - LessThan, - LessEqual, - And, - Or, - Xor, - Not, - BitAnd, - BitOr, - BitXor, - BitNot, - BitLeftShift, - BitRightShift, - Dropout, - // - MemBound = 300, -}; - -using KernelAttrs = std::tuple; - -class OpRegistry { - public: - static std::string getOpName(OpType opType) { -#define FOP(op) \ - case OpType::op: \ - return #op - - switch (opType) { - FOP(Unknown); - // linear - FOP(Conv); - FOP(ConvBackwardFilter); - FOP(ConvBackwardData); - FOP(Matmul); - FOP(ConvTrans); - FOP(G2BMM); - FOP(GBMM); - FOP(Pad); - FOP(Slice); - FOP(Concat); - FOP(Split); - FOP(Transpose); - FOP(Extend); - FOP(MaxPool); - FOP(AvgPool); - FOP(Add); - FOP(Sub); - FOP(Mul); - FOP(Div); - FOP(Pow); - FOP(Gather); - FOP(ReduceMean); - FOP(Reshape); - FOP(Identity); - FOP(Shape); - // element wise - FOP(BatchNorm); - FOP(Softmax); - FOP(Activation); - FOP(Relu); - FOP(ReluBackward); - FOP(PRelu); - FOP(Sigmoid); - FOP(SigmoidBackward); - FOP(Tanh); - FOP(TanhBackward); - FOP(Abs); - FOP(Sin); - FOP(Cos); - FOP(Tan); - FOP(ASin); - FOP(ACos); - FOP(ATan); - FOP(SinH); - FOP(CosH); - FOP(TanH); - FOP(ASinH); - FOP(ACosH); - FOP(ATanH); - FOP(Copy); - FOP(Ceil); - FOP(Floor); - FOP(Clip); - FOP(Erf); - FOP(Exp); - FOP(Fill); - FOP(Log); - FOP(L2Loss); - FOP(Maximum); - FOP(Minimum); - FOP(MSELoss); - FOP(Neg); - FOP(Power); - FOP(Reciprocal); - FOP(Sqrt); - FOP(Rsqrt); - FOP(Cast); - FOP(FloorDiv); - FOP(FloorMod); - FOP(Det); - FOP(Round); - FOP(Square); - FOP(SquaredDifference); - FOP(Hardtanh); - FOP(Equal); - FOP(NotEqual); - FOP(GreaterThan); - FOP(GreaterEqual); - FOP(LessThan); - FOP(LessEqual); - FOP(And); - FOP(Or); - FOP(Xor); - FOP(Not); - FOP(BitAnd); - FOP(BitOr); - FOP(BitXor); - FOP(BitNot); - FOP(BitLeftShift); - FOP(BitRightShift); - // - FOP(MemBound); - 
default: - IT_ASSERT(false); - break; - } -#undef FOP - } -}; - -enum class ActType { - None, - Relu, - Sigmoid, - Tanh, -}; +using KernelAttrs = std::tuple; struct OpPerfKey { HashType hash; - OpType opType; + OpType::underlying_t opType; vector attrs; public: @@ -233,7 +16,7 @@ struct OpPerfKey { // https://github.com/nlohmann/json#how-can-i-use-get-for-non-default-constructiblenon-copyable-types OpPerfKey() = default; OpPerfKey(HashType hash, OpType opType, vector attrs = {}) - : hash(hash), opType(opType), attrs(attrs) {} + : hash(hash), opType(opType.underlying()), attrs(attrs) {} bool operator==(const OpPerfKey &rhs) const { if (hash != rhs.hash) return false; @@ -290,16 +73,7 @@ class OperatorObj : public Object { */ HashType hash() const; - public: // check Op type - bool isLinearOp() const; - bool isElementWiseOp() const; - bool isSplitOp() const; - bool isConcatOp() const; - bool isComputeOp() const; - bool isTransposeOp() const; - bool isReshapeOp() const; - bool isMemBoundOp() const; - + public: public: // getter and setter const TensorVec &getInputs() const { return inputs; } const TensorVec &getOutputs() const { return outputs; } diff --git a/include/core/runtime.h b/include/core/runtime.h index 53920fdb..2fe0467c 100644 --- a/include/core/runtime.h +++ b/include/core/runtime.h @@ -1,5 +1,6 @@ #pragma once #include "core/common.h" +#include "core/op_type.h" #include "core/ref.h" #include @@ -21,7 +22,6 @@ using Graph = Ref; using GraphHandler = Ref; using Runtime = Ref; using Blob = Ref; -enum class OpType; using TensorVec = vector; using OpVec = vector; diff --git a/include/operators/element_wise.h b/include/operators/element_wise.h index 8bda60de..e198de75 100644 --- a/include/operators/element_wise.h +++ b/include/operators/element_wise.h @@ -65,26 +65,24 @@ DEFINE_ELEMENT_WISE_OBJ(Sub, OpType::Sub) DEFINE_ELEMENT_WISE_OBJ(Mul, OpType::Mul) DEFINE_ELEMENT_WISE_OBJ(Div, OpType::Div) DEFINE_ELEMENT_WISE_OBJ(Pow, OpType::Pow) -DEFINE_ELEMENT_WISE_OBJ(Maximum, OpType::Maximum) -DEFINE_ELEMENT_WISE_OBJ(Minimum, OpType::Minimum) -DEFINE_ELEMENT_WISE_OBJ(Power, OpType::Power) +DEFINE_ELEMENT_WISE_OBJ(Maximum, OpType::Max) +DEFINE_ELEMENT_WISE_OBJ(Minimum, OpType::Min) +DEFINE_ELEMENT_WISE_OBJ(Power, OpType::Pow) DEFINE_ELEMENT_WISE_OBJ(FloorDiv, OpType::FloorDiv) DEFINE_ELEMENT_WISE_OBJ(FloorMod, OpType::FloorMod) DEFINE_ELEMENT_WISE_OBJ(SquaredDifference, OpType::SquaredDifference) DEFINE_ELEMENT_WISE_OBJ(Equal, OpType::Equal) -DEFINE_ELEMENT_WISE_OBJ(NotEqual, OpType::NotEqual) -DEFINE_ELEMENT_WISE_OBJ(GreaterThan, OpType::GreaterThan) -DEFINE_ELEMENT_WISE_OBJ(GreaterEqual, OpType::GreaterEqual) -DEFINE_ELEMENT_WISE_OBJ(LessThan, OpType::LessThan) -DEFINE_ELEMENT_WISE_OBJ(LessEqual, OpType::LessEqual) +DEFINE_ELEMENT_WISE_OBJ(GreaterThan, OpType::Greater) +DEFINE_ELEMENT_WISE_OBJ(GreaterEqual, OpType::GreaterOrEqual) +DEFINE_ELEMENT_WISE_OBJ(LessThan, OpType::Less) +DEFINE_ELEMENT_WISE_OBJ(LessEqual, OpType::LessOrEqual) DEFINE_ELEMENT_WISE_OBJ(And, OpType::And) DEFINE_ELEMENT_WISE_OBJ(Or, OpType::Or) DEFINE_ELEMENT_WISE_OBJ(Xor, OpType::Xor) DEFINE_ELEMENT_WISE_OBJ(Not, OpType::Not) -DEFINE_ELEMENT_WISE_OBJ(BitAnd, OpType::BitAnd) -DEFINE_ELEMENT_WISE_OBJ(BitOr, OpType::BitOr) -DEFINE_ELEMENT_WISE_OBJ(BitXor, OpType::BitXor) -DEFINE_ELEMENT_WISE_OBJ(BitNot, OpType::BitNot) -DEFINE_ELEMENT_WISE_OBJ(BitLeftShift, OpType::BitLeftShift) -DEFINE_ELEMENT_WISE_OBJ(BitRightShift, OpType::BitRightShift) +DEFINE_ELEMENT_WISE_OBJ(BitAnd, OpType::BitwiseAnd) 
+DEFINE_ELEMENT_WISE_OBJ(BitOr, OpType::BitwiseOr) +DEFINE_ELEMENT_WISE_OBJ(BitXor, OpType::BitwiseXor) +DEFINE_ELEMENT_WISE_OBJ(BitNot, OpType::BitwiseNot) +DEFINE_ELEMENT_WISE_OBJ(BitLeftShift, OpType::BitShift) }; // namespace infini diff --git a/include/operators/pooling.h b/include/operators/pooling.h index c14bb8ad..a163ab0f 100644 --- a/include/operators/pooling.h +++ b/include/operators/pooling.h @@ -70,7 +70,7 @@ class AvgPoolObj : public PoolingObj { public: AvgPoolObj(GraphObj *graph, Tensor input, Tensor output, int kh, int kw, int dh, int dw, int ph, int pw, int sh, int sw) - : PoolingObj(graph, OpType::AvgPool, input, output, kh, kw, dh, dw, ph, - pw, sh, sw) {} + : PoolingObj(graph, OpType::AveragePool, input, output, kh, kw, dh, dw, + ph, pw, sh, sw) {} }; }; // namespace infini diff --git a/include/operators/unary.h b/include/operators/unary.h index 1df2b4a7..3e94a548 100644 --- a/include/operators/unary.h +++ b/include/operators/unary.h @@ -197,27 +197,6 @@ class CumsumObj : public OperatorObj { vector getOpAttrVector() const override; }; -class ArangeObj : public OperatorObj { - public: - ArangeObj(GraphObj *graph, float start, float step, int length, - Tensor output); - OP_CLONE(ArangeObj); - optional> inferShape(const TensorVec &inputs) const override; - - std::string toString() const override; - int numInputs() const override { return 0; } - int numOutputs() const override { return 1; } - float getStartValue() { return startValue; } - float getStepValue() { return stepValue; } - int getLength() { return lengthValue; } - - private: - float startValue, stepValue; - int lengthValue; - vector getWorkloadVector() const override; - vector getOpAttrVector() const override; -}; - class ShapeObj : public OperatorObj { public: ShapeObj(GraphObj *graph, Tensor input, Tensor output); @@ -283,17 +262,16 @@ DEFINE_UNARY_OBJ(Abs, OpType::Abs) DEFINE_UNARY_OBJ(Sin, OpType::Sin) DEFINE_UNARY_OBJ(Cos, OpType::Cos) DEFINE_UNARY_OBJ(Tan, OpType::Tan) -DEFINE_UNARY_OBJ(ASin, OpType::ASin) -DEFINE_UNARY_OBJ(ACos, OpType::ACos) -DEFINE_UNARY_OBJ(ATan, OpType::ATan) -DEFINE_UNARY_OBJ(SinH, OpType::SinH) -DEFINE_UNARY_OBJ(CosH, OpType::CosH) -DEFINE_UNARY_OBJ(TanH, OpType::TanH) -DEFINE_UNARY_OBJ(ASinH, OpType::ASinH) -DEFINE_UNARY_OBJ(ACosH, OpType::ACosH) -DEFINE_UNARY_OBJ(ATanH, OpType::ATanH) +DEFINE_UNARY_OBJ(ASin, OpType::Asin) +DEFINE_UNARY_OBJ(ACos, OpType::Acos) +DEFINE_UNARY_OBJ(ATan, OpType::Atan) +DEFINE_UNARY_OBJ(SinH, OpType::Sinh) +DEFINE_UNARY_OBJ(CosH, OpType::Cosh) +DEFINE_UNARY_OBJ(TanH, OpType::Tanh) +DEFINE_UNARY_OBJ(ASinH, OpType::Asinh) +DEFINE_UNARY_OBJ(ACosH, OpType::Acosh) +DEFINE_UNARY_OBJ(ATanH, OpType::Atanh) -DEFINE_UNARY_OBJ(Copy, OpType::Copy) DEFINE_UNARY_OBJ(Ceil, OpType::Ceil) DEFINE_UNARY_OBJ(Floor, OpType::Floor) DEFINE_UNARY_OBJ(Erf, OpType::Erf) @@ -301,7 +279,5 @@ DEFINE_UNARY_OBJ(Exp, OpType::Exp) DEFINE_UNARY_OBJ(Neg, OpType::Neg) DEFINE_UNARY_OBJ(Reciprocal, OpType::Reciprocal) DEFINE_UNARY_OBJ(Sqrt, OpType::Sqrt) -DEFINE_UNARY_OBJ(Rsqrt, OpType::Rsqrt) DEFINE_UNARY_OBJ(Round, OpType::Round) -DEFINE_UNARY_OBJ(Square, OpType::Square) }; // namespace infini diff --git a/pyinfinitensor/src/pyinfinitensor/onnx.py b/pyinfinitensor/src/pyinfinitensor/onnx.py index 6f686b58..1a5186a7 100644 --- a/pyinfinitensor/src/pyinfinitensor/onnx.py +++ b/pyinfinitensor/src/pyinfinitensor/onnx.py @@ -196,7 +196,7 @@ class OnnxStub: attributes[name] for name in ["momentum", "epsilon", "training_mode"] ) - tensors[node.output[0]] = self.handler.batchNorm( + 
tensors[node.output[0]] = self.handler.batchNormalization( input, output, mean, var, scale, bias, momentum, eps, training != 0 ) elif node.op_type == "MaxPool": @@ -551,7 +551,7 @@ class OnnxStub: # saves object names, including tensors and operators names: Dict[Union[backend.Tensor, backend.Operator], str] = dict() # counts the occurrence times of each operator for naming - count_op: Dict[backend.OpType, int] = dict() + count_op: Dict[backend.OpTypeId, int] = dict() # counts input and output tensors for naming count_in, count_out = 0, 0 # saves nodes (operators) @@ -563,8 +563,8 @@ class OnnxStub: # saves global input tensors initializers: List[TensorProto] = [] - def name_op(self, op: backend.Operator) -> Tuple[backend.OpType, str]: - ty = op.op_type() + def name_op(self, op: backend.Operator) -> Tuple[backend.OpTypeId, str]: + ty = op.op_type().id() name = "{}{}".format(ty.name, self.count_op.setdefault(ty, 0) + 1) self.names[op] = name self.count_op[ty] += 1 @@ -647,7 +647,7 @@ class OnnxStub: ctx.push_output("{}_{}".format(name, i), it) for (i, it) in enumerate(op.outputs()) ] - if ty == backend.OpType.Conv: + if ty == backend.OpTypeId.Conv: ph, pw, dh, dw, sh, sw = backend.conv_attrs_of(op) ctx.push_node( make_node( @@ -661,11 +661,11 @@ class OnnxStub: group=op.inputs()[0].shape()[1] // op.inputs()[1].shape()[1], ) ) - elif ty == backend.OpType.ConvTrans: + elif ty == backend.OpTypeId.ConvTranspose: ph, pw, sh, sw, dh, dw, oph, opw = backend.conv_trans_attrs_of(op) ctx.push_node( make_node( - "ConvTranspose", + ty.name, inputs, outputs, name, @@ -675,14 +675,14 @@ class OnnxStub: output_padding=[oph, opw], ) ) - elif ty == backend.OpType.Matmul: + elif ty == backend.OpTypeId.MatMul: transA, transB = backend.matmul_attrs_of(op) ctx.push_node( make_node( "Gemm", inputs, outputs, name, transA=transA, transB=transB ) ) - elif ty == backend.OpType.BatchNorm: + elif ty == backend.OpTypeId.BatchNormalization: inputs = [inputs[i] for i in [0, 3, 4, 1, 2]] momentum, eps, training = backend.batch_norm_attrs_of(op) ctx.push_node( @@ -696,7 +696,7 @@ class OnnxStub: training_mode=training, ) ) - elif ty == backend.OpType.MaxPool: + elif ty == backend.OpTypeId.MaxPool: kh, kw, dh, dw, ph, pw, sh, sw = backend.pool_attrs_of(op) ctx.push_node( make_node( @@ -710,7 +710,7 @@ class OnnxStub: strides=[sh, sw], ) ) - elif ty == backend.OpType.AvgPool: + elif ty == backend.OpTypeId.AveragePool: kh, kw, dh, dw, ph, pw, sh, sw = backend.pool_attrs_of(op) ctx.push_node( make_node( @@ -724,27 +724,27 @@ class OnnxStub: ) ) elif ty in [ - backend.OpType.Add, - backend.OpType.Sub, - backend.OpType.Mul, - backend.OpType.Div, - backend.OpType.Pow, - backend.OpType.Relu, - backend.OpType.Sigmoid, - backend.OpType.Tanh, - backend.OpType.Softmax, - backend.OpType.Abs, - backend.OpType.Identity, - backend.OpType.PRelu, + backend.OpTypeId.Add, + backend.OpTypeId.Sub, + backend.OpTypeId.Mul, + backend.OpTypeId.Div, + backend.OpTypeId.Pow, + backend.OpTypeId.Relu, + backend.OpTypeId.Sigmoid, + backend.OpTypeId.Tanh, + backend.OpTypeId.Softmax, + backend.OpTypeId.Abs, + backend.OpTypeId.Identity, + backend.OpTypeId.PRelu, ]: ctx.push_node(make_node(ty.name, inputs, outputs, name)) - elif ty == backend.OpType.Flatten: + elif ty == backend.OpTypeId.Flatten: axis = backend.flatten_axis_of(op) ctx.push_node(make_node(ty.name, inputs, outputs, name, axis=axis)) - elif ty == backend.OpType.Transpose: + elif ty == backend.OpTypeId.Transpose: perm = backend.transpose_permute_of(op) ctx.push_node(make_node(ty.name, inputs, 
outputs, name, perm=perm)) - elif ty == backend.OpType.Reshape: + elif ty == backend.OpTypeId.Reshape: shape = backend.reshape_shape_of(op) inputs.append( ctx.push_data_input( @@ -756,10 +756,10 @@ class OnnxStub: ) ) ctx.push_node(make_node(ty.name, inputs, outputs, name)) - elif ty == backend.OpType.Concat: + elif ty == backend.OpTypeId.Concat: axis = backend.concat_axis_of(op) ctx.push_node(make_node(ty.name, inputs, outputs, name, axis=axis)) - elif ty == backend.OpType.Split: + elif ty == backend.OpTypeId.Split: axis = backend.split_axis_of(op) num_outputs = len(outputs) split = op.inputs()[0].shape()[axis] // num_outputs @@ -781,10 +781,10 @@ class OnnxStub: axis=axis, ) ) - elif ty == backend.OpType.Gather: + elif ty == backend.OpTypeId.Gather: axis = backend.gather_axis_of(op) ctx.push_node(make_node(ty.name, inputs, outputs, name, axis=axis)) - elif ty == backend.OpType.ReduceMean: + elif ty == backend.OpTypeId.ReduceMean: axes, keepdims = backend.reduce_mean_attrs_of(op) inputs.append( ctx.push_data_input( @@ -794,9 +794,9 @@ class OnnxStub: ctx.push_node( make_node(ty.name, inputs, outputs, name, keepdims=keepdims) ) - elif ty == backend.OpType.Slice: + elif ty == backend.OpTypeId.Slice: raise Exception("TODO") - elif ty == backend.OpType.Pad: + elif ty == backend.OpTypeId.Pad: pads = backend.pad_pads_of(op) inputs.append( ctx.push_data_input( @@ -804,7 +804,7 @@ class OnnxStub: ) ) ctx.push_node(make_node(ty.name, inputs, outputs, name)) - elif ty == backend.OpType.Clip: + elif ty == backend.OpTypeId.Clip: min, max = backend.clip_attrs_of(op) if min != None: inputs.append( diff --git a/pyinfinitensor/tests/test_onnx.py b/pyinfinitensor/tests/test_onnx.py index 166d0df2..4283a981 100644 --- a/pyinfinitensor/tests/test_onnx.py +++ b/pyinfinitensor/tests/test_onnx.py @@ -108,7 +108,7 @@ class TestStringMethods(unittest.TestCase): name="batchNormalization", ) make_and_import_model( - make_graph([batch_norm], "batchNorm", [x, scale, b, mean, var], [y]) + make_graph([batch_norm], "batchNormalzation", [x, scale, b, mean, var], [y]) ) def test_max_pool(self): diff --git a/src/bang/bang_runtime.cc b/src/bang/bang_runtime.cc index 66e2b9b0..d909b57c 100644 --- a/src/bang/bang_runtime.cc +++ b/src/bang/bang_runtime.cc @@ -13,7 +13,8 @@ void BangRuntimeObj::runWithoutSync(const Graph &graph, bool tune = false, std::map opCnt; for (auto &op : graph->getOperators()) { // HACK: set correct data type - auto kernelAttrs = KernelAttrs{device, op->getOpType(), op->getDType()}; + auto kernelAttrs = KernelAttrs{device, op->getOpType().underlying(), + DataType::Float32}; Kernel *kernel = kernelRegistry.getKernel(kernelAttrs); auto perfKey = PerfEngine::Key{kernelAttrs, op->getOpPerfKey()}; auto perfData = perfEngine.getPerfData(perfKey); diff --git a/src/core/dummy_mutator.cc b/src/core/dummy_mutator.cc index be6ee705..50543914 100644 --- a/src/core/dummy_mutator.cc +++ b/src/core/dummy_mutator.cc @@ -48,7 +48,7 @@ bool DummyMutator::isMultiBranchMergable(const Graph &inGraph) { if (inGraph->getOperators().size() != 2) return false; for (auto op : inGraph->getOperators()) { - if (op->getOpType() != OpType::Matmul) + if (op->getOpType() != OpType::MatMul) return false; if (op->getPredecessors().size() > 0) return false; diff --git a/src/core/graph.cc b/src/core/graph.cc index f52f8af7..f8934b65 100644 --- a/src/core/graph.cc +++ b/src/core/graph.cc @@ -116,7 +116,7 @@ bool GraphObj::topo_sort() { void GraphObj::optimize() { for (auto &op : ops) { - switch (op->getOpType()) { + switch 
(op->getOpType().underlying()) { default: break; } @@ -151,7 +151,7 @@ TensorVec GraphObj::addTensor(const TensorVec &tensors) { OpVec GraphObj::getComputeOps() const { OpVec opList; for (auto op : ops) - if (op->isComputeOp()) + if (op->getOpType().isMatMulOrConv()) opList.emplace_back(op); return opList; } diff --git a/src/core/graph_handler.cc b/src/core/graph_handler.cc index d032edd6..17074ef7 100644 --- a/src/core/graph_handler.cc +++ b/src/core/graph_handler.cc @@ -69,9 +69,11 @@ Tensor GraphHandlerObj::matmul(Tensor a, Tensor b, Tensor y, bool transA, } } -Tensor GraphHandlerObj::batchNorm(Tensor input, Tensor output, Tensor mean, - Tensor var, Tensor scale, Tensor bias, - float momentum, float eps, bool training) { +Tensor GraphHandlerObj::batchNormalization(Tensor input, Tensor output, + Tensor mean, Tensor var, + Tensor scale, Tensor bias, + float momentum, float eps, + bool training) { if (output) { g->addOpWithOutputs( std::move(input), output, std::move(mean), std::move(var), diff --git a/src/core/op_type.cc b/src/core/op_type.cc new file mode 100644 index 00000000..f12c4a9b --- /dev/null +++ b/src/core/op_type.cc @@ -0,0 +1,278 @@ +#include "core/op_type.h" + +namespace infini { +const char *OpType::toString() const { +#define CASE(NAME) \ + case OpType::NAME: \ + return #NAME + + switch (type) { + CASE(Unknown); + CASE(Abs); + CASE(Acos); + CASE(Acosh); + CASE(Add); + CASE(And); + CASE(ArgMax); + CASE(Asin); + CASE(Asinh); + CASE(Atan); + CASE(Atanh); + CASE(AveragePool); + CASE(BatchNormalization); + CASE(Bernoulli); + CASE(BitShift); + CASE(BitwiseAnd); + CASE(BitwiseNot); + CASE(BitwiseOr); + CASE(BitwiseXor); + CASE(BlackmanWindow); + CASE(Cast); + CASE(CastLike); + CASE(Ceil); + CASE(Celu); + CASE(CenterCropPad); + CASE(Clip); + CASE(Col2lm); + CASE(Compress); + CASE(Concat); + CASE(ConcatFromSequence); + CASE(ConstantOfShape); + CASE(Conv); + CASE(ConvInteger); + CASE(ConvTranspose); + CASE(Cos); + CASE(Cosh); + CASE(CumSum); + CASE(DFT); + CASE(DeformConv); + CASE(DepthToSpace); + CASE(DequantizeLinear); + CASE(Det); + CASE(Div); + CASE(Dropout); + CASE(DynamicQuantizeLinear); + CASE(Einsum); + CASE(Elu); + CASE(Equal); + CASE(Erf); + CASE(Exp); + CASE(Expand); + CASE(EyeLike); + CASE(Flatten); + CASE(Floor); + CASE(GRU); + CASE(Gather); + CASE(GatherElements); + CASE(GatherND); + CASE(Gemm); + CASE(GlobalAveragePool); + CASE(GlobalLpPool); + CASE(GlobalMaxPool); + CASE(Greater); + CASE(GreaterOrEqual); + CASE(GridSample); + CASE(GroupNormalization); + CASE(HammingWindow); + CASE(HannWindow); + CASE(HardSigmoid); + CASE(HardSwish); + CASE(Hardmax); + CASE(Identity); + CASE(If); + CASE(InstanceNormalization); + CASE(IsInf); + CASE(IsNaN); + CASE(LRN); + CASE(LSTM); + CASE(LayerNormalization); + CASE(LeakyRelu); + CASE(Less); + CASE(LessOrEqual); + CASE(Log); + CASE(LogSoftmax); + CASE(Loop); + CASE(LpNormalization); + CASE(LpPool); + CASE(MatMul); + CASE(MatMulInteger); + CASE(Max); + CASE(MaxPool); + CASE(MaxRoiPool); + CASE(MaxUnpool); + CASE(Mean); + CASE(MeanVarianceNormalization); + CASE(MelWeightMatrix); + CASE(Min); + CASE(Mish); + CASE(Mod); + CASE(Mul); + CASE(Multinomial); + CASE(Neg); + CASE(NegativeLogLikelihoodLoss); + CASE(NonMaxSuppression); + CASE(NonZero); + CASE(Not); + CASE(OneHot); + CASE(Optional); + CASE(OptionalGetElement); + CASE(OptionalHasElement); + CASE(Or); + CASE(PRelu); + CASE(Pad); + CASE(Pow); + CASE(QLinearConv); + CASE(QLinearMatMul); + CASE(QuantizeLinear); + CASE(RNN); + CASE(RandomNormal); + CASE(RandomNormalLike); + 
CASE(RandomUniform); + CASE(RandomUniformLike); + CASE(Range); + CASE(Reciprocal); + CASE(ReduceL1); + CASE(ReduceL2); + CASE(ReduceLogSum); + CASE(ReduceLogSumExp); + CASE(ReduceMax); + CASE(ReduceMean); + CASE(ReduceMin); + CASE(ReduceProd); + CASE(ReduceSum); + CASE(ReduceSumSquare); + CASE(Relu); + CASE(Reshape); + CASE(Resize); + CASE(ReverseSequence); + CASE(RoiAlign); + CASE(Round); + CASE(STFT); + CASE(Scan); + CASE(Scatter); + CASE(ScatterElements); + CASE(ScatterND); + CASE(Selu); + CASE(SequenceAt); + CASE(SequenceConstruct); + CASE(SequenceEmpty); + CASE(SequenceErase); + CASE(SequenceInsert); + CASE(SequenceLength); + CASE(SequenceMap); + CASE(Shape); + CASE(Shrink); + CASE(Sigmoid); + CASE(Sign); + CASE(Sin); + CASE(Sinh); + CASE(Size); + CASE(Slice); + CASE(Softmax); + CASE(SoftmaxCrossEntropyLoss); + CASE(Softplus); + CASE(Softsign); + CASE(SpaceToDepth); + CASE(Split); + CASE(SplitToSequence); + CASE(Sqrt); + CASE(Squeeze); + CASE(StringNormalizer); + CASE(Sub); + CASE(Sum); + CASE(Tan); + CASE(Tanh); + CASE(TfIdfVectorizer); + CASE(ThresholdedRelu); + CASE(Tile); + CASE(TopK); + CASE(Transpose); + CASE(Trilu); + CASE(Unique); + CASE(Unsqueeze); + CASE(Upsample); + CASE(Where); + CASE(Xor); + // CUSTOM DEFINED + CASE(G2BMM); + CASE(GBMM); + CASE(MemBound); + // TODO + CASE(ConvTransNHWC); + CASE(ConvBackwardFilter); + CASE(ReluBackward); + CASE(SigmoidBackward); + CASE(TanhBackward); + + CASE(Fill); + CASE(Extend); + CASE(MSELoss); + CASE(Hardtanh); + CASE(L2Loss); + CASE(Rsqrt); + CASE(FloorDiv); + CASE(FloorMod); + CASE(Square); + CASE(SquaredDifference); + default: + return "Unknown"; + } + +#undef CASE +} + +bool OpType::isUnary() const { + static const std::unordered_set set{ + Abs, Acos, Acosh, Asin, Asinh, Atan, Atanh, Cast, Ceil, + Clip, Cos, Cosh, Erf, Exp, Floor, Log, Neg, Not, + Relu, Round, Sigmoid, Sin, Sinh, Sqrt, Tan, Tanh, + }; + + return set.find(type) != set.end(); +} + +bool OpType::isBinary() const { + static const std::unordered_set set{ + Add, And, BitShift, BitwiseAnd, BitwiseNot, BitwiseOr, BitwiseXor, + Div, Mod, Mul, Or, Pow, Sub, Xor, + }; + + return set.find(type) != set.end() || isCompair(); +} + +bool OpType::isElementWise() const { return isUnary() || isBinary(); } + +bool OpType::isCompair() const { + static const std::unordered_set set{ + Equal, Greater, GreaterOrEqual, Less, LessOrEqual, + }; + + return set.find(type) != set.end(); +} + +bool OpType::isPool() const { + static const std::unordered_set set{}; + + return set.find(type) != set.end(); +} + +bool OpType::isGlobalPool() const { + static const std::unordered_set set{ + GlobalAveragePool, + GlobalLpPool, + GlobalMaxPool, + }; + + return set.find(type) != set.end(); +} + +bool OpType::isMatMulOrConv() const { + static const std::unordered_set set{ + Conv, ConvInteger, ConvTranspose, DeformConv, + QLinearConv, MatMul, MatMulInteger, QLinearMatMul, + }; + + return set.find(type) != set.end(); +} + +} // namespace infini diff --git a/src/core/operator.cc b/src/core/operator.cc index 743c49bd..462cb2a2 100644 --- a/src/core/operator.cc +++ b/src/core/operator.cc @@ -10,33 +10,6 @@ OperatorObj::OperatorObj(OpType opType, TensorVec inputs, TensorVec outputs) IT_ASSERT(t); } -bool OperatorObj::isLinearOp() const { - return enum_to_underlying(type) >= 100 && enum_to_underlying(type) < 200; -} - -bool OperatorObj::isElementWiseOp() const { - return enum_to_underlying(type) >= 200 && enum_to_underlying(type) < 300; -} - -bool OperatorObj::isSplitOp() const { return type == OpType::Split; } - 
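These OperatorObj type-query helpers are removed in favor of asking the OpType value directly, as the updated call sites in graph.cc and search_engine.cc show. Below is a minimal sketch, not part of the patch, of the replacement surface; the describe() helper is invented for illustration and only uses members declared in op_type.h:

    #include "core/op_type.h"
    #include <cstdio>

    using infini::OpType;

    // Hypothetical helper showing the new API surface.
    void describe(OpType t) {
        // toString() replaces OpRegistry::getOpName(type).
        std::printf("%s (id=%u)\n", t.toString(), unsigned(t.underlying()));
        // Call sites that used op->isComputeOp() now use isMatMulOrConv().
        if (t.isMatMulOrConv())
            std::printf("  compute-intensive\n");
        // isElementWise() is isUnary() || isBinary(), as defined in op_type.cc.
        if (t.isElementWise())
            std::printf("  element-wise\n");
    }

    int main() {
        describe(OpType::MatMul);             // formerly OpType::Matmul
        describe(OpType::BatchNormalization); // formerly OpType::BatchNorm
        describe(OpType::Add);
        return 0;
    }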
-bool OperatorObj::isConcatOp() const { return type == OpType::Concat; } - -bool OperatorObj::isComputeOp() const { - return type == OpType::Conv || type == OpType::Matmul || - type == OpType::ConvTrans || type == OpType::ConvTransNHWC || - type == OpType::G2BMM || type == OpType::GBMM; -} - -bool OperatorObj::isTransposeOp() const { return type == OpType::Transpose; } - -bool OperatorObj::isReshapeOp() const { return type == OpType::Reshape; } - -bool OperatorObj::isMemBoundOp() const { - return type == OpType::MemBound || type == OpType::Activation || - type == OpType::Transpose; -} - void OperatorObj::removePredecessors(const Operator &op) { for (auto it = predecessors.begin(); it != predecessors.end();) { if (it->lock() == op) @@ -69,14 +42,14 @@ OpPerfKey OperatorObj::getOpPerfKey() const { // Operator::hash, which hashes operator attributes and ignores tensor // shapes. HashType hash = 0; - hash = hashAppend(hash, enum_to_underlying(type)); + hash = hashAppend(hash, type.underlying()); hash = hashAppend(hash, hashVector(workloadVector)); return OpPerfKey(hash, type, workloadVector); } HashType OperatorObj::hash() const { HashType hash = 0; - hash = hashAppend(hash, enum_to_underlying(type)); + hash = hashAppend(hash, type.underlying()); hash = hashAppend(hash, hashVector(getOpAttrVector())); return hash; } diff --git a/src/core/runtime.cc b/src/core/runtime.cc index 1e1e7c1d..4d64d433 100644 --- a/src/core/runtime.cc +++ b/src/core/runtime.cc @@ -17,7 +17,8 @@ void CpuRuntimeObj::run(const Graph &graph, bool tune, bool profiling) const { std::map opCnt; for (auto &op : graph->getOperators()) { - auto kernelAttrs = KernelAttrs{device, op->getOpType(), op->getDType()}; + auto kernelAttrs = + KernelAttrs{device, op->getOpType().underlying(), op->getDType()}; Kernel *kernel = kernelRegistry.getKernel(kernelAttrs); auto perfKey = PerfEngine::Key{kernelAttrs, op->getOpPerfKey()}; auto perfData = perfEngine.getPerfData(perfKey); @@ -65,7 +66,8 @@ double RuntimeObj::getPerfTime(const Graph &graph, bool profiling) const { std::map opCnt; for (auto &op : graph->getOperators()) { - auto kernelAttrs = KernelAttrs{device, op->getOpType(), op->getDType()}; + auto kernelAttrs = + KernelAttrs{device, op->getOpType().underlying(), op->getDType()}; Kernel *kernel = kernelRegistry.getKernel(kernelAttrs); auto perfKey = PerfEngine::Key{kernelAttrs, op->getOpPerfKey()}; auto perfData = perfEngine.getPerfData(perfKey); @@ -116,9 +118,8 @@ void RuntimeObj::printProfilingData(double totalTime, const std::map &opCnt) const { printf("%11s %3s %7s %7s %7s\n", "Op", "Cnt", "T_tot", "Percent", "T_mean"); for (const auto &[type, t] : opTime) { - printf("%11s %3d %7.3f %7.1f %7.3f\n", - OpRegistry::getOpName(type).data(), opCnt.at(type), t, - t / totalTime * 100, t / opCnt.at(type)); + printf("%11s %3d %7.3f %7.1f %7.3f\n", type.toString(), opCnt.at(type), + t, t / totalTime * 100, t / opCnt.at(type)); } } diff --git a/src/core/search_engine.cc b/src/core/search_engine.cc index af643686..a1d9b062 100644 --- a/src/core/search_engine.cc +++ b/src/core/search_engine.cc @@ -127,7 +127,7 @@ SearchEngine::buildMetaGraphWithGraph(const Graph graph) { std::vector ops; ops.emplace_back(op); node.graph = make_ref(runtimeExec, ops); - node.type = op->isComputeOp(); + node.type = op->getOpType().isMatMulOrConv(); node.cnt = op->getPredecessors().size(); opMap.emplace(op->getGuid(), i); metaGraph->nodes.emplace_back(node); @@ -196,7 +196,7 @@ std::shared_ptr SearchEngine::buildMetaGraphWithPlan( } node.graph = 
make_ref(runtimeExec, ops); node.cnt = node.pre.size(); - node.type = ops[0]->isComputeOp(); + node.type = ops[0]->getOpType().isMatMulOrConv(); resultMetaGraph->nodes.emplace_back(node); } } @@ -404,7 +404,7 @@ std::vector SearchEngine::partitionGraph(const Graph graph) { headOps.emplace_back(op); if (op->getPredecessors().size() + op->getSuccessors().size() >= (size_t)partitionThreshold && - !op->isComputeOp()) { + !op->getOpType().isMatMulOrConv()) { auto preOrderI = preOrder[op->getGuid()]; auto postOrderI = postOrder[op->getGuid()]; for (size_t j = 0; j < i; j++) { diff --git a/src/cuda/cuda_runtime.cc b/src/cuda/cuda_runtime.cc index 06a53c4e..972bfb4c 100644 --- a/src/cuda/cuda_runtime.cc +++ b/src/cuda/cuda_runtime.cc @@ -11,7 +11,8 @@ void CudaRuntimeObj::runWithoutSync(const Graph &graph) const { auto &perfEngine = PerfEngine::getInstance(); for (auto &op : graph->getOperators()) { // HACK: set correct data type - auto kernelAttrs = KernelAttrs{device, op->getOpType(), op->getDType()}; + auto kernelAttrs = KernelAttrs{device, op->getOpType().underlying(), + DataType::Float32}; Kernel *kernel = kernelRegistry.getKernel(kernelAttrs); auto perfKey = PerfEngine::Key{kernelAttrs, op->getOpPerfKey()}; auto perfData = perfEngine.getPerfData(perfKey); @@ -32,7 +33,8 @@ void CudaRuntimeObj::tune(const Graph &graph, bool profiling = false) const { std::map opCnt; for (auto &op : graph->getOperators()) { // HACK: set correct data type - auto kernelAttrs = KernelAttrs{device, op->getOpType(), op->getDType()}; + auto kernelAttrs = KernelAttrs{device, op->getOpType().underlying(), + DataType::Float32}; Kernel *kernel = kernelRegistry.getKernel(kernelAttrs); auto perfKey = PerfEngine::Key{kernelAttrs, op->getOpPerfKey()}; auto perfData = perfEngine.getPerfData(perfKey); diff --git a/src/ffi/ffi_infinitensor.cc b/src/ffi/ffi_infinitensor.cc index 9ca5d995..30237a90 100644 --- a/src/ffi/ffi_infinitensor.cc +++ b/src/ffi/ffi_infinitensor.cc @@ -48,6 +48,8 @@ void register_operator_timer(py::module &m) { #endif } +decltype(OpType::type) getId(OpType const *const ptr) { return ptr->type; } + void export_values(py::module &m) { #define VALUE(TYPE, NAME) value(#NAME, TYPE::NAME) @@ -58,13 +60,13 @@ void export_values(py::module &m) { .VALUE(ActType, Tanh) .export_values(); - py::enum_(m, "OpType") - .VALUE(OpType, Unknown) + py::class_(m, "OpType") + .def(py::init()) + .def("id", getId, policy::automatic); + py::enum_(m, "OpTypeId") .VALUE(OpType, Conv) - .VALUE(OpType, Matmul) - .VALUE(OpType, ConvTrans) - .VALUE(OpType, G2BMM) - .VALUE(OpType, GBMM) + .VALUE(OpType, MatMul) + .VALUE(OpType, ConvTranspose) .VALUE(OpType, Pad) .VALUE(OpType, Clip) .VALUE(OpType, Slice) @@ -73,7 +75,7 @@ void export_values(py::module &m) { .VALUE(OpType, Transpose) .VALUE(OpType, Extend) .VALUE(OpType, MaxPool) - .VALUE(OpType, AvgPool) + .VALUE(OpType, AveragePool) .VALUE(OpType, Add) .VALUE(OpType, Sub) .VALUE(OpType, Mul) @@ -84,9 +86,8 @@ void export_values(py::module &m) { .VALUE(OpType, Reshape) .VALUE(OpType, Flatten) .VALUE(OpType, Identity) - .VALUE(OpType, BatchNorm) + .VALUE(OpType, BatchNormalization) .VALUE(OpType, Softmax) - .VALUE(OpType, Activation) .VALUE(OpType, Relu) .VALUE(OpType, PRelu) .VALUE(OpType, Sigmoid) @@ -152,7 +153,7 @@ static std::tuple conv_attrs_of(Operator op) { static std::tuple conv_trans_attrs_of(Operator op) { - IT_ASSERT(op->getOpType() == OpType::ConvTrans); + IT_ASSERT(op->getOpType() == OpType::ConvTranspose); auto conv = dynamic_cast(op.get()); auto [oph, opw] = 
conv->getOutputPadding(); return std::make_tuple(conv->getPh(), conv->getPw(), conv->getDh(), @@ -161,13 +162,13 @@ conv_trans_attrs_of(Operator op) { } static std::tuple matmul_attrs_of(Operator op) { - IT_ASSERT(op->getOpType() == OpType::Matmul); + IT_ASSERT(op->getOpType() == OpType::MatMul); auto matmul = dynamic_cast(op.get()); return std::make_tuple(matmul->getTransA(), matmul->getTransB()); } static std::tuple batch_norm_attrs_of(Operator op) { - IT_ASSERT(op->getOpType() == OpType::BatchNorm); + IT_ASSERT(op->getOpType() == OpType::BatchNormalization); auto batchnorm = dynamic_cast(op.get()); return std::make_tuple(batchnorm->getMomentum(), batchnorm->getEps(), batchnorm->getTrainingMode()); @@ -176,7 +177,7 @@ static std::tuple batch_norm_attrs_of(Operator op) { static std::tuple pool_attrs_of(Operator op) { IT_ASSERT(op->getOpType() == OpType::MaxPool || - op->getOpType() == OpType::AvgPool); + op->getOpType() == OpType::AveragePool); auto pool = dynamic_cast(op.get()); return std::make_tuple(pool->getKh(), pool->getKw(), pool->getDh(), pool->getDw(), pool->getPh(), pool->getPw(), @@ -319,7 +320,7 @@ void init_graph_builder(py::module &m) { .def("conv", &Handler::conv, policy::move) .def("convTransposed2d", &Handler::convTransposed2d, policy::move) .def("matmul", &Handler::matmul, policy::move) - .def("batchNorm", &Handler::batchNorm, policy::move) + .def("batchNormalization", &Handler::batchNormalization, policy::move) .def("maxPool", &Handler::maxPool, policy::move) .def("avgPool", &Handler::avgPool, policy::move) .def("add", &Handler::add, policy::move) diff --git a/src/kernels/bang/activation.cc b/src/kernels/bang/activation.cc index 935e2746..cd19906b 100644 --- a/src/kernels/bang/activation.cc +++ b/src/kernels/bang/activation.cc @@ -92,43 +92,6 @@ class RoundCnnl : public BangKernelWithoutConfig { } }; -class SquareCnnl : public BangKernelWithoutConfig { - void compute(const Operator &_op, - const RuntimeObj *_context) const override { - auto op = as(_op); - auto context = dynamic_cast(_context); - - void *const aData = (op->getInputs(0)->getRawDataPtr()); - void *const cData = (op->getOutput()->getRawDataPtr()); - - cnnlTensorDescriptor_t aDesc, cDesc; - auto dim = op->getInputs(0)->getDims(); - if (dim.size() != 4) - IT_TODO_HALT(); - - int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]}; - // get inputs - checkCnnlError(cnnlCreateTensorDescriptor(&aDesc)); - checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW, - CNNL_DTYPE_FLOAT, 4, dim_array)); - - // get outputs - checkCnnlError(cnnlCreateTensorDescriptor(&cDesc)); - checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW, - CNNL_DTYPE_FLOAT, 4, dim_array)); - - cnnlStatus_t stat = - cnnlSquare(context->cnnlHandle(), aDesc, aData, cDesc, cData); - if (stat != CNNL_STATUS_SUCCESS) - return; - - // Destories in BANG does not require sync. But cnnl does not state - // whether sync is required before destories. 
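The ffi changes above export the new OpType to Python twice: a small class whose id() returns the raw enumeration, plus an OpTypeId enum used for comparisons on the Python side (see onnx.py). A rough, self-contained sketch of that binding pattern follows; the module name and the three sample values are illustrative only, and pybind11 is assumed as in the rest of ffi_infinitensor.cc:

    #include <pybind11/pybind11.h>
    #include "core/op_type.h"

    namespace py = pybind11;
    using infini::OpType;

    // Same accessor as in ffi_infinitensor.cc: returns the raw enum value.
    decltype(OpType::type) getId(OpType const *const ptr) { return ptr->type; }

    PYBIND11_MODULE(example_backend, m) {
        // OpType wraps an unnamed enum, so the wrapper is exposed as a class
        // and the enumeration itself as a separate "OpTypeId" enum.
        py::class_<OpType>(m, "OpType")
            .def(py::init<decltype(OpType::type)>())
            .def("id", getId);
        py::enum_<decltype(OpType::type)>(m, "OpTypeId")
            .value("Conv", OpType::Conv)
            .value("MatMul", OpType::MatMul)
            .value("BatchNormalization", OpType::BatchNormalization)
            .export_values();
    }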
- checkCnnlError(cnnlDestroyTensorDescriptor(aDesc)); - checkCnnlError(cnnlDestroyTensorDescriptor(cDesc)); - } -}; - class PReluCnnl : public BangKernelWithoutConfig { void compute(const Operator &_op, const RuntimeObj *_context) const override { @@ -185,24 +148,13 @@ class SigmoidCnnl : public UnaryCnnl { float getCoef() const override { return 0.0; } }; -class TanhCnnl : public UnaryCnnl { - cnnlActivationMode_t getOpType() const override { - return CNNL_ACTIVATION_TANH; - } - float getCoef() const override { return 0.0; } -}; - REGISTER_KERNEL(Device::BANG, OpType::Relu, DataType::Float32, ReluCnnl, "Relu_cnnl_BANG_Float32"); REGISTER_KERNEL(Device::BANG, OpType::PRelu, DataType::Float32, PReluCnnl, "PRelu_cnnl_BANG_Float32"); REGISTER_KERNEL(Device::BANG, OpType::Sigmoid, DataType::Float32, SigmoidCnnl, "Sigmoid_cnnl_BANG_Float32"); -REGISTER_KERNEL(Device::BANG, OpType::Tanh, DataType::Float32, TanhCnnl, - "Tanh_cnnl_BANG_Float32"); REGISTER_KERNEL(Device::BANG, OpType::Round, DataType::Float32, RoundCnnl, "Round_cnnl_BANG_Float32"); -REGISTER_KERNEL(Device::BANG, OpType::Square, DataType::Float32, SquareCnnl, - "Square_cnnl_BANG_Float32"); }; // namespace infini diff --git a/src/kernels/bang/batchnorm.cc b/src/kernels/bang/batchnorm.cc index c6e4a2dd..d6b9ce53 100644 --- a/src/kernels/bang/batchnorm.cc +++ b/src/kernels/bang/batchnorm.cc @@ -65,7 +65,7 @@ class BatchNormCnnl : public BangKernelWithoutConfig { } }; -REGISTER_KERNEL(Device::BANG, OpType::BatchNorm, DataType::Float32, +REGISTER_KERNEL(Device::BANG, OpType::BatchNormalization, DataType::Float32, BatchNormCnnl, "BatchNorm_cnnl_BANG_Float32"); }; // namespace infini diff --git a/src/kernels/bang/conv_trans.cc b/src/kernels/bang/conv_trans.cc index 3e22c03d..05ec04fb 100644 --- a/src/kernels/bang/conv_trans.cc +++ b/src/kernels/bang/conv_trans.cc @@ -83,6 +83,6 @@ class ConvTransCnnl : public BangKernelWithoutConfig { } }; -REGISTER_KERNEL(Device::BANG, OpType::ConvTrans, DataType::Float32, +REGISTER_KERNEL(Device::BANG, OpType::ConvTranspose, DataType::Float32, ConvTransCnnl, "ConvTrans_cnnl_BANG_Float32"); }; // namespace infini diff --git a/src/kernels/bang/copy.cc b/src/kernels/bang/copy.cc deleted file mode 100644 index 37987729..00000000 --- a/src/kernels/bang/copy.cc +++ /dev/null @@ -1,46 +0,0 @@ -#include "bang/bang_kernel_without_config.h" -#include "bang/bang_runtime.h" -#include "operators/unary.h" - -namespace infini { -class CopyCnnl : public BangKernelWithoutConfig { - void compute(const Operator &_op, - const RuntimeObj *_context) const override { - auto op = as(_op); - auto context = dynamic_cast(_context); - - void *const aData = (op->getInputs(0)->getRawDataPtr()); - void *const cData = (op->getOutput()->getRawDataPtr()); - - cnnlTensorDescriptor_t aDesc, cDesc; - auto dim = op->getInputs(0)->getDims(); - if (dim.size() != 4) - IT_TODO_HALT(); - - int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]}; - // get inputs - checkCnnlError(cnnlCreateTensorDescriptor(&aDesc)); - checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW, - CNNL_DTYPE_FLOAT, 4, dim_array)); - - // get outputs - checkCnnlError(cnnlCreateTensorDescriptor(&cDesc)); - checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW, - CNNL_DTYPE_FLOAT, 4, dim_array)); - - cnnlStatus_t stat = - cnnlCopy(context->cnnlHandle(), aDesc, aData, cDesc, cData); - if (stat != CNNL_STATUS_SUCCESS) - return; - - // Destories in BANG does not require sync. But cnnl does not state - // whether sync is required before destories. 
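Throughout these backend kernels the REGISTER_KERNEL entries are re-keyed to the ONNX-style names (BatchNormalization, ConvTranspose, AveragePool, ...), and the runtimes now build registry keys from the op type's raw integer. A simplified sketch of that keying, assuming KernelAttrs is a tuple of (Device, OpType::underlying_t, DataType) as the KernelAttrs{device, op->getOpType().underlying(), ...} call sites suggest; the Device and DataType enums below are stand-ins, not the project's real types:

    #include "core/op_type.h"
    #include <tuple>

    namespace sketch {

    enum class Device { CPU, CUDA, BANG };   // stand-in
    enum class DataType { Float32, UInt32 }; // stand-in

    using KernelAttrs =
        std::tuple<Device, infini::OpType::underlying_t, DataType>;

    // Mirrors runtime.cc / cuda_runtime.cc: the op type enters the key as its
    // underlying integer rather than as the OpType object itself.
    inline KernelAttrs makeKey(Device dev, infini::OpType op, DataType dt) {
        return KernelAttrs{dev, op.underlying(), dt};
    }

    } // namespace sketch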
- checkCnnlError(cnnlDestroyTensorDescriptor(aDesc)); - checkCnnlError(cnnlDestroyTensorDescriptor(cDesc)); - } -}; - -REGISTER_KERNEL(Device::BANG, OpType::Copy, DataType::Float32, CopyCnnl, - "Copy_cnnl_BANG_Float32"); - -}; // namespace infini diff --git a/src/kernels/bang/element_wise.cc b/src/kernels/bang/element_wise.cc index 225e3847..7130b6a7 100644 --- a/src/kernels/bang/element_wise.cc +++ b/src/kernels/bang/element_wise.cc @@ -593,9 +593,6 @@ class MulCnnl : public ElementWiseCnnl { class EqualCnnl : public LogicOpCnnl { cnnlLogicOp_t getOpType() const override { return CNNL_LOGIC_OP_EQ; } }; -class NotEqualCnnl : public LogicOpCnnl { - cnnlLogicOp_t getOpType() const override { return CNNL_LOGIC_OP_NE; } -}; class GreaterThanCnnl : public LogicOpCnnl { cnnlLogicOp_t getOpType() const override { return CNNL_LOGIC_OP_GT; } }; @@ -651,13 +648,13 @@ REGISTER_KERNEL(Device::BANG, OpType::Mul, DataType::Float32, MulCnnl, REGISTER_KERNEL(Device::BANG, OpType::Div, DataType::Float32, DivCnnl, "Div_cnnl_Float32"); -REGISTER_KERNEL(Device::BANG, OpType::Maximum, DataType::Float32, MaximumCnnl, +REGISTER_KERNEL(Device::BANG, OpType::Max, DataType::Float32, MaximumCnnl, "Maximum_cnnl_BANG_Float32"); -REGISTER_KERNEL(Device::BANG, OpType::Minimum, DataType::Float32, MinimumCnnl, +REGISTER_KERNEL(Device::BANG, OpType::Min, DataType::Float32, MinimumCnnl, "Minimum_cnnl_BANG_Float32"); REGISTER_KERNEL(Device::BANG, OpType::MSELoss, DataType::Float32, MSELossCnnl, "MSELoss_cnnl_BANG_Float32"); -REGISTER_KERNEL(Device::BANG, OpType::Power, DataType::Float32, PowerCnnl, +REGISTER_KERNEL(Device::BANG, OpType::Pow, DataType::Float32, PowerCnnl, "Power_cnnl_BANG_Float32"); REGISTER_KERNEL(Device::BANG, OpType::FloorDiv, DataType::Float32, FloorDivCnnl, "FloorDiv_cnnl_BANG_Float32"); @@ -667,15 +664,13 @@ REGISTER_KERNEL(Device::BANG, OpType::SquaredDifference, DataType::Float32, SquaredDifferenceCnnl, "SquaredDifference_cnnl_BANG_Float32"); REGISTER_KERNEL(Device::BANG, OpType::Equal, DataType::Float32, EqualCnnl, "Equal_cnnl_BANG_Float32"); -REGISTER_KERNEL(Device::BANG, OpType::NotEqual, DataType::Float32, NotEqualCnnl, - "NotEqual_cnnl_BANG_Float32"); -REGISTER_KERNEL(Device::BANG, OpType::GreaterThan, DataType::Float32, +REGISTER_KERNEL(Device::BANG, OpType::Greater, DataType::Float32, GreaterThanCnnl, "GreaterThan_cnnl_BANG_Float32"); -REGISTER_KERNEL(Device::BANG, OpType::GreaterEqual, DataType::Float32, +REGISTER_KERNEL(Device::BANG, OpType::GreaterOrEqual, DataType::Float32, GreaterEqualCnnl, "GreaterEqual_cnnl_BANG_Float32"); -REGISTER_KERNEL(Device::BANG, OpType::LessThan, DataType::Float32, LessThanCnnl, +REGISTER_KERNEL(Device::BANG, OpType::Less, DataType::Float32, LessThanCnnl, "LessThan_cnnl_BANG_Float32"); -REGISTER_KERNEL(Device::BANG, OpType::LessEqual, DataType::Float32, +REGISTER_KERNEL(Device::BANG, OpType::LessOrEqual, DataType::Float32, LessEqualCnnl, "LessEqual_cnnl_BANG_Float32"); REGISTER_KERNEL(Device::BANG, OpType::And, DataType::Float32, AndCnnl, "And_cnnl_BANG_Float32"); @@ -685,13 +680,13 @@ REGISTER_KERNEL(Device::BANG, OpType::Xor, DataType::Float32, XorCnnl, "Xor_cnnl_BANG_Float32"); REGISTER_KERNEL(Device::BANG, OpType::Not, DataType::Float32, NotCnnl, "Not_cnnl_BANG_Float32"); -REGISTER_KERNEL(Device::BANG, OpType::BitAnd, DataType::Float32, BitAndCnnl, +REGISTER_KERNEL(Device::BANG, OpType::BitwiseAnd, DataType::Float32, BitAndCnnl, "BitAnd_cnnl_BANG_Float32"); -REGISTER_KERNEL(Device::BANG, OpType::BitOr, DataType::Float32, BitOrCnnl, 
+REGISTER_KERNEL(Device::BANG, OpType::BitwiseOr, DataType::Float32, BitOrCnnl, "BitOr_cnnl_BANG_Float32"); -REGISTER_KERNEL(Device::BANG, OpType::BitXor, DataType::Float32, BitXorCnnl, +REGISTER_KERNEL(Device::BANG, OpType::BitwiseXor, DataType::Float32, BitXorCnnl, "BitXor_cnnl_BANG_Float32"); -REGISTER_KERNEL(Device::BANG, OpType::BitNot, DataType::Float32, BitNotCnnl, +REGISTER_KERNEL(Device::BANG, OpType::BitwiseNot, DataType::Float32, BitNotCnnl, "BitNot_cnnl_BANG_Float32"); // REGISTER_KERNEL(Device::BANG, OpType::BitLeftShift, DataType::Float32, // BitLeftShiftCnnl, diff --git a/src/kernels/bang/matmul.cc b/src/kernels/bang/matmul.cc index 811c43ef..56d9cf0f 100644 --- a/src/kernels/bang/matmul.cc +++ b/src/kernels/bang/matmul.cc @@ -79,6 +79,6 @@ class MatmulCnnl : public BangKernelWithoutConfig { } }; -REGISTER_KERNEL(Device::BANG, OpType::Matmul, DataType::Float32, MatmulCnnl, +REGISTER_KERNEL(Device::BANG, OpType::MatMul, DataType::Float32, MatmulCnnl, "Matmul_cnnl_BANG_Float32"); }; // namespace infini diff --git a/src/kernels/bang/pooling.cc b/src/kernels/bang/pooling.cc index 5abbbf56..6f907705 100644 --- a/src/kernels/bang/pooling.cc +++ b/src/kernels/bang/pooling.cc @@ -68,6 +68,6 @@ class avgPoolCnnl : public PoolingCnnl { REGISTER_KERNEL(Device::BANG, OpType::MaxPool, DataType::Float32, maxPoolCnnl, "MaxPool_cnnl_BANG_Float32"); -REGISTER_KERNEL(Device::BANG, OpType::AvgPool, DataType::Float32, avgPoolCnnl, - "AvgPool_cnnl_BANG_Float32"); +REGISTER_KERNEL(Device::BANG, OpType::AveragePool, DataType::Float32, + avgPoolCnnl, "AvgPool_cnnl_BANG_Float32"); }; // namespace infini diff --git a/src/kernels/bang/trigon.cc b/src/kernels/bang/trigon.cc index 4378aa6e..fcf56472 100644 --- a/src/kernels/bang/trigon.cc +++ b/src/kernels/bang/trigon.cc @@ -162,23 +162,23 @@ REGISTER_KERNEL(Device::BANG, OpType::Cos, DataType::Float32, CosCnnl, "Cos_cnnl_BANG_Float32"); REGISTER_KERNEL(Device::BANG, OpType::Tan, DataType::Float32, TanCnnl, "Tan_cnnl_BANG_Float32"); -REGISTER_KERNEL(Device::BANG, OpType::ASin, DataType::Float32, ASinCnnl, +REGISTER_KERNEL(Device::BANG, OpType::Asin, DataType::Float32, ASinCnnl, "ASin_cnnl_BANG_Float32"); -REGISTER_KERNEL(Device::BANG, OpType::ACos, DataType::Float32, ACosCnnl, +REGISTER_KERNEL(Device::BANG, OpType::Acos, DataType::Float32, ACosCnnl, "ACos_cnnl_BANG_Float32"); -REGISTER_KERNEL(Device::BANG, OpType::ATan, DataType::Float32, ATanCnnl, +REGISTER_KERNEL(Device::BANG, OpType::Atan, DataType::Float32, ATanCnnl, "ATan_cnnl_BANG_Float32"); -REGISTER_KERNEL(Device::BANG, OpType::SinH, DataType::Float32, SinHCnnl, +REGISTER_KERNEL(Device::BANG, OpType::Sinh, DataType::Float32, SinHCnnl, "SinH_cnnl_BANG_Float32"); -REGISTER_KERNEL(Device::BANG, OpType::CosH, DataType::Float32, CosHCnnl, +REGISTER_KERNEL(Device::BANG, OpType::Cosh, DataType::Float32, CosHCnnl, "CosH_cnnl_BANG_Float32"); -REGISTER_KERNEL(Device::BANG, OpType::TanH, DataType::Float32, TanHCnnl, +REGISTER_KERNEL(Device::BANG, OpType::Tanh, DataType::Float32, TanHCnnl, "TanH_cnnl_BANG_Float32"); -REGISTER_KERNEL(Device::BANG, OpType::ASinH, DataType::Float32, ASinHCnnl, +REGISTER_KERNEL(Device::BANG, OpType::Asinh, DataType::Float32, ASinHCnnl, "ASinH_cnnl_BANG_Float32"); -REGISTER_KERNEL(Device::BANG, OpType::ACosH, DataType::Float32, ACosHCnnl, +REGISTER_KERNEL(Device::BANG, OpType::Acosh, DataType::Float32, ACosHCnnl, "ACosH_cnnl_BANG_Float32"); -REGISTER_KERNEL(Device::BANG, OpType::ATanH, DataType::Float32, ATanHCnnl, +REGISTER_KERNEL(Device::BANG, OpType::Atanh, DataType::Float32, 
ATanHCnnl, "ATanH_cnnl_BANG_Float32");
}; // namespace infini
diff --git a/src/kernels/cpu/matmul.cc b/src/kernels/cpu/matmul.cc
index c10023d3..01dcefa6 100644
--- a/src/kernels/cpu/matmul.cc
+++ b/src/kernels/cpu/matmul.cc
@@ -26,9 +26,9 @@ template class NaiveMatmul : public CpuKernelWithoutConfig {
}
};
-REGISTER_KERNEL(Device::CPU, OpType::Matmul, DataType::UInt32,
+REGISTER_KERNEL(Device::CPU, OpType::MatMul, DataType::UInt32,
NaiveMatmul, "MatmulNaive_CPU_uint32");
-REGISTER_KERNEL(Device::CPU, OpType::Matmul, DataType::Float32,
+REGISTER_KERNEL(Device::CPU, OpType::MatMul, DataType::Float32,
NaiveMatmul, "MatmulNaive_CPU_float32");
-} // namespace infini
\ No newline at end of file
+} // namespace infini
diff --git a/src/kernels/cpu/pooling.cc b/src/kernels/cpu/pooling.cc
index 6656bcc9..acb88d9c 100644
--- a/src/kernels/cpu/pooling.cc
+++ b/src/kernels/cpu/pooling.cc
@@ -76,6 +76,6 @@ REGISTER_KERNEL(Device::CPU, OpType::MaxPool, DataType::UInt32, NaiveMaxPool,
"maxPoolNaive_CPU_uint32");
REGISTER_KERNEL(Device::CPU, OpType::MaxPool, DataType::Float32, NaiveMaxPool,
"maxPoolNaive_CPU_float32");
-REGISTER_KERNEL(Device::CPU, OpType::AvgPool, DataType::Float32,
+REGISTER_KERNEL(Device::CPU, OpType::AveragePool, DataType::Float32,
NaiveAvgPool, "AvgPoolNaive_CPU_float32");
-} // namespace infini
\ No newline at end of file
+} // namespace infini
diff --git a/src/kernels/cuda/batch_norm.cc b/src/kernels/cuda/batch_norm.cc
index 111137ff..1df7313f 100644
--- a/src/kernels/cuda/batch_norm.cc
+++ b/src/kernels/cuda/batch_norm.cc
@@ -59,6 +59,6 @@ class BatchNormCudnn : public CudaKernelWithoutConfig {
}
};
-REGISTER_KERNEL(Device::CUDA, OpType::BatchNorm, DataType::Float32,
+REGISTER_KERNEL(Device::CUDA, OpType::BatchNormalization, DataType::Float32,
BatchNormCudnn, "BatchNorm_cuDNN_CUDA_Float32");
} // namespace infini
diff --git a/src/kernels/cuda/conv_transposed.cc b/src/kernels/cuda/conv_transposed.cc
index fe11f0d3..4bd1b5e9 100644
--- a/src/kernels/cuda/conv_transposed.cc
+++ b/src/kernels/cuda/conv_transposed.cc
@@ -300,7 +300,7 @@ class convBackwardDataCudnn : public Kernel {
}
};
-REGISTER_KERNEL(Device::CUDA, OpType::ConvTrans, DataType::Float32,
+REGISTER_KERNEL(Device::CUDA, OpType::ConvTranspose, DataType::Float32,
convBackwardDataCudnn, "ConvTranposed_cuDNN_CUDA_Float32");
REGISTER_KERNEL(Device::CUDA, OpType::ConvTransNHWC, DataType::Float32,
convBackwardDataCudnn, "ConvTranposedNHWC_cuDNN_CUDA_Float32");
diff --git a/src/kernels/cuda/matmul.cc b/src/kernels/cuda/matmul.cc
index 0b15e4b6..e238ef49 100644
--- a/src/kernels/cuda/matmul.cc
+++ b/src/kernels/cuda/matmul.cc
@@ -114,7 +114,7 @@ class matmulCublas : public Kernel {
}
};
-REGISTER_KERNEL(Device::CUDA, OpType::Matmul, DataType::Float32, matmulCublas,
+REGISTER_KERNEL(Device::CUDA, OpType::MatMul, DataType::Float32, matmulCublas,
"Matmul_cuBLAS_CUDA_Float32");
REGISTER_CONSTRUCTOR(2, MatmulCublasPerfRecordObj::from_json);
diff --git a/src/kernels/cuda/pooling.cc b/src/kernels/cuda/pooling.cc
index 552690b9..90aa3944 100644
--- a/src/kernels/cuda/pooling.cc
+++ b/src/kernels/cuda/pooling.cc
@@ -68,6 +68,6 @@ class avgPoolCudnn : public poolingCudnn {
REGISTER_KERNEL(Device::CUDA, OpType::MaxPool, DataType::Float32, maxPoolCudnn,
"MaxPool_cuDNN_CUDA_Float32");
-REGISTER_KERNEL(Device::CUDA, OpType::AvgPool, DataType::Float32, avgPoolCudnn,
- "AvgPool_cuDNN_CUDA_Float32");
+REGISTER_KERNEL(Device::CUDA, OpType::AveragePool, DataType::Float32,
+ avgPoolCudnn, "AvgPool_cuDNN_CUDA_Float32");
}; // namespace infini
diff --git a/src/kernels/intelcpu/batch_norm.cc b/src/kernels/intelcpu/batch_norm.cc
index 88296605..9410bcb8 100644
--- a/src/kernels/intelcpu/batch_norm.cc
+++ b/src/kernels/intelcpu/batch_norm.cc
@@ -63,6 +63,6 @@ class MklBatchNorm : public MklKernelWithoutConfig {
{DNNL_ARG_SHIFT, baisMemory}});
}
};
-REGISTER_KERNEL(Device::INTELCPU, OpType::BatchNorm, DataType::Float32,
+REGISTER_KERNEL(Device::INTELCPU, OpType::BatchNormalization, DataType::Float32,
MklBatchNorm, "BatchNorm_Mkl_Float32");
}; // namespace infini
diff --git a/src/kernels/intelcpu/conv_transposed.cc b/src/kernels/intelcpu/conv_transposed.cc
index aca5cca5..ebf1ad24 100644
--- a/src/kernels/intelcpu/conv_transposed.cc
+++ b/src/kernels/intelcpu/conv_transposed.cc
@@ -244,7 +244,7 @@ class MklConvTranspose : public Kernel {
return make_ref(ret);
}
};
-REGISTER_KERNEL(Device::INTELCPU, OpType::ConvTrans, DataType::Float32,
+REGISTER_KERNEL(Device::INTELCPU, OpType::ConvTranspose, DataType::Float32,
MklConvTranspose, "MklConvTrans_CPU_float32");
} // namespace infini
diff --git a/src/operators/G2BMM.cc b/src/operators/G2BMM.cc
index 80d63482..aafc829e 100644
--- a/src/operators/G2BMM.cc
+++ b/src/operators/G2BMM.cc
@@ -38,12 +38,12 @@ optional> G2BMMObj::inferShape(const TensorVec &inputs) const {
}
vector G2BMMObj::getWorkloadVector() const {
- return {enum_to_underlying(type), b, m, k, width, dilation,
+ return {type.underlying(), b, m, k, width, dilation,
enum_to_underlying(act)};
}
vector G2BMMObj::getOpAttrVector() const {
- return {enum_to_underlying(type), width, dilation, enum_to_underlying(act)};
+ return {type.underlying(), width, dilation, enum_to_underlying(act)};
}
} // namespace infini
diff --git a/src/operators/GBMM.cc b/src/operators/GBMM.cc
index 227c48f7..ab034472 100644
--- a/src/operators/GBMM.cc
+++ b/src/operators/GBMM.cc
@@ -37,11 +37,10 @@ optional> GBMMObj::inferShape(const TensorVec &inputs) const {
}
vector GBMMObj::getWorkloadVector() const {
- return {enum_to_underlying(type), b, m, w, n, dilation,
- enum_to_underlying(act)};
+ return {type.underlying(), b, m, w, n, dilation, enum_to_underlying(act)};
}
vector GBMMObj::getOpAttrVector() const {
- return {enum_to_underlying(type), dilation, enum_to_underlying(act)};
+ return {type.underlying(), dilation, enum_to_underlying(act)};
}
} // namespace infini
diff --git a/src/operators/activation_backward.cc b/src/operators/activation_backward.cc
index 47a42086..b968c936 100644
--- a/src/operators/activation_backward.cc
+++ b/src/operators/activation_backward.cc
@@ -15,7 +15,7 @@ ActivationBackwardObj::inferShape(const TensorVec &inputs) const {
std::string ActivationBackwardObj::toString() const {
std::ostringstream os;
- os << OpRegistry::getOpName(type) << "[" << getGuid() << "]";
+ os << type.toString() << "[" << getGuid() << "]";
os << "(";
os << vecToString(inputs[0]->getDims()) << ",";
os << "input=" << inputs[0]->getGuid() << ",";
@@ -24,14 +24,14 @@ std::string ActivationBackwardObj::toString() const {
}
vector ActivationBackwardObj::getWorkloadVector() const {
- vector ret{enum_to_underlying(type)};
+ vector ret{type.underlying()};
const Shape shape = outputs[0]->getDims();
ret.insert(ret.end(), shape.begin(), shape.end());
return ret;
}
vector ActivationBackwardObj::getOpAttrVector() const {
- return {enum_to_underlying(type)};
+ return {type.underlying()};
}
}; // namespace infini
diff --git a/src/operators/batch_norm.cc b/src/operators/batch_norm.cc
index f85b72f1..69271377 100644
--- a/src/operators/batch_norm.cc
+++ b/src/operators/batch_norm.cc
@@ -4,7 +4,8 @@ namespace infini {
BatchNormObj::BatchNormObj(GraphObj *graph, Tensor input, Tensor output,
Tensor mean, Tensor var, Tensor scale, Tensor bias,
float momentum, float eps, bool trainingMode)
- : OperatorObj(OpType::BatchNorm, {input, mean, var, scale, bias}, {output}),
+ : OperatorObj(OpType::BatchNormalization, {input, mean, var, scale, bias},
+ {output}),
momentum(momentum), eps(eps), trainingMode(trainingMode) {
if (trainingMode)
IT_TODO_HALT();
@@ -38,7 +39,7 @@ vector BatchNormObj::inferDataType(const TensorVec &inputs) const {
std::string BatchNormObj::toString() const {
std::ostringstream os;
- os << "BatchNorm[" << getGuid() << "]";
+ os << "batchNormalization[" << getGuid() << "]";
os << "(";
os << vecToString(inputs[0]->getDims()) << ",";
os << "momentum=" << momentum << ",";
@@ -57,13 +58,13 @@ std::string BatchNormObj::toString() const {
// need eps and momentum?
vector BatchNormObj::getWorkloadVector() const {
vector ret = inputs[0]->getDims();
- ret.emplace(ret.begin(), enum_to_underlying(type));
+ ret.emplace(ret.begin(), type.underlying());
return ret;
}
// need eps and momentum?
vector BatchNormObj::getOpAttrVector() const {
- return {enum_to_underlying(type)};
+ return {type.underlying()};
}
} // namespace infini
diff --git a/src/operators/concat.cc b/src/operators/concat.cc
index e98497a1..8f8a9f7b 100644
--- a/src/operators/concat.cc
+++ b/src/operators/concat.cc
@@ -47,12 +47,12 @@ vector ConcatObj::getWorkloadVector() const {
vector ret = getOutput()->getDims();
ret.emplace(ret.begin(), (int)inputs.size());
ret.emplace(ret.begin(), dim);
- ret.emplace(ret.begin(), enum_to_underlying(type));
+ ret.emplace(ret.begin(), type.underlying());
return ret;
}
vector ConcatObj::getOpAttrVector() const {
- return {enum_to_underlying(type), dim};
+ return {type.underlying(), dim};
}
} // namespace infini
diff --git a/src/operators/conv.cc b/src/operators/conv.cc
index 36d97081..f7ab1944 100644
--- a/src/operators/conv.cc
+++ b/src/operators/conv.cc
@@ -19,7 +19,7 @@ ConvBaseObj::ConvBaseObj(OpType opType, TensorVec inputs, Tensor &output,
string ConvBaseObj::toString() const {
std::ostringstream os;
- os << OpRegistry::getOpName(getOpType()) << "[" << getGuid() << "]";
+ os << type.toString() << "[" << getGuid() << "]";
os << "(";
if (inputs.size() == 2) {
os << vecToString(inputs[0]->getDims()) << ",";
@@ -36,13 +36,12 @@ string ConvBaseObj::toString() const {
}
vector ConvBaseObj::getWorkloadVector() const {
- return {
- enum_to_underlying(type), n, c, h, w, f, r, s, ph, pw, sh, sw, dh, dw};
+ return {type.underlying(), n, c, h, w, f, r, s, ph, pw, sh, sw, dh, dw};
}
vector ConvBaseObj::getOpAttrVector() const {
// IT_TODO_HALT(); // should padding mode / ph+pw be in attrs?
- return {enum_to_underlying(type), c, f, r, s, ph, pw, sh, sw, dh, dw}; + return {type.underlying(), c, f, r, s, ph, pw, sh, sw, dh, dw}; } void ConvObj::setAuxilaryAttributes(PaddingMode mode) { @@ -119,8 +118,8 @@ ConvTransposed2dObj::ConvTransposed2dObj(GraphObj *graph, Tensor input, int pw, int sh, int sw, int dh, int dw, int oph, int opw, int group, Tensor bias, ActType act) - : ConvBaseObj(OpType::ConvTrans, {input, weight}, output, ph, pw, sh, sw, - dh, dw, output, weight, act), + : ConvBaseObj(OpType::ConvTranspose, {input, weight}, output, ph, pw, sh, + sw, dh, dw, output, weight, act), oph(oph), opw(opw), group(group) { if (bias) IT_TODO_HALT(); @@ -133,8 +132,8 @@ ConvTransposed2dObj::ConvTransposed2dObj(GraphObj *graph, Tensor input, PaddingMode mode, int sh, int sw, int dh, int dw, int oph, int opw, int group, Tensor bias, ActType act) - : ConvBaseObj(OpType::ConvTrans, {input, weight}, output, mode, sh, sw, dh, - dw, output, weight, act), + : ConvBaseObj(OpType::ConvTranspose, {input, weight}, output, mode, sh, sw, + dh, dw, output, weight, act), oph(oph), opw(opw), group(group) { if (bias) IT_TODO_HALT(); @@ -274,8 +273,8 @@ ConvTransposed2dNHWCObj::ConvTransposed2dNHWCObj(GraphObj *graph, Tensor input, int sw, int dh, int dw, int oph, int opw, int group, Tensor bias, ActType act) - : ConvBaseObj(OpType::ConvTrans, {input, weight}, output, mode, sh, sw, dh, - dw, output, weight, act), + : ConvBaseObj(OpType::ConvTranspose, {input, weight}, output, mode, sh, sw, + dh, dw, output, weight, act), oph(oph), opw(opw), group(group) { if (bias) IT_TODO_HALT(); diff --git a/src/operators/det.cc b/src/operators/det.cc index e83f67ed..0c84b5b7 100644 --- a/src/operators/det.cc +++ b/src/operators/det.cc @@ -21,7 +21,7 @@ optional> DetObj::inferShape(const TensorVec &inputs) const { std::string DetObj::toString() const { std::ostringstream os; - os << OpRegistry::getOpName(type) << "[" << getGuid() << "]"; + os << type.toString() << "[" << getGuid() << "]"; os << "("; os << vecToString(inputs[0]->getDims()) << ","; os << "input=" << inputs[0]->getGuid() << ","; @@ -30,14 +30,12 @@ std::string DetObj::toString() const { } vector DetObj::getWorkloadVector() const { - vector ret{enum_to_underlying(type)}; + vector ret{type.underlying()}; const Shape shape = outputs[0]->getDims(); ret.insert(ret.end(), shape.begin(), shape.end()); return ret; } -vector DetObj::getOpAttrVector() const { - return {enum_to_underlying(type)}; -} +vector DetObj::getOpAttrVector() const { return {type.underlying()}; } }; // namespace infini diff --git a/src/operators/dropout.cc b/src/operators/dropout.cc index 9a59942c..08eca92a 100644 --- a/src/operators/dropout.cc +++ b/src/operators/dropout.cc @@ -29,12 +29,12 @@ std::string DropoutObj::toString() const { vector DropoutObj::getWorkloadVector() const { vector ret = inputs[0]->getDims(); ret.emplace_back(static_cast(ratio)); - ret.emplace(ret.begin(), enum_to_underlying(type)); + ret.emplace(ret.begin(), type.underlying()); return ret; } vector DropoutObj::getOpAttrVector() const { - return {enum_to_underlying(type), static_cast(ratio), false}; + return {type.underlying(), static_cast(ratio), false}; } } // namespace infini diff --git a/src/operators/element_wise.cc b/src/operators/element_wise.cc index b2f9e0cc..008c6872 100644 --- a/src/operators/element_wise.cc +++ b/src/operators/element_wise.cc @@ -39,7 +39,7 @@ ElementWiseObj::inferShape(const TensorVec &inputs) const { std::string ElementWiseObj::toString() const { std::ostringstream os; - os << 
OpRegistry::getOpName(type) << "[" << getGuid() << "]"; + os << type.toString() << "[" << getGuid() << "]"; os << "("; os << vecToString(inputs[0]->getDims()) << ","; os << vecToString(inputs[1]->getDims()) << ","; @@ -52,12 +52,12 @@ std::string ElementWiseObj::toString() const { // use output dim or inputs dim? vector ElementWiseObj::getWorkloadVector() const { vector ret = outputs[0]->getDims(); - ret.emplace(ret.begin(), enum_to_underlying(type)); + ret.emplace(ret.begin(), type.underlying()); return ret; } vector ElementWiseObj::getOpAttrVector() const { - return {enum_to_underlying(type)}; + return {type.underlying()}; } MSELossObj::MSELossObj(GraphObj *graph, Tensor input0, Tensor input1, @@ -83,7 +83,7 @@ optional> MSELossObj::inferShape(const TensorVec &inputs) const { std::string MSELossObj::toString() const { std::ostringstream os; - os << OpRegistry::getOpName(type) << "[" << getGuid() << "]"; + os << type.toString() << "[" << getGuid() << "]"; os << "("; os << vecToString(inputs[0]->getDims()) << ","; os << vecToString(inputs[1]->getDims()) << ","; @@ -96,12 +96,10 @@ std::string MSELossObj::toString() const { // use output dim or inputs dim? vector MSELossObj::getWorkloadVector() const { vector ret = outputs[0]->getDims(); - ret.emplace(ret.begin(), enum_to_underlying(type)); + ret.emplace(ret.begin(), type.underlying()); return ret; } -vector MSELossObj::getOpAttrVector() const { - return {enum_to_underlying(type)}; -} +vector MSELossObj::getOpAttrVector() const { return {type.underlying()}; } }; // namespace infini diff --git a/src/operators/extend.cc b/src/operators/extend.cc index 332a76ff..13efcfcf 100644 --- a/src/operators/extend.cc +++ b/src/operators/extend.cc @@ -30,12 +30,12 @@ vector ExtendObj::getWorkloadVector() const { vector ret = inputs[0]->getDims(); ret.emplace_back(dim); ret.emplace_back(num); - ret.emplace(ret.begin(), enum_to_underlying(type)); + ret.emplace(ret.begin(), type.underlying()); return ret; } vector ExtendObj::getOpAttrVector() const { - return {enum_to_underlying(type), dim, num}; + return {type.underlying(), dim, num}; } } // namespace infini diff --git a/src/operators/gather.cc b/src/operators/gather.cc index afb4996e..7b54701d 100644 --- a/src/operators/gather.cc +++ b/src/operators/gather.cc @@ -72,7 +72,7 @@ std::string GatherObj::toString() const { vector GatherObj::getWorkloadVector() const { vector ret = inputs[0]->getDims(); - ret.emplace(ret.begin(), enum_to_underlying(type)); + ret.emplace(ret.begin(), type.underlying()); for (auto it : inputs[1]->getDims()) ret.emplace_back(it); ret.emplace_back(axis); @@ -80,7 +80,7 @@ vector GatherObj::getWorkloadVector() const { } vector GatherObj::getOpAttrVector() const { - return {enum_to_underlying(type), axis}; + return {type.underlying(), axis}; } } // namespace infini diff --git a/src/operators/matmul.cc b/src/operators/matmul.cc index 3fb75371..b26b15ee 100644 --- a/src/operators/matmul.cc +++ b/src/operators/matmul.cc @@ -4,7 +4,7 @@ namespace infini { MatmulObj::MatmulObj(GraphObj *graph, Tensor A, Tensor B, Tensor C, bool transA, bool transB, [[maybe_unused]] Tensor bias, ActType act) - : OperatorObj(OpType::Matmul, + : OperatorObj(OpType::MatMul, bias ? 
TensorVec{A, B, bias} : TensorVec{A, B}, {C}), transA(transA), transB(transB), act(act), b(1) { auto shape_a = A->getDims(); @@ -82,12 +82,12 @@ optional> MatmulObj::inferShape(const TensorVec &inputs) const { } vector MatmulObj::getWorkloadVector() const { - return {enum_to_underlying(type), b, m, n, k, transA, transB, + return {type.underlying(), b, m, n, k, transA, transB, enum_to_underlying(act)}; } vector MatmulObj::getOpAttrVector() const { - return {enum_to_underlying(type), transA, transB, enum_to_underlying(act)}; + return {type.underlying(), transA, transB, enum_to_underlying(act)}; } } // namespace infini diff --git a/src/operators/membound.cc b/src/operators/membound.cc index ba69a5f5..8fd6d294 100644 --- a/src/operators/membound.cc +++ b/src/operators/membound.cc @@ -69,7 +69,7 @@ optional> MemBoundObj::inferShape(const TensorVec &inputs) const { } vector MemBoundObj::getWorkloadVector() const { - return {enum_to_underlying(type), (int)simplifiedHash}; + return {type.underlying(), (int)simplifiedHash}; } vector MemBoundObj::getOpAttrVector() const { return getWorkloadVector(); } diff --git a/src/operators/pad.cc b/src/operators/pad.cc index 2b853769..3e0ce94c 100644 --- a/src/operators/pad.cc +++ b/src/operators/pad.cc @@ -50,13 +50,13 @@ std::string PadObj::toString() const { vector PadObj::getWorkloadVector() const { vector ret = inputs[0]->getDims(); ret.insert(ret.end(), pads.begin(), pads.end()); - ret.emplace(ret.begin(), enum_to_underlying(type)); + ret.emplace(ret.begin(), type.underlying()); return ret; } vector PadObj::getOpAttrVector() const { vector ret = pads; - ret.emplace(ret.begin(), enum_to_underlying(type)); + ret.emplace(ret.begin(), type.underlying()); return ret; } diff --git a/src/operators/pooling.cc b/src/operators/pooling.cc index c1a2b0d9..0061bf6f 100644 --- a/src/operators/pooling.cc +++ b/src/operators/pooling.cc @@ -28,7 +28,7 @@ optional> PoolingObj::inferShape(const TensorVec &inputs) const { std::string PoolingObj::toString() const { std::ostringstream os; - os << OpRegistry::getOpName(type) << "[" << getGuid() << "]"; + os << type.toString() << "[" << getGuid() << "]"; os << "("; os << "k=[" << kh << "," << kw << "],"; os << "p=[" << ph << "," << pw << "],"; @@ -40,12 +40,11 @@ std::string PoolingObj::toString() const { } vector PoolingObj::getWorkloadVector() const { - return { - enum_to_underlying(type), n, c, h, w, kh, kw, ph, pw, sh, sw, dh, dw}; + return {type.underlying(), n, c, h, w, kh, kw, ph, pw, sh, sw, dh, dw}; } vector PoolingObj::getOpAttrVector() const { - return {enum_to_underlying(type), kh, kw, ph, pw, sh, sw, dh, dw}; + return {type.underlying(), kh, kw, ph, pw, sh, sw, dh, dw}; } }; // namespace infini diff --git a/src/operators/reduce_mean.cc b/src/operators/reduce_mean.cc index d55377df..633e6b86 100644 --- a/src/operators/reduce_mean.cc +++ b/src/operators/reduce_mean.cc @@ -69,14 +69,14 @@ std::string ReduceMeanObj::toString() const { vector ReduceMeanObj::getWorkloadVector() const { vector ret = inputs[0]->getDims(); - ret.emplace(ret.begin(), enum_to_underlying(type)); + ret.emplace(ret.begin(), type.underlying()); ret.emplace_back((int)keepDims); ret.insert(ret.end(), axes.begin(), axes.end()); return ret; } vector ReduceMeanObj::getOpAttrVector() const { - vector ret = {enum_to_underlying(type), (int)keepDims}; + vector ret = {type.underlying(), (int)keepDims}; ret.insert(ret.end(), axes.begin(), axes.end()); return ret; } diff --git a/src/operators/reshape.cc b/src/operators/reshape.cc index fa45d48e..7110ab90 
100644 --- a/src/operators/reshape.cc +++ b/src/operators/reshape.cc @@ -30,12 +30,12 @@ std::string ReshapeObj::toString() const { vector ReshapeObj::getWorkloadVector() const { vector ret = inputs[0]->getDims(); ret.insert(ret.end(), dims.begin(), dims.end()); - ret.emplace(ret.begin(), enum_to_underlying(type)); + ret.emplace(ret.begin(), type.underlying()); return ret; } vector ReshapeObj::getOpAttrVector() const { vector ret = dims; - ret.emplace(ret.begin(), enum_to_underlying(type)); + ret.emplace(ret.begin(), type.underlying()); return ret; } @@ -74,12 +74,12 @@ std::string FlattenObj::toString() const { vector FlattenObj::getWorkloadVector() const { vector ret = inputs[0]->getDims(); ret.emplace(ret.begin(), axis); - ret.emplace(ret.begin(), enum_to_underlying(type)); + ret.emplace(ret.begin(), type.underlying()); return ret; } vector FlattenObj::getOpAttrVector() const { - return {enum_to_underlying(type), axis}; + return {type.underlying(), axis}; } IdentityObj::IdentityObj(GraphObj *graph, Tensor input, Tensor output) @@ -103,10 +103,8 @@ std::string IdentityObj::toString() const { vector IdentityObj::getWorkloadVector() const { vector ret = inputs[0]->getDims(); - ret.emplace(ret.begin(), enum_to_underlying(type)); + ret.emplace(ret.begin(), type.underlying()); return ret; } -vector IdentityObj::getOpAttrVector() const { - return {enum_to_underlying(type)}; -} +vector IdentityObj::getOpAttrVector() const { return {type.underlying()}; } } // namespace infini diff --git a/src/operators/resize.cc b/src/operators/resize.cc index 2b04664f..63998de0 100644 --- a/src/operators/resize.cc +++ b/src/operators/resize.cc @@ -244,7 +244,7 @@ vector ResizeObj::getWorkloadVector() const { // here. ret.emplace_back(enum_to_underlying(coMode)); ret.emplace_back(enum_to_underlying(nearestMode)); - ret.emplace(ret.begin(), enum_to_underlying(type)); + ret.emplace(ret.begin(), type.underlying()); return ret; } @@ -253,7 +253,7 @@ vector ResizeObj::getOpAttrVector() const { ret.emplace_back(enum_to_underlying(coMode)); ret.emplace_back(enum_to_underlying(nearestMode)); ret.emplace_back(enum_to_underlying(ratioPolicy)); - ret.emplace(ret.begin(), enum_to_underlying(type)); + ret.emplace(ret.begin(), type.underlying()); return ret; } diff --git a/src/operators/slice.cc b/src/operators/slice.cc index a61d9d85..1ded2745 100644 --- a/src/operators/slice.cc +++ b/src/operators/slice.cc @@ -93,7 +93,7 @@ vector SliceObj::getWorkloadVector() const { } vector SliceObj::getOpAttrVector() const { - vector ans{enum_to_underlying(type)}; + vector ans{type.underlying()}; for (const auto &range : axes) { ans.push_back(range.start); ans.push_back(range.end); diff --git a/src/operators/softmax.cc b/src/operators/softmax.cc index c5bd7d25..2fa2ccf6 100644 --- a/src/operators/softmax.cc +++ b/src/operators/softmax.cc @@ -15,7 +15,7 @@ SoftmaxObj::SoftmaxObj(GraphObj *graph, Tensor input, Tensor output, int _axis) std::string SoftmaxObj::toString() const { std::ostringstream os; - os << OpRegistry::getOpName(type) << "[" << getGuid() << "]"; + os << type.toString() << "[" << getGuid() << "]"; os << "("; os << vecToString(inputs[0]->getDims()) << ","; os << "input=" << inputs[0]->getGuid() << ","; @@ -25,13 +25,13 @@ std::string SoftmaxObj::toString() const { } vector SoftmaxObj::getWorkloadVector() const { - vector ret{enum_to_underlying(type), axis}; + vector ret{type.underlying(), axis}; const Shape shape = outputs[0]->getDims(); ret.insert(ret.end(), shape.begin(), shape.end()); return ret; } vector 
SoftmaxObj::getOpAttrVector() const { - return {enum_to_underlying(type), axis}; + return {type.underlying(), axis}; } } // namespace infini diff --git a/src/operators/split.cc b/src/operators/split.cc index eb602417..52c6a61d 100644 --- a/src/operators/split.cc +++ b/src/operators/split.cc @@ -56,14 +56,14 @@ optional> SplitObj::inferShape(const TensorVec &inputs) const { vector SplitObj::getWorkloadVector() const { vector ret = inputs[0]->getDims(); - ret.emplace(ret.begin(), enum_to_underlying(type)); + ret.emplace(ret.begin(), type.underlying()); ret.emplace_back(dim); ret.emplace_back(num); return ret; } vector SplitObj::getOpAttrVector() const { - return {enum_to_underlying(type), dim, num}; + return {type.underlying(), dim, num}; } string SplitObj::toString() const { diff --git a/src/operators/transpose.cc b/src/operators/transpose.cc index 616eeb14..490dc9e0 100644 --- a/src/operators/transpose.cc +++ b/src/operators/transpose.cc @@ -28,7 +28,7 @@ TransposeObj::inferShape(const TensorVec &inputs) const { std::string TransposeObj::toString() const { std::ostringstream os; - os << OpRegistry::getOpName(type) << "[" << getGuid() << "]"; + os << type.toString() << "[" << getGuid() << "]"; os << "("; os << vecToString(inputs[0]->getDims()) << ","; os << "input=" << inputs[0]->getGuid() << ","; @@ -37,14 +37,14 @@ std::string TransposeObj::toString() const { } vector TransposeObj::getWorkloadVector() const { - vector ret{enum_to_underlying(type)}; + vector ret{type.underlying()}; const Shape shape = outputs[0]->getDims(); ret.insert(ret.end(), shape.begin(), shape.end()); return ret; } vector TransposeObj::getOpAttrVector() const { - return {enum_to_underlying(type)}; + return {type.underlying()}; } }; // namespace infini diff --git a/src/operators/unary.cc b/src/operators/unary.cc index 6f85cecf..7436ac9f 100644 --- a/src/operators/unary.cc +++ b/src/operators/unary.cc @@ -13,7 +13,7 @@ optional> UnaryObj::inferShape(const TensorVec &inputs) const { std::string UnaryObj::toString() const { std::ostringstream os; - os << OpRegistry::getOpName(type) << "[" << getGuid() << "]"; + os << type.toString() << "[" << getGuid() << "]"; os << "("; os << vecToString(inputs[0]->getDims()) << ","; os << "input=" << inputs[0]->getGuid() << ","; @@ -22,15 +22,13 @@ std::string UnaryObj::toString() const { } vector UnaryObj::getWorkloadVector() const { - vector ret{enum_to_underlying(type)}; + vector ret{type.underlying()}; const Shape shape = outputs[0]->getDims(); ret.insert(ret.end(), shape.begin(), shape.end()); return ret; } -vector UnaryObj::getOpAttrVector() const { - return {enum_to_underlying(type)}; -} +vector UnaryObj::getOpAttrVector() const { return {type.underlying()}; } ClipObj::ClipObj(GraphObj *graph, Tensor input, Tensor output, std::optional min, std::optional max) @@ -46,7 +44,7 @@ optional> ClipObj::inferShape(const TensorVec &inputs) const { std::string ClipObj::toString() const { std::ostringstream os; - os << OpRegistry::getOpName(type) << "[" << getGuid() << "]"; + os << type.toString() << "[" << getGuid() << "]"; os << "("; os << vecToString(inputs[0]->getDims()) << ","; os << "input=" << inputs[0]->getGuid() << ","; @@ -55,15 +53,13 @@ std::string ClipObj::toString() const { } vector ClipObj::getWorkloadVector() const { - vector ret{enum_to_underlying(type)}; + vector ret{type.underlying()}; const Shape shape = outputs[0]->getDims(); ret.insert(ret.end(), shape.begin(), shape.end()); return ret; } -vector ClipObj::getOpAttrVector() const { - return {enum_to_underlying(type)}; 
-} +vector ClipObj::getOpAttrVector() const { return {type.underlying()}; } HardtanhObj::HardtanhObj(GraphObj *graph, Tensor input, Tensor output, float min, float max) @@ -79,7 +75,7 @@ optional> HardtanhObj::inferShape(const TensorVec &inputs) const { std::string HardtanhObj::toString() const { std::ostringstream os; - os << OpRegistry::getOpName(type) << "[" << getGuid() << "]"; + os << type.toString() << "[" << getGuid() << "]"; os << "("; os << vecToString(inputs[0]->getDims()) << ","; os << "input=" << inputs[0]->getGuid() << ","; @@ -88,15 +84,13 @@ std::string HardtanhObj::toString() const { } vector HardtanhObj::getWorkloadVector() const { - vector ret{enum_to_underlying(type)}; + vector ret{type.underlying()}; const Shape shape = outputs[0]->getDims(); ret.insert(ret.end(), shape.begin(), shape.end()); return ret; } -vector HardtanhObj::getOpAttrVector() const { - return {enum_to_underlying(type)}; -} +vector HardtanhObj::getOpAttrVector() const { return {type.underlying()}; } FillObj::FillObj(GraphObj *graph, Tensor input, Tensor output, float value) : OperatorObj(OpType::Fill, {input}, {output}), setValue(value) { @@ -110,22 +104,20 @@ optional> FillObj::inferShape(const TensorVec &inputs) const { std::string FillObj::toString() const { std::ostringstream os; - os << OpRegistry::getOpName(type) << "[" << getGuid() << "]"; + os << type.toString() << "[" << getGuid() << "]"; os << "("; os << "output=" << outputs[0]->getGuid() << ")"; return os.str(); } vector FillObj::getWorkloadVector() const { - vector ret{enum_to_underlying(type)}; + vector ret{type.underlying()}; const Shape shape = outputs[0]->getDims(); ret.insert(ret.end(), shape.begin(), shape.end()); return ret; } -vector FillObj::getOpAttrVector() const { - return {enum_to_underlying(type)}; -} +vector FillObj::getOpAttrVector() const { return {type.underlying()}; } L2LossObj::L2LossObj(GraphObj *graph, Tensor input, Tensor output) : OperatorObj(OpType::L2Loss, {input}, {output}) { @@ -139,22 +131,20 @@ optional> L2LossObj::inferShape(const TensorVec &inputs) const { std::string L2LossObj::toString() const { std::ostringstream os; - os << OpRegistry::getOpName(type) << "[" << getGuid() << "]"; + os << type.toString() << "[" << getGuid() << "]"; os << "("; os << "output=" << outputs[0]->getGuid() << ")"; return os.str(); } vector L2LossObj::getWorkloadVector() const { - vector ret{enum_to_underlying(type)}; + vector ret{type.underlying()}; const Shape shape = outputs[0]->getDims(); ret.insert(ret.end(), shape.begin(), shape.end()); return ret; } -vector L2LossObj::getOpAttrVector() const { - return {enum_to_underlying(type)}; -} +vector L2LossObj::getOpAttrVector() const { return {type.underlying()}; } CastObj::CastObj(GraphObj *graph, Tensor input, Tensor output, CastType type) : OperatorObj(OpType::Cast, {input}, {output}), castType(type) { @@ -176,22 +166,20 @@ optional> CastObj::inferShape(const TensorVec &inputs) const { std::string CastObj::toString() const { std::ostringstream os; - os << OpRegistry::getOpName(type) << "[" << getGuid() << "]"; + os << type.toString() << "[" << getGuid() << "]"; os << "("; os << "output=" << outputs[0]->getGuid() << ")"; return os.str(); } vector CastObj::getWorkloadVector() const { - vector ret{enum_to_underlying(type)}; + vector ret{type.underlying()}; const Shape shape = outputs[0]->getDims(); ret.insert(ret.end(), shape.begin(), shape.end()); return ret; } -vector CastObj::getOpAttrVector() const { - return {enum_to_underlying(type)}; -} +vector CastObj::getOpAttrVector() const 
{ return {type.underlying()}; } DataType CastObj::getOutputDataType() const { switch (castType) { @@ -251,7 +239,7 @@ optional> ShapeObj::inferShape(const TensorVec &inputs) const { std::string ShapeObj::toString() const { std::ostringstream os; - os << OpRegistry::getOpName(type) << "[" << getGuid() << "](" + os << type.toString() << "[" << getGuid() << "](" << "output=" << outputs[0]->getGuid() << ")"; return os.str(); } @@ -268,7 +256,7 @@ optional> PReluObj::inferShape(const TensorVec &inputs) const { std::string PReluObj::toString() const { std::ostringstream os; - os << OpRegistry::getOpName(type) << "[" << getGuid() << "]"; + os << type.toString() << "[" << getGuid() << "]"; os << "("; os << vecToString(inputs[0]->getDims()) << ","; os << "input=" << inputs[0]->getGuid() << ","; @@ -277,15 +265,13 @@ std::string PReluObj::toString() const { } vector PReluObj::getWorkloadVector() const { - vector ret{enum_to_underlying(type)}; + vector ret{type.underlying()}; const Shape shape = outputs[0]->getDims(); ret.insert(ret.end(), shape.begin(), shape.end()); return ret; } -vector PReluObj::getOpAttrVector() const { - return {enum_to_underlying(type)}; -} +vector PReluObj::getOpAttrVector() const { return {type.underlying()}; } LogObj::LogObj(GraphObj *graph, Tensor input, Tensor output, LogType type) : OperatorObj(OpType::Log, {input}, {output}), logType(type) { @@ -299,21 +285,19 @@ optional> LogObj::inferShape(const TensorVec &inputs) const { std::string LogObj::toString() const { std::ostringstream os; - os << OpRegistry::getOpName(type) << "[" << getGuid() << "]"; + os << type.toString() << "[" << getGuid() << "]"; os << "("; os << "output=" << outputs[0]->getGuid() << ")"; return os.str(); } vector LogObj::getWorkloadVector() const { - vector ret{enum_to_underlying(type)}; + vector ret{type.underlying()}; const Shape shape = outputs[0]->getDims(); ret.insert(ret.end(), shape.begin(), shape.end()); return ret; } -vector LogObj::getOpAttrVector() const { - return {enum_to_underlying(type)}; -} +vector LogObj::getOpAttrVector() const { return {type.underlying()}; } }; // namespace infini diff --git a/test/kernels/bang/test_bang_copy.cc b/test/kernels/bang/test_bang_copy.cc deleted file mode 100644 index 7b5ca90f..00000000 --- a/test/kernels/bang/test_bang_copy.cc +++ /dev/null @@ -1,40 +0,0 @@ -#include "bang/bang_runtime.h" -#include "core/graph.h" -#include "core/kernel.h" -#include "core/runtime.h" -#include "operators/unary.h" - -#include "test.h" - -namespace infini { - -template -void testCopy(const std::function &generator, - const Shape &shape) { - // Runtime - Runtime cpuRuntime = NativeCpuRuntimeObj::getInstance(); - auto bangRuntime = make_ref(); - - // Build input data on CPU - Tensor inputCpu = make_ref(shape, DataType::Float32, cpuRuntime); - inputCpu->dataMalloc(); - inputCpu->setData(generator); - - // GPU - Graph bangGraph = make_ref(bangRuntime); - auto inputGpu = bangGraph->cloneTensor(inputCpu); - auto gpuOp = bangGraph->addOp(inputGpu, nullptr); - bangGraph->dataMalloc(); - bangRuntime->run(bangGraph); - auto outputGpu = gpuOp->getOutput(); - auto outputGpu2Cpu = outputGpu->clone(cpuRuntime); - inputCpu->printData(); - outputGpu2Cpu->printData(); - EXPECT_TRUE(outputGpu2Cpu->equalData(inputCpu)); -} - -TEST(cnnl_Copy, run) { - testCopy(IncrementalGenerator(), Shape{1, 2, 2, 3}); -} - -} // namespace infini diff --git a/test/kernels/bang/test_bang_floordiv.cc b/test/kernels/bang/test_bang_floordiv.cc deleted file mode 100644 index cf4539a9..00000000 --- 
a/test/kernels/bang/test_bang_floordiv.cc +++ /dev/null @@ -1,49 +0,0 @@ -#include "bang/bang_runtime.h" -#include "core/graph.h" -#include "core/kernel.h" -#include "core/runtime.h" -#include "operators/element_wise.h" - -#include "test.h" - -namespace infini { - -template -void testFloorDiv( - const std::function &generator, - const Shape &shape) { - // Runtime - Runtime cpuRuntime = NativeCpuRuntimeObj::getInstance(); - auto bangRuntime = make_ref(); - - // Build input data on CPU - Tensor inputCpu1 = - make_ref(shape, DataType::Float32, cpuRuntime); - inputCpu1->dataMalloc(); - inputCpu1->setData(generator); - Tensor inputCpu2 = - make_ref(shape, DataType::Float32, cpuRuntime); - inputCpu2->dataMalloc(); - inputCpu2->setData(generator); - - // GPU - Graph bangGraph = make_ref(bangRuntime); - auto inputGpu1 = bangGraph->cloneTensor(inputCpu1); - auto inputGpu2 = bangGraph->cloneTensor(inputCpu2); - auto gpuOp = bangGraph->addOp(inputGpu1, inputGpu2, nullptr); - bangGraph->dataMalloc(); - bangRuntime->run(bangGraph); - auto outputGpu = gpuOp->getOutput(); - auto outputGpu2Cpu = outputGpu->clone(cpuRuntime); - // Check - inputCpu1->printData(); - inputCpu2->printData(); - outputGpu2Cpu->printData(); - EXPECT_TRUE(1); -} - -TEST(cnnl_FloorDiv, run) { - testFloorDiv(IncrementalGenerator(), Shape{1, 2, 2, 3}); -} - -} // namespace infini diff --git a/test/kernels/bang/test_bang_floormod.cc b/test/kernels/bang/test_bang_floormod.cc deleted file mode 100644 index 2d1eaa1b..00000000 --- a/test/kernels/bang/test_bang_floormod.cc +++ /dev/null @@ -1,49 +0,0 @@ -#include "bang/bang_runtime.h" -#include "core/graph.h" -#include "core/kernel.h" -#include "core/runtime.h" -#include "operators/element_wise.h" - -#include "test.h" - -namespace infini { - -template -void testFloorMod( - const std::function &generator, - const Shape &shape) { - // Runtime - Runtime cpuRuntime = NativeCpuRuntimeObj::getInstance(); - auto bangRuntime = make_ref(); - - // Build input data on CPU - Tensor inputCpu1 = - make_ref(shape, DataType::Float32, cpuRuntime); - inputCpu1->dataMalloc(); - inputCpu1->setData(generator); - Tensor inputCpu2 = - make_ref(shape, DataType::Float32, cpuRuntime); - inputCpu2->dataMalloc(); - inputCpu2->setData(generator); - - // GPU - Graph bangGraph = make_ref(bangRuntime); - auto inputGpu1 = bangGraph->cloneTensor(inputCpu1); - auto inputGpu2 = bangGraph->cloneTensor(inputCpu2); - auto gpuOp = bangGraph->addOp(inputGpu1, inputGpu2, nullptr); - bangGraph->dataMalloc(); - bangRuntime->run(bangGraph); - auto outputGpu = gpuOp->getOutput(); - auto outputGpu2Cpu = outputGpu->clone(cpuRuntime); - // Check - inputCpu1->printData(); - inputCpu2->printData(); - outputGpu2Cpu->printData(); - EXPECT_TRUE(1); -} - -TEST(cnnl_FloorMod, run) { - testFloorMod(IncrementalGenerator(), Shape{1, 2, 2, 3}); -} - -} // namespace infini diff --git a/test/kernels/bang/test_bang_logic.cc b/test/kernels/bang/test_bang_logic.cc index b9bf73d5..f1378f0e 100644 --- a/test/kernels/bang/test_bang_logic.cc +++ b/test/kernels/bang/test_bang_logic.cc @@ -42,7 +42,6 @@ void testLogicOp(const std::function &generator, TEST(cnnl_LogicOp, run) { testLogicOp(IncrementalGenerator(), Shape{1, 2, 2, 3}); - testLogicOp(IncrementalGenerator(), Shape{1, 2, 2, 3}); testLogicOp(IncrementalGenerator(), Shape{1, 2, 2, 3}); testLogicOp(IncrementalGenerator(), Shape{1, 2, 2, 3}); testLogicOp(IncrementalGenerator(), Shape{1, 2, 2, 3}); diff --git a/test/kernels/bang/test_bang_rsqrt.cc b/test/kernels/bang/test_bang_rsqrt.cc deleted file mode 
100644 index a420a638..00000000 --- a/test/kernels/bang/test_bang_rsqrt.cc +++ /dev/null @@ -1,40 +0,0 @@ -#include "bang/bang_runtime.h" -#include "core/graph.h" -#include "core/kernel.h" -#include "core/runtime.h" -#include "operators/unary.h" - -#include "test.h" - -namespace infini { - -template -void testRsqrt(const std::function &generator, - const Shape &shape) { - // Runtime - Runtime cpuRuntime = NativeCpuRuntimeObj::getInstance(); - auto bangRuntime = make_ref(); - - // Build input data on CPU - Tensor inputCpu = make_ref(shape, DataType::Float32, cpuRuntime); - inputCpu->dataMalloc(); - inputCpu->setData(generator); - - // GPU - Graph bangGraph = make_ref(bangRuntime); - auto inputGpu = bangGraph->cloneTensor(inputCpu); - auto gpuOp = bangGraph->addOp(inputGpu, nullptr); - bangGraph->dataMalloc(); - bangRuntime->run(bangGraph); - auto outputGpu = gpuOp->getOutput(); - auto outputGpu2Cpu = outputGpu->clone(cpuRuntime); - inputCpu->printData(); - outputGpu2Cpu->printData(); - EXPECT_TRUE(1); -} - -TEST(cnnl_Rsqrt, run) { - testRsqrt(IncrementalGenerator(), Shape{1, 2, 2, 3}); -} - -} // namespace infini diff --git a/test/kernels/bang/test_bang_square.cc b/test/kernels/bang/test_bang_square.cc deleted file mode 100644 index f759f790..00000000 --- a/test/kernels/bang/test_bang_square.cc +++ /dev/null @@ -1,40 +0,0 @@ -#include "bang/bang_runtime.h" -#include "core/graph.h" -#include "core/kernel.h" -#include "core/runtime.h" -#include "operators/unary.h" - -#include "test.h" - -namespace infini { - -template -void testSquare(const std::function &generator, - const Shape &shape) { - // Runtime - Runtime cpuRuntime = NativeCpuRuntimeObj::getInstance(); - auto bangRuntime = make_ref(); - - // Build input data on CPU - Tensor inputCpu = make_ref(shape, DataType::Float32, cpuRuntime); - inputCpu->dataMalloc(); - inputCpu->setData(generator); - - // GPU - Graph bangGraph = make_ref(bangRuntime); - auto inputGpu = bangGraph->cloneTensor(inputCpu); - auto gpuOp = bangGraph->addOp(inputGpu, nullptr); - bangGraph->dataMalloc(); - bangRuntime->run(bangGraph); - auto outputGpu = gpuOp->getOutput(); - auto outputGpu2Cpu = outputGpu->clone(cpuRuntime); - inputCpu->printData(); - outputGpu2Cpu->printData(); - EXPECT_TRUE(1); -} - -TEST(cnnl_Square, run) { - testSquare(IncrementalGenerator(), Shape{1, 2, 2, 3}); -} - -} // namespace infini diff --git a/test/kernels/bang/test_bang_squaredDifference.cc b/test/kernels/bang/test_bang_squaredDifference.cc deleted file mode 100644 index 33efa755..00000000 --- a/test/kernels/bang/test_bang_squaredDifference.cc +++ /dev/null @@ -1,48 +0,0 @@ -#include "bang/bang_runtime.h" -#include "core/graph.h" -#include "core/kernel.h" -#include "core/runtime.h" -#include "operators/element_wise.h" - -#include "test.h" - -namespace infini { - -template -void testSquaredDifference( - const std::function &generator, - const Shape &shape) { - // Runtime - Runtime cpuRuntime = NativeCpuRuntimeObj::getInstance(); - auto bangRuntime = make_ref(); - - // Build input data on CPU - Tensor inputCpu1 = - make_ref(shape, DataType::Float32, cpuRuntime); - inputCpu1->dataMalloc(); - inputCpu1->setData(generator); - Tensor inputCpu2 = - make_ref(shape, DataType::Float32, cpuRuntime); - inputCpu2->dataMalloc(); - inputCpu2->setData(generator); - - // GPU - Graph bangGraph = make_ref(bangRuntime); - auto inputGpu1 = bangGraph->cloneTensor(inputCpu1); - auto inputGpu2 = bangGraph->cloneTensor(inputCpu2); - auto gpuOp = bangGraph->addOp(inputGpu1, inputGpu2, nullptr); - 
bangGraph->dataMalloc(); - bangRuntime->run(bangGraph); - auto outputGpu = gpuOp->getOutput(); - auto outputGpu2Cpu = outputGpu->clone(cpuRuntime); - // Check - outputGpu2Cpu->printData(); - EXPECT_TRUE(1); -} - -TEST(cnnl_SquaredDifference, run) { - testSquaredDifference(IncrementalGenerator(), - Shape{1, 2, 2, 3}); -} - -} // namespace infini diff --git a/test/kernels/cuda/test_cuda_conv_transposed_2d.cc b/test/kernels/cuda/test_cuda_conv_transposed_2d.cc index 9aef0f39..8def6a32 100644 --- a/test/kernels/cuda/test_cuda_conv_transposed_2d.cc +++ b/test/kernels/cuda/test_cuda_conv_transposed_2d.cc @@ -152,8 +152,8 @@ TEST(cuDNN_ConvTransposed, tune) { bool tune = true; cuda->run(gCuda, tune); // check record - auto kernelAttrs = - KernelAttrs{Device::CUDA, conv->getOpType(), DataType::Float32}; + auto kernelAttrs = KernelAttrs{Device::CUDA, conv->getOpType().underlying(), + DataType::Float32}; auto perfKey = PerfEngine::Key{kernelAttrs, conv->getOpPerfKey()}; std::optional perfData = PerfEngine::getInstance().getPerfData(perfKey); diff --git a/test/kernels/intelcpu/test_mkl_conv.cc b/test/kernels/intelcpu/test_mkl_conv.cc index 96fd5498..76ff2628 100644 --- a/test/kernels/intelcpu/test_mkl_conv.cc +++ b/test/kernels/intelcpu/test_mkl_conv.cc @@ -53,8 +53,8 @@ TEST(mkl_Conv, tune) { mklRuntime->run(gMkl, tune); // check record - auto kernelAttrs = - KernelAttrs{Device::INTELCPU, conv->getOpType(), DataType::Float32}; + auto kernelAttrs = KernelAttrs{ + Device::INTELCPU, conv->getOpType().underlying(), DataType::Float32}; auto perfKey = PerfEngine::Key{kernelAttrs, conv->getOpPerfKey()}; std::optional perfData = PerfEngine::getInstance().getPerfData(perfKey); diff --git a/test/kernels/intelcpu/test_mkl_conv_transposed.cc b/test/kernels/intelcpu/test_mkl_conv_transposed.cc index 44b04174..40a33fcd 100644 --- a/test/kernels/intelcpu/test_mkl_conv_transposed.cc +++ b/test/kernels/intelcpu/test_mkl_conv_transposed.cc @@ -73,8 +73,8 @@ TEST(mkl_ConvTransposed, tune) { bool tune = true; runtime->run(gMkl, tune); // check record - auto kernelAttrs = - KernelAttrs{Device::INTELCPU, conv->getOpType(), DataType::Float32}; + auto kernelAttrs = KernelAttrs{ + Device::INTELCPU, conv->getOpType().underlying(), DataType::Float32}; auto perfKey = PerfEngine::Key{kernelAttrs, conv->getOpPerfKey()}; std::optional perfData = PerfEngine::getInstance().getPerfData(perfKey); diff --git a/test/operators/test_batch_norm.cc b/test/operators/test_batch_norm.cc index e2ef15ce..ee47ce06 100644 --- a/test/operators/test_batch_norm.cc +++ b/test/operators/test_batch_norm.cc @@ -4,7 +4,7 @@ #include "test.h" namespace infini { -TEST(BatchNorm, ShapeInference) { +TEST(BatchNormalization, ShapeInference) { Runtime cpuRuntime = NativeCpuRuntimeObj::getInstance(); { Graph g = make_ref(cpuRuntime);