From 8b685ae4a6ac4317283e2883241735392e29f45b Mon Sep 17 00:00:00 2001 From: Liyan Zheng Date: Tue, 9 Aug 2022 14:58:45 +0800 Subject: [PATCH] Update: OpAttrs -> OpPerfKey --- CMakeLists.txt | 1 - include/core/common.h | 3 +-- include/core/graph.h | 11 +++++++-- include/core/kernel.h | 30 ++++++++++++++--------- include/core/mutator.h | 9 +++++-- include/core/operator.h | 48 ++++++++++++++++++++++++++++++------ include/core/perf_engine.h | 4 ++- include/core/tensor_base.h | 13 ++-------- include/operators/matmul.h | 50 +++++++++++++++----------------------- src/kerels/cpu/matmul.cc | 11 ++++----- src/operators/matmul.cc | 37 ++++++++++++++++------------ 11 files changed, 127 insertions(+), 90 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 57dec16a..f1079f65 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,7 +42,6 @@ add_subdirectory(3rd-party/nlohmann_json_cmake_fetchcontent) include_directories(3rd-party/nlohmann_json_cmake_fetchcontent/single_include) if(BUILD_TEST) - # TODO: chekc set set(BUILD_GMOCK OFF CACHE BOOL "Do not build gmock" FORCE) diff --git a/include/core/common.h b/include/core/common.h index 0c69ef35..0fe7344e 100644 --- a/include/core/common.h +++ b/include/core/common.h @@ -16,8 +16,6 @@ namespace infini { using std::list; using std::map; -using std::nullopt; -using std::optional; using std::pair; using std::set; using std::string; @@ -29,6 +27,7 @@ using std::vector; // Aliases using dtype = float; +using HashType = size_t; // compatible with std::hash // Metaprogramming utilities #define _CAT(A, B) A##B diff --git a/include/core/graph.h b/include/core/graph.h index 2f72bb94..9c87310a 100644 --- a/include/core/graph.h +++ b/include/core/graph.h @@ -4,6 +4,7 @@ namespace infini { +// TODO: graph should be attached to a context class GraphNode : public Object { protected: TensorVec tensors; @@ -29,12 +30,18 @@ class GraphNode : public Object { return tensor; } - void updateConnection(); void dataMalloc(); + private: + // 
TODO: updateConnection + /** + * @brief Add reverse connections and Op relationship in ctor. + */ + void updateConnection(); + // TODO: move to another class // bool exportOnnx(const char *path); // bool importOnnx(const char *net); }; -} // namespace infini \ No newline at end of file +} // namespace infini diff --git a/include/core/kernel.h b/include/core/kernel.h index 437884e2..6c0c5677 100644 --- a/include/core/kernel.h +++ b/include/core/kernel.h @@ -23,38 +23,46 @@ class Kernel { }; class KernelRegistry { + public: + using KernelRecord = + tuple; // Kernel, name, ID + + private: + std::map kernels; + int nKernels = 0; + public: ~KernelRegistry() { for (auto &[k, v] : kernels) - delete v; + delete std::get<0>(v); } static KernelRegistry &getInstance() { static KernelRegistry instance; return instance; } - bool registerKernel(const KernelAttrs &key, Kernel *kernel) { - // TODO: kernels with priority + bool registerKernel(const KernelAttrs &key, Kernel *kernel, string name) { + // TODO: multiple kernels support: priority and check name IT_ASSERT(kernels.find(key) == kernels.end(), "Kernel already registered"); - kernels.emplace(key, kernel); + kernels.emplace(key, KernelRecord{kernel, name, ++nKernels}); return true; } Kernel *getKernel(const KernelAttrs &kernelAttrs) const { + return std::get<0>(kernels.at(kernelAttrs)); + } + const KernelRecord &getKernelItem(const KernelAttrs &kernelAttrs) const { return kernels.at(kernelAttrs); } - - private: - std::map kernels; }; -#define _REGISTER_KERNEL_1(device, opType, dataType, kernel, cnt) \ +#define _REGISTER_KERNEL_1(device, opType, dataType, kernel, name, cnt) \ namespace infini { \ static const bool _CAT(_register_kernel_, cnt) = \ KernelRegistry::getInstance().registerKernel( \ - KernelAttrs{device, opType, dataType}, new kernel()); \ + KernelAttrs{device, opType, dataType}, new kernel(), name); \ } -#define REGISTER_KERNEL(device, opType, dataType, kernel) \ - _REGISTER_KERNEL_1(device, opType, dataType, 
kernel, __COUNTER__) +#define REGISTER_KERNEL(device, opType, dataType, kernel, name) \ + _REGISTER_KERNEL_1(device, opType, dataType, kernel, name, __COUNTER__) } // namespace infini diff --git a/include/core/mutator.h b/include/core/mutator.h index ac2c169d..42402151 100644 --- a/include/core/mutator.h +++ b/include/core/mutator.h @@ -4,11 +4,16 @@ namespace infini { class Mutator { + private: + int candidatesLimit; + // // Statistical data + // int numTotalCandidates; + public: - Mutator(){}; + Mutator(int candidatesLimit) : candidatesLimit(candidatesLimit){}; virtual ~Mutator(){}; virtual vector run(const Graph &in_graph) = 0; }; -} // namespace infini \ No newline at end of file +} // namespace infini diff --git a/include/core/operator.h b/include/core/operator.h index bf54737f..9d04aed2 100644 --- a/include/core/operator.h +++ b/include/core/operator.h @@ -94,18 +94,42 @@ enum class ActType { Tanh, }; -struct OpAttrs { +struct OpPerfKey { + HashType hash; + OpType opType; + vector attrs; + public: - virtual bool operator<(const OpAttrs &rhs) const { - IT_ASSERT(typeid(*this) == typeid(rhs), "OpAttrs type mismatch."); - // Empty OpAttrs are equal + OpPerfKey(HashType hash, OpType opType, vector attrs = {}) + : hash(hash), opType(opType), attrs(attrs) {} + bool operator==(const OpPerfKey &rhs) const { + if (hash != rhs.hash) + return false; + if (opType != rhs.opType) + return false; + if (attrs != rhs.attrs) + return false; + return true; + } + + // TODO: remove this function after we use unordered_map in PerfEngine + bool operator<(const OpPerfKey &rhs) const { + if (hash != rhs.hash) + return hash < rhs.hash; + if (opType != rhs.opType) + return opType < rhs.opType; + if (attrs.size() != rhs.attrs.size()) + return attrs.size() < rhs.attrs.size(); + for (size_t i = 0; i < attrs.size(); ++i) + if (attrs[i] != rhs.attrs[i]) + return attrs[i] < rhs.attrs[i]; return false; } - virtual ~OpAttrs() {} }; class OperatorNode : public Object { - public: + friend class 
Kernel; + protected: OpType type; TensorVec inputs; @@ -117,7 +141,7 @@ class OperatorNode : public Object { OperatorNode(OpType opType, TensorVec inputs, TensorVec outputs) : type(opType), inputs(inputs), outputs(outputs) {} virtual vector computeShape() const = 0; - virtual OpAttrs getOpAttrs() const = 0; + virtual OpPerfKey getOpAttrs() const = 0; public: // check Op type bool isLinearOp() const; @@ -143,6 +167,14 @@ class OperatorNode : public Object { virtual int numInputs() const = 0; virtual int numOutputs() const = 0; + virtual HashType hash() const { IT_TODO_HALT(); } + virtual HashType hashWithShape() const { IT_TODO_HALT(); } }; -} // namespace infini \ No newline at end of file +} // namespace infini + +namespace std { +template <> struct hash { + size_t operator()(const infini::OpPerfKey &key) const { return key.hash; } +}; +} // namespace std \ No newline at end of file diff --git a/include/core/perf_engine.h b/include/core/perf_engine.h index b55baf26..563ad704 100644 --- a/include/core/perf_engine.h +++ b/include/core/perf_engine.h @@ -6,7 +6,9 @@ namespace infini { class PerfEngine { public: - using Key = std::pair; + // TODO: Key should be OpPerfKey + Context(maybe implicit) to support + // multiple candidate kernels. + using Key = std::pair; private: map data; diff --git a/include/core/tensor_base.h b/include/core/tensor_base.h index da08e118..cafea062 100644 --- a/include/core/tensor_base.h +++ b/include/core/tensor_base.h @@ -34,22 +34,13 @@ class TensorBaseNode : public Object { // NotCounted, // }; - // // TODO: is more compute state needed? 
- // enum ComputeState { - // NotComputed, - // // Allocated, - // // Initialized, - // // ComputedPartial, - // ComputedFull, - // }; - protected: int dim; DataType dtype; vector> inputOf; WRef outputOf; - // TODO: use a blob instead of vector + // TODO: Ref -> Ref Ref data; // ComputeState computed; // static int random_seed[256 * 16]; @@ -267,4 +258,4 @@ class TensorBaseNode : public Object { // void printShape(); }; -} // namespace infini \ No newline at end of file +} // namespace infini diff --git a/include/operators/matmul.h b/include/operators/matmul.h index 3a70920b..405b3f76 100644 --- a/include/operators/matmul.h +++ b/include/operators/matmul.h @@ -4,30 +4,15 @@ namespace infini { class MatmulNode : public OperatorNode { - public: - struct MatmulArgs : public OpAttrs { - int b, m, n, k; - // PET assume a row-major tensor layout. transA=false means default - // dims, true means A should be transposed before matmul. This is in - // oppsite to column-major BLAS. - bool transA, transB; - ActType act; - - MatmulArgs(int b, int m, int n, int k, bool transA, bool transB, - ActType act) - : b(b), m(m), n(n), k(k), transA(transA), transB(transB), act(act) { - } - - bool operator<(const OpAttrs &rhsGeneric) { - auto rhs = dynamic_cast(rhsGeneric); - return std::tie(b, m, n, k, transA, transB, act) < - std::tie(rhs.b, rhs.m, rhs.n, rhs.k, rhs.transA, rhs.transB, - rhs.act); - } - }; - private: - MatmulArgs args; + // InfiniTensor assumes a row-major tensor layout. transA=false means default + // dims, true means A should be transposed before matmul. This is the + // opposite of column-major BLAS. 
+ bool transA, transB; + ActType act; + + // Auxiliary attributes + int b, m, n, k; public: MatmulNode(Tensor A, Tensor B, Tensor C, bool transA = false, @@ -41,19 +26,22 @@ class MatmulNode : public OperatorNode { int numOutputs() const override { return 1; } Tensor getBias() const { return inputs[2]; } - void setAct(ActType act) { this->args.act = act; } - ActType getAct() const { return args.act; } - bool getTransA() const { return args.transA; } - bool getTransB() const { return args.transB; } + ActType getAct() const { return act; } + bool getTransA() const { return transA; } + bool getTransB() const { return transB; } + int getB() const { return b; } + int getM() const { return m; } + int getN() const { return n; } + int getK() const { return k; } - MatmulArgs getArgs() const { return args; } - OpAttrs getOpAttrs() const override { return args; } + HashType hashWithShape() const override; + OpPerfKey getOpAttrs() const override; private: // Q: whether to check the output? Since we can build an Op first and then // assure output. - // Fix 1: make shape inference a static method. But OpAttrs are required. + // Fix 1: make shape inference a static method. But OpPerfKey are required. 
bool checkValid(const TensorVec &inputs) const; }; -} // namespace infini \ No newline at end of file +} // namespace infini diff --git a/src/kerels/cpu/matmul.cc b/src/kerels/cpu/matmul.cc index 45c46eab..e8ae5c7e 100644 --- a/src/kerels/cpu/matmul.cc +++ b/src/kerels/cpu/matmul.cc @@ -9,10 +9,9 @@ template class NaiveMatmul : public Kernel { T *A = reinterpret_cast(op->getInputs(0)->getDataPtr().get()); T *B = reinterpret_cast(op->getInputs(1)->getDataPtr().get()); T *C = reinterpret_cast(op->getOutput()->getDataPtr().get()); - const auto args = op->getArgs(); - IT_ASSERT(args.transA == false && args.transB == false); - IT_ASSERT(args.act == ActType::None); - const int M = args.m, N = args.n, K = args.k; + IT_ASSERT(op->getTransA() == false && op->getTransB() == false); + IT_ASSERT(op->getAct() == ActType::None); + const int M = op->getM(), N = op->getN(), K = op->getK(); for (int i = 0; i < M; i++) { for (int j = 0; j < N; j++) { C[i * N + j] = 0; @@ -33,8 +32,8 @@ template class NaiveMatmul : public Kernel { }; REGISTER_KERNEL(Device::CPU, OpType::Matmul, DataType::Int32, - NaiveMatmul); + NaiveMatmul, "MatmulNaive_CPU_uint32"); REGISTER_KERNEL(Device::CPU, OpType::Matmul, DataType::Float32, - NaiveMatmul); + NaiveMatmul, "MatmulNaive_CPU_float32"); } // namespace infini \ No newline at end of file diff --git a/src/operators/matmul.cc b/src/operators/matmul.cc index bdd11e2f..2f9666a2 100644 --- a/src/operators/matmul.cc +++ b/src/operators/matmul.cc @@ -2,27 +2,24 @@ namespace infini { -vector MatmulNode::computeShape() const { - Shape ret{args.b, args.m, args.n}; - return {ret}; -} +vector MatmulNode::computeShape() const { return {{b, m, n}}; } MatmulNode::MatmulNode(Tensor A, Tensor B, Tensor C, bool transA, bool transB, Tensor bias, ActType act) - : OperatorNode(OpType::Matmul, {A, B, bias}, {C}), - args(A->getDims()[0], transA ? A->getDims()[2] : A->getDims()[1], - transB ? B->getDims()[1] : B->getDims()[2], - transA ? 
A->getDims()[1] : A->getDims()[2], transA, transB, act) { + : OperatorNode(OpType::Matmul, {A, B, bias}, {C}), transA(transA), + transB(transB), act(act), b(A->getDims()[0]), + m(transA ? A->getDims()[2] : A->getDims()[1]), + n(transB ? B->getDims()[1] : B->getDims()[2]), + k(transA ? A->getDims()[1] : A->getDims()[2]) { IT_ASSERT(checkValid(inputs)); } string MatmulNode::toString() const { std::ostringstream os; - MatmulArgs args = getArgs(); - os << "Matmul([" << (args.transA ? "A^T" : "A") << "," - << (args.transB ? "B^T" : "B") << ",act=" << (int)args.act - << "],A=" << inputs[0]->getGuid() << ",B=" << inputs[1]->getGuid() - << ",C=" << outputs[0]->getGuid() << ")"; + os << "Matmul([" << (transA ? "A^T" : "A") << "," << (transB ? "B^T" : "B") + << ",act=" << enum_to_underlying(act) << "],A=" << inputs[0]->getGuid() + << ",B=" << inputs[1]->getGuid() << ",C=" << outputs[0]->getGuid() + << ")"; return os.str(); } @@ -32,8 +29,8 @@ bool MatmulNode::checkValid(const TensorVec &inputs) const { // return false; IT_ASSERT(A->getDims().size() == 3 && B->getDims().size() == 3); IT_ASSERT(A->getDims()[0] == B->getDims()[0]); - IT_ASSERT((args.transA ? A->getDims()[1] : A->getDims()[2]) == - (args.transB ? B->getDims()[2] : B->getDims()[1])); + IT_ASSERT((transA ? A->getDims()[1] : A->getDims()[2]) == + (transB ? B->getDims()[2] : B->getDims()[1])); // if (A->getDims().size() != 3 || B->getDims().size() != 3) { // return false; // } @@ -46,4 +43,14 @@ bool MatmulNode::checkValid(const TensorVec &inputs) const { // } return true; } + +HashType MatmulNode::hashWithShape() const { + // TODO: use a real hash + return b + m + n + k + transA + transB + enum_to_underlying(act); +} + +OpPerfKey MatmulNode::getOpAttrs() const { + return OpPerfKey(hashWithShape(), type, + {b, m, n, k, transA, transB, enum_to_underlying(act)}); +} } // namespace infini \ No newline at end of file