forked from jiuyuan/InfiniTensor
Update: OpAttrs -> OpPerfKey
This commit is contained in:
parent b7e2096a26
commit 8b685ae4a6
@@ -42,7 +42,6 @@ add_subdirectory(3rd-party/nlohmann_json_cmake_fetchcontent)
include_directories(3rd-party/nlohmann_json_cmake_fetchcontent/single_include)

if(BUILD_TEST)
  # TODO: check set
  set(BUILD_GMOCK
      OFF
      CACHE BOOL "Do not build gmock" FORCE)
@@ -16,8 +16,6 @@
namespace infini {
using std::list;
using std::map;
using std::nullopt;
using std::optional;
using std::pair;
using std::set;
using std::string;
@@ -29,6 +27,7 @@ using std::vector;

// Aliases
using dtype = float;
using HashType = size_t; // compatible with std::hash

// Metaprogramming utilities
#define _CAT(A, B) A##B
@@ -4,6 +4,7 @@

namespace infini {

// TODO: graph should be attached to a context
class GraphNode : public Object {
  protected:
    TensorVec tensors;
@@ -29,12 +30,18 @@ class GraphNode : public Object {
        return tensor;
    }

    void updateConnection();
    void dataMalloc();

  private:
    // TODO: updateConnection
    /**
     * @brief Add reverse connections and Op relationship in ctor.
     */
    void updateConnection();

    // TODO: move to another class
    // bool exportOnnx(const char *path);
    // bool importOnnx(const char *net);
};

} // namespace infini
} // namespace infini
@@ -23,38 +23,46 @@ class Kernel {
};

class KernelRegistry {
  public:
    using KernelRecord =
        tuple<Kernel *const, const string, const int>; // Kernel, name, ID

  private:
    std::map<KernelAttrs, KernelRecord> kernels;
    int nKernels = 0;

  public:
    ~KernelRegistry() {
        for (auto &[k, v] : kernels)
            delete v;
            delete std::get<0>(v);
    }
    static KernelRegistry &getInstance() {
        static KernelRegistry instance;
        return instance;
    }
    bool registerKernel(const KernelAttrs &key, Kernel *kernel) {
        // TODO: kernels with priority
    bool registerKernel(const KernelAttrs &key, Kernel *kernel, string name) {
        // TODO: multiple kernels support: priority and check name
        IT_ASSERT(kernels.find(key) == kernels.end(),
                  "Kernel already registered");
        kernels.emplace(key, kernel);
        kernels.emplace(key, KernelRecord{kernel, name, ++nKernels});
        return true;
    }
    Kernel *getKernel(const KernelAttrs &kernelAttrs) const {
        return std::get<0>(kernels.at(kernelAttrs));
    }
    const KernelRecord &getKernelItem(const KernelAttrs &kernelAttrs) const {
        return kernels.at(kernelAttrs);
    }

  private:
    std::map<KernelAttrs, Kernel *> kernels;
};

#define _REGISTER_KERNEL_1(device, opType, dataType, kernel, cnt) \
#define _REGISTER_KERNEL_1(device, opType, dataType, kernel, name, cnt) \
    namespace infini { \
    static const bool _CAT(_register_kernel_, cnt) = \
        KernelRegistry::getInstance().registerKernel( \
            KernelAttrs{device, opType, dataType}, new kernel()); \
            KernelAttrs{device, opType, dataType}, new kernel(), name); \
    }

#define REGISTER_KERNEL(device, opType, dataType, kernel) \
    _REGISTER_KERNEL_1(device, opType, dataType, kernel, __COUNTER__)
#define REGISTER_KERNEL(device, opType, dataType, kernel, name) \
    _REGISTER_KERNEL_1(device, opType, dataType, kernel, name, __COUNTER__)

} // namespace infini
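For context, a minimal usage sketch of the updated registry, not taken from this commit: the record returned by getKernelItem() now carries the kernel pointer, its registered name, and its numeric ID. The include path and the wrapper function below are assumptions.

    #include <iostream>

    #include "core/kernel.h" // assumed header path for KernelRegistry/KernelAttrs

    // Hypothetical helper: look up the matmul kernel registered later in this
    // commit and print the name stored in its KernelRecord {kernel, name, id}.
    void printMatmulKernelName() {
        using namespace infini;
        auto &registry = KernelRegistry::getInstance();
        const auto &[kernel, name, id] = registry.getKernelItem(
            KernelAttrs{Device::CPU, OpType::Matmul, DataType::Float32});
        std::cout << "kernel #" << id << ": " << name << std::endl;
        (void)kernel; // the Kernel* itself is unused in this sketch
    }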
@@ -4,11 +4,16 @@
namespace infini {

class Mutator {
  private:
    int candidatesLimit;
    // // Statistical data
    // int numTotalCandidates;

  public:
    Mutator(){};
    Mutator(int candidatesLimit) : candidatesLimit(candidatesLimit){};
    virtual ~Mutator(){};

    virtual vector<Graph> run(const Graph &in_graph) = 0;
};

} // namespace infini
} // namespace infini
@@ -94,18 +94,42 @@ enum class ActType {
    Tanh,
};

struct OpAttrs {
struct OpPerfKey {
    HashType hash;
    OpType opType;
    vector<int> attrs;

  public:
    virtual bool operator<(const OpAttrs &rhs) const {
        IT_ASSERT(typeid(*this) == typeid(rhs), "OpAttrs type mismatch.");
        // Empty OpAttrs are equal
    OpPerfKey(HashType hash, OpType opType, vector<int> attrs = {})
        : hash(hash), opType(opType), attrs(attrs) {}
    bool operator==(const OpPerfKey &rhs) const {
        if (hash != rhs.hash)
            return false;
        if (opType != rhs.opType)
            return false;
        if (attrs != rhs.attrs)
            return false;
        return true;
    }

    // TODO: remove this function after we use unordered_map in PerfEngine
    bool operator<(const OpPerfKey &rhs) const {
        if (hash != rhs.hash)
            return hash < rhs.hash;
        if (opType != rhs.opType)
            return opType < rhs.opType;
        if (attrs.size() != rhs.attrs.size())
            return attrs.size() < rhs.attrs.size();
        for (size_t i = 0; i < attrs.size(); ++i)
            if (attrs[i] != rhs.attrs[i])
                return attrs[i] < rhs.attrs[i];
        return false;
    }
    virtual ~OpAttrs() {}
};

class OperatorNode : public Object {
  public:
    friend class Kernel;

  protected:
    OpType type;
    TensorVec inputs;
@@ -117,7 +141,7 @@ class OperatorNode : public Object {
    OperatorNode(OpType opType, TensorVec inputs, TensorVec outputs)
        : type(opType), inputs(inputs), outputs(outputs) {}
    virtual vector<Shape> computeShape() const = 0;
    virtual OpAttrs getOpAttrs() const = 0;
    virtual OpPerfKey getOpAttrs() const = 0;

  public: // check Op type
    bool isLinearOp() const;
@@ -143,6 +167,14 @@ class OperatorNode : public Object {

    virtual int numInputs() const = 0;
    virtual int numOutputs() const = 0;
    virtual HashType hash() const { IT_TODO_HALT(); }
    virtual HashType hashWithShape() const { IT_TODO_HALT(); }
};

} // namespace infini
} // namespace infini

namespace std {
template <> struct hash<infini::OpPerfKey> {
    size_t operator()(const infini::OpPerfKey &key) const { return key.hash; }
};
} // namespace std
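Pairing operator== on OpPerfKey with this std::hash specialization is exactly what std::unordered_map needs, which is what the TODO about PerfEngine refers to. A minimal sketch, assuming operator.h is the header to include; the map and helper below are illustrative, not part of the codebase:

    #include <unordered_map>

    #include "core/operator.h" // assumed header path for OpPerfKey

    // Hypothetical perf table keyed directly by OpPerfKey: lookups hash via
    // key.hash and fall back to OpPerfKey::operator== on collisions.
    std::unordered_map<infini::OpPerfKey, double> perfTimeMs;

    void recordTime(const infini::OpPerfKey &key, double ms) {
        perfTimeMs[key] = ms;
    }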
@@ -6,7 +6,9 @@ namespace infini {

class PerfEngine {
  public:
    using Key = std::pair<KernelAttrs, OpAttrs>;
    // TODO: Key should be OpPerfKey + Context (maybe implicit) to support
    // multiple candidate kernels.
    using Key = std::pair<KernelAttrs, OpPerfKey>;

  private:
    map<Key, PerfRecord> data;
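A sketch of how the new Key would be assembled, with made-up values and assuming the relevant project headers are included; the OpPerfKey constructor arguments (hash, opType, attrs) are the ones introduced above:

    // Illustrative only: a PerfEngine::Key now pairs the kernel dispatch
    // attributes with an operator's OpPerfKey (e.g. the one MatmulNode builds
    // in getOpAttrs() further down in this commit).
    infini::PerfEngine::Key key{
        infini::KernelAttrs{infini::Device::CPU, infini::OpType::Matmul,
                            infini::DataType::Float32},
        infini::OpPerfKey{/*hash=*/42, infini::OpType::Matmul, {1, 128, 128, 64}}};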
@@ -34,22 +34,13 @@ class TensorBaseNode : public Object {
    //     NotCounted,
    // };

    // // TODO: is more compute state needed?
    // enum ComputeState {
    //     NotComputed,
    //     // Allocated,
    //     // Initialized,
    //     // ComputedPartial,
    //     ComputedFull,
    // };

  protected:
    int dim;

    DataType dtype;
    vector<WRef<TensorBaseNode>> inputOf;
    WRef<TensorBaseNode> outputOf;
    // TODO: use a blob instead of vector
    // TODO: Ref<void> -> Ref<Blob>
    Ref<VType[]> data;
    // ComputeState computed;
    // static int random_seed[256 * 16];
@@ -267,4 +258,4 @@ class TensorBaseNode : public Object {
    // void printShape();
};

} // namespace infini
} // namespace infini
@@ -4,30 +4,15 @@
namespace infini {

class MatmulNode : public OperatorNode {
  public:
    struct MatmulArgs : public OpAttrs {
        int b, m, n, k;
        // PET assumes a row-major tensor layout. transA=false means default
        // dims, true means A should be transposed before matmul. This is
        // opposite to column-major BLAS.
        bool transA, transB;
        ActType act;

        MatmulArgs(int b, int m, int n, int k, bool transA, bool transB,
                   ActType act)
            : b(b), m(m), n(n), k(k), transA(transA), transB(transB), act(act) {
        }

        bool operator<(const OpAttrs &rhsGeneric) {
            auto rhs = dynamic_cast<const MatmulArgs &>(rhsGeneric);
            return std::tie(b, m, n, k, transA, transB, act) <
                   std::tie(rhs.b, rhs.m, rhs.n, rhs.k, rhs.transA, rhs.transB,
                            rhs.act);
        }
    };

  private:
    MatmulArgs args;
    // InfiniTensor assumes a row-major tensor layout. transA=false means
    // default dims, true means A should be transposed before matmul. This is
    // opposite to column-major BLAS.
    bool transA, transB;
    ActType act;

    // Auxiliary attributes
    int b, m, n, k;

  public:
    MatmulNode(Tensor A, Tensor B, Tensor C, bool transA = false,
@@ -41,19 +26,22 @@ class MatmulNode : public OperatorNode {
    int numOutputs() const override { return 1; }

    Tensor getBias() const { return inputs[2]; }
    void setAct(ActType act) { this->args.act = act; }
    ActType getAct() const { return args.act; }
    bool getTransA() const { return args.transA; }
    bool getTransB() const { return args.transB; }
    ActType getAct() const { return act; }
    bool getTransA() const { return transA; }
    bool getTransB() const { return transB; }
    int getB() const { return b; }
    int getM() const { return m; }
    int getN() const { return n; }
    int getK() const { return k; }

    MatmulArgs getArgs() const { return args; }
    OpAttrs getOpAttrs() const override { return args; }
    HashType hashWithShape() const override;
    OpPerfKey getOpAttrs() const override;

  private:
    // Q: whether to check the output? Since we can build an Op first and then
    // assure output.
    // Fix 1: make shape inference a static method. But OpAttrs are required.
    // Fix 1: make shape inference a static method. But an OpPerfKey is required.
    bool checkValid(const TensorVec &inputs) const;
};

} // namespace infini
} // namespace infini
@@ -9,10 +9,9 @@ template <typename T> class NaiveMatmul : public Kernel {
        T *A = reinterpret_cast<T *>(op->getInputs(0)->getDataPtr().get());
        T *B = reinterpret_cast<T *>(op->getInputs(1)->getDataPtr().get());
        T *C = reinterpret_cast<T *>(op->getOutput()->getDataPtr().get());
        const auto args = op->getArgs();
        IT_ASSERT(args.transA == false && args.transB == false);
        IT_ASSERT(args.act == ActType::None);
        const int M = args.m, N = args.n, K = args.k;
        IT_ASSERT(op->getTransA() == false && op->getTransB() == false);
        IT_ASSERT(op->getAct() == ActType::None);
        const int M = op->getM(), N = op->getN(), K = op->getK();
        for (int i = 0; i < M; i++) {
            for (int j = 0; j < N; j++) {
                C[i * N + j] = 0;
@@ -33,8 +32,8 @@ template <typename T> class NaiveMatmul : public Kernel {
};

REGISTER_KERNEL(Device::CPU, OpType::Matmul, DataType::Int32,
                NaiveMatmul<uint32_t>);
                NaiveMatmul<uint32_t>, "MatmulNaive_CPU_uint32");
REGISTER_KERNEL(Device::CPU, OpType::Matmul, DataType::Float32,
                NaiveMatmul<float>);
                NaiveMatmul<float>, "MatmulNaive_CPU_float32");

} // namespace infini
@@ -2,27 +2,24 @@

namespace infini {

vector<Shape> MatmulNode::computeShape() const {
    Shape ret{args.b, args.m, args.n};
    return {ret};
}
vector<Shape> MatmulNode::computeShape() const { return {{b, m, n}}; }

MatmulNode::MatmulNode(Tensor A, Tensor B, Tensor C, bool transA, bool transB,
                       Tensor bias, ActType act)
    : OperatorNode(OpType::Matmul, {A, B, bias}, {C}),
      args(A->getDims()[0], transA ? A->getDims()[2] : A->getDims()[1],
           transB ? B->getDims()[1] : B->getDims()[2],
           transA ? A->getDims()[1] : A->getDims()[2], transA, transB, act) {
    : OperatorNode(OpType::Matmul, {A, B, bias}, {C}), transA(transA),
      transB(transB), act(act), b(A->getDims()[0]),
      m(transA ? A->getDims()[2] : A->getDims()[1]),
      n(transB ? B->getDims()[1] : B->getDims()[2]),
      k(transA ? A->getDims()[1] : A->getDims()[2]) {
    IT_ASSERT(checkValid(inputs));
}

string MatmulNode::toString() const {
    std::ostringstream os;
    MatmulArgs args = getArgs();
    os << "Matmul([" << (args.transA ? "A^T" : "A") << ","
       << (args.transB ? "B^T" : "B") << ",act=" << (int)args.act
       << "],A=" << inputs[0]->getGuid() << ",B=" << inputs[1]->getGuid()
       << ",C=" << outputs[0]->getGuid() << ")";
    os << "Matmul([" << (transA ? "A^T" : "A") << "," << (transB ? "B^T" : "B")
       << ",act=" << enum_to_underlying(act) << "],A=" << inputs[0]->getGuid()
       << ",B=" << inputs[1]->getGuid() << ",C=" << outputs[0]->getGuid()
       << ")";
    return os.str();
}
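To make the constructor's dimension derivation concrete with made-up shapes: for row-major A with getDims() == {16, 128, 64} and transA == true, the initializer list yields b = 16, m = getDims()[2] = 64 and k = getDims()[1] = 128; with transA == false it would instead give m = 128 and k = 64, and n is read from B the same way via transB.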
@@ -32,8 +29,8 @@ bool MatmulNode::checkValid(const TensorVec &inputs) const {
    //     return false;
    IT_ASSERT(A->getDims().size() == 3 && B->getDims().size() == 3);
    IT_ASSERT(A->getDims()[0] == B->getDims()[0]);
    IT_ASSERT((args.transA ? A->getDims()[1] : A->getDims()[2]) ==
              (args.transB ? B->getDims()[2] : B->getDims()[1]));
    IT_ASSERT((transA ? A->getDims()[1] : A->getDims()[2]) ==
              (transB ? B->getDims()[2] : B->getDims()[1]));
    // if (A->getDims().size() != 3 || B->getDims().size() != 3) {
    //     return false;
    // }
@@ -46,4 +43,14 @@ bool MatmulNode::checkValid(const TensorVec &inputs) const {
    // }
    return true;
}

HashType MatmulNode::hashWithShape() const {
    // TODO: use a real hash
    return b + m + n + k + transA + transB + enum_to_underlying(act);
}

OpPerfKey MatmulNode::getOpAttrs() const {
    return OpPerfKey(hashWithShape(), type,
                     {b, m, n, k, transA, transB, enum_to_underlying(act)});
}
} // namespace infini
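The additive hash above is the placeholder the TODO refers to. As one possible replacement, sketched here as an assumption rather than anything present in the repository, a boost-style hash_combine over the same fields would avoid most accidental collisions:

    // Sketch of a stronger hash over the same fields; hashCombine is a local
    // helper, not something defined in the repository.
    #include <cstddef>
    #include <functional>

    inline void hashCombine(std::size_t &seed, std::size_t value) {
        // Boost-style mixing: golden-ratio constant plus shifts of the seed.
        seed ^= value + 0x9e3779b9 + (seed << 6) + (seed >> 2);
    }

    std::size_t matmulShapeHash(int b, int m, int n, int k, bool transA,
                                bool transB, int act) {
        std::size_t seed = 0;
        for (int v : {b, m, n, k, int(transA), int(transB), act})
            hashCombine(seed, std::hash<int>{}(v));
        return seed;
    }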