Update: OpAttrs -> OpPerfKey

Liyan Zheng 2022-08-09 14:58:45 +08:00
parent b7e2096a26
commit 8b685ae4a6
11 changed files with 127 additions and 90 deletions

View File

@@ -42,7 +42,6 @@ add_subdirectory(3rd-party/nlohmann_json_cmake_fetchcontent)
 include_directories(3rd-party/nlohmann_json_cmake_fetchcontent/single_include)
 if(BUILD_TEST)
-  # TODO: chekc set
   set(BUILD_GMOCK
       OFF
       CACHE BOOL "Do not build gmock" FORCE)

View File

@@ -16,8 +16,6 @@
 namespace infini {
 using std::list;
 using std::map;
-using std::nullopt;
-using std::optional;
 using std::pair;
 using std::set;
 using std::string;
@@ -29,6 +27,7 @@ using std::vector;
 // Aliases
 using dtype = float;
+using HashType = size_t; // compatible with std::hash
 // Metaprogramming utilities
 #define _CAT(A, B) A##B

View File

@@ -4,6 +4,7 @@
 namespace infini {
+// TODO: graph should be attached to a context
 class GraphNode : public Object {
   protected:
     TensorVec tensors;
@@ -29,12 +30,18 @@ class GraphNode : public Object {
         return tensor;
     }
-    void updateConnection();
     void dataMalloc();
+  private:
+    // TODO: updateConnection
+    /**
+     * @brief Add reverse connections and Op relationship in ctor.
+     */
+    void updateConnection();
     // TODO: move to another class
     // bool exportOnnx(const char *path);
     // bool importOnnx(const char *net);
 };
 } // namespace infini

View File

@@ -23,38 +23,46 @@ class Kernel {
 };
 class KernelRegistry {
+  public:
+    using KernelRecord =
+        tuple<Kernel *const, const string, const int>; // Kernel, name, ID
+
+  private:
+    std::map<KernelAttrs, KernelRecord> kernels;
+    int nKernels = 0;
+
   public:
     ~KernelRegistry() {
         for (auto &[k, v] : kernels)
-            delete v;
+            delete std::get<0>(v);
     }
     static KernelRegistry &getInstance() {
         static KernelRegistry instance;
         return instance;
     }
-    bool registerKernel(const KernelAttrs &key, Kernel *kernel) {
-        // TODO: kernels with priority
+    bool registerKernel(const KernelAttrs &key, Kernel *kernel, string name) {
+        // TODO: mutliple kernels support: priority and check name
         IT_ASSERT(kernels.find(key) == kernels.end(),
                   "Kernel already registered");
-        kernels.emplace(key, kernel);
+        kernels.emplace(key, KernelRecord{kernel, name, ++nKernels});
         return true;
     }
     Kernel *getKernel(const KernelAttrs &kernelAttrs) const {
+        return std::get<0>(kernels.at(kernelAttrs));
+    }
+    const KernelRecord &getKernelItem(const KernelAttrs &kernelAttrs) const {
         return kernels.at(kernelAttrs);
     }
-
-  private:
-    std::map<KernelAttrs, Kernel *> kernels;
 };
-#define _REGISTER_KERNEL_1(device, opType, dataType, kernel, cnt)             \
+#define _REGISTER_KERNEL_1(device, opType, dataType, kernel, name, cnt)       \
     namespace infini {                                                        \
     static const bool _CAT(_register_kernel_, cnt) =                          \
         KernelRegistry::getInstance().registerKernel(                         \
-            KernelAttrs{device, opType, dataType}, new kernel());             \
+            KernelAttrs{device, opType, dataType}, new kernel(), name);       \
     }
-#define REGISTER_KERNEL(device, opType, dataType, kernel)                     \
-    _REGISTER_KERNEL_1(device, opType, dataType, kernel, __COUNTER__)
+#define REGISTER_KERNEL(device, opType, dataType, kernel, name)               \
+    _REGISTER_KERNEL_1(device, opType, dataType, kernel, name, __COUNTER__)
 } // namespace infini
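For reference, the new registration flow can be exercised end to end. Below is a standalone sketch of the same registry pattern; Kernel, KernelAttrs, and the sample values are simplified stand-ins for illustration, not the actual InfiniTensor headers:

#include <cassert>
#include <iostream>
#include <map>
#include <string>
#include <tuple>

struct Kernel {
    virtual ~Kernel() = default;
};
using KernelAttrs = std::tuple<int, int, int>; // device, opType, dataType

class KernelRegistry {
  public:
    // Kernel, human-readable name, registration ID
    using KernelRecord =
        std::tuple<Kernel *const, const std::string, const int>;

    ~KernelRegistry() {
        for (auto &[k, v] : kernels)
            delete std::get<0>(v);
    }
    static KernelRegistry &getInstance() {
        static KernelRegistry instance;
        return instance;
    }
    bool registerKernel(const KernelAttrs &key, Kernel *kernel,
                        std::string name) {
        assert(kernels.find(key) == kernels.end()); // no duplicate keys yet
        kernels.emplace(key, KernelRecord{kernel, name, ++nKernels});
        return true;
    }
    const KernelRecord &getKernelItem(const KernelAttrs &key) const {
        return kernels.at(key);
    }

  private:
    std::map<KernelAttrs, KernelRecord> kernels;
    int nKernels = 0;
};

struct NaiveMatmul : Kernel {};

int main() {
    auto &reg = KernelRegistry::getInstance();
    reg.registerKernel(KernelAttrs{0, 0, 0}, new NaiveMatmul(),
                       "MatmulNaive_CPU_float32");
    const auto &[kernel, name, id] = reg.getKernelItem(KernelAttrs{0, 0, 0});
    std::cout << name << " (ID " << id << ", kernel at " << kernel << ")\n";
}

Keeping the name and a monotonically increasing ID in the record is what would later let several kernels per KernelAttrs be told apart, in line with the TODO about priorities above.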

View File

@@ -4,11 +4,16 @@
 namespace infini {
 class Mutator {
+  private:
+    int candidatesLimit;
+    // // Statistical data
+    // int numTotalCandidates;
+
   public:
-    Mutator(){};
+    Mutator(int candidatesLimit) : candidatesLimit(candidatesLimit){};
     virtual ~Mutator(){};
     virtual vector<Graph> run(const Graph &in_graph) = 0;
 };
 } // namespace infini

View File

@@ -94,18 +94,42 @@ enum class ActType {
     Tanh,
 };
-struct OpAttrs {
+struct OpPerfKey {
+    HashType hash;
+    OpType opType;
+    vector<int> attrs;
+
   public:
-    virtual bool operator<(const OpAttrs &rhs) const {
-        IT_ASSERT(typeid(*this) == typeid(rhs), "OpAttrs type mismatch.");
-        // Empty OpAttrs are equal
+    OpPerfKey(HashType hash, OpType opType, vector<int> attrs = {})
+        : hash(hash), opType(opType), attrs(attrs) {}
+    bool operator==(const OpPerfKey &rhs) const {
+        if (hash != rhs.hash)
+            return false;
+        if (opType != rhs.opType)
+            return false;
+        if (attrs != rhs.attrs)
+            return false;
+        return true;
+    }
+    // TODO: remove this function after we use unordered_map in PerfEngine
+    bool operator<(const OpPerfKey &rhs) const {
+        if (hash != rhs.hash)
+            return hash < rhs.hash;
+        if (opType != rhs.opType)
+            return opType < rhs.opType;
+        if (attrs.size() != rhs.attrs.size())
+            return attrs.size() < rhs.attrs.size();
+        for (size_t i = 0; i < attrs.size(); ++i)
+            if (attrs[i] != rhs.attrs[i])
+                return attrs[i] < rhs.attrs[i];
         return false;
     }
-    virtual ~OpAttrs() {}
 };
 class OperatorNode : public Object {
-  public:
+    friend class Kernel;
+
   protected:
     OpType type;
     TensorVec inputs;
@@ -117,7 +141,7 @@ class OperatorNode : public Object {
     OperatorNode(OpType opType, TensorVec inputs, TensorVec outputs)
         : type(opType), inputs(inputs), outputs(outputs) {}
     virtual vector<Shape> computeShape() const = 0;
-    virtual OpAttrs getOpAttrs() const = 0;
+    virtual OpPerfKey getOpAttrs() const = 0;
   public: // check Op type
     bool isLinearOp() const;
@@ -143,6 +167,14 @@ class OperatorNode : public Object {
     virtual int numInputs() const = 0;
     virtual int numOutputs() const = 0;
+    virtual HashType hash() const { IT_TODO_HALT(); }
+    virtual HashType hashWithShape() const { IT_TODO_HALT(); }
 };
 } // namespace infini
+
+namespace std {
+template <> struct hash<infini::OpPerfKey> {
+    size_t operator()(const infini::OpPerfKey &key) const { return key.hash; }
+};
+} // namespace std
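The std::hash specialization simply forwards the precomputed hash field, which is what makes OpPerfKey usable as an unordered_map key once PerfEngine drops the ordered map (see the TODO on operator< above). A minimal standalone sketch, with OpType, HashType, and the sample values as illustrative stand-ins:

#include <cstddef>
#include <iostream>
#include <unordered_map>
#include <vector>

using HashType = std::size_t;
enum class OpType { Matmul }; // stand-in for the real enum

struct OpPerfKey {
    HashType hash;
    OpType opType;
    std::vector<int> attrs;
    bool operator==(const OpPerfKey &rhs) const {
        return hash == rhs.hash && opType == rhs.opType && attrs == rhs.attrs;
    }
};

namespace std {
template <> struct hash<OpPerfKey> {
    size_t operator()(const OpPerfKey &key) const { return key.hash; }
};
} // namespace std

int main() {
    // unordered_map uses the hash to pick a bucket and operator== to
    // resolve collisions; operator< is only needed by the ordered std::map.
    std::unordered_map<OpPerfKey, double> perfData;
    OpPerfKey key{42, OpType::Matmul, {1, 64, 64, 64}};
    perfData[key] = 0.25; // e.g. a measured runtime
    std::cout << perfData.at(key) << "\n";
}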

View File

@@ -6,7 +6,9 @@ namespace infini {
 class PerfEngine {
   public:
-    using Key = std::pair<KernelAttrs, OpAttrs>;
+    // TODO: Key should be OpPerfKey + Context(maybe implicat) to support
+    // multiple candiate kernels.
+    using Key = std::pair<KernelAttrs, OpPerfKey>;
   private:
     map<Key, PerfRecord> data;
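Because Key is a std::pair and std::map compares pairs lexicographically, both KernelAttrs and OpPerfKey must provide operator<; that is why OpPerfKey still carries one. A minimal sketch of the same layout, assuming simplified stand-ins for KernelAttrs and PerfRecord and using std::tie instead of the hand-rolled comparison:

#include <cstddef>
#include <map>
#include <tuple>
#include <utility>
#include <vector>

// Simplified stand-ins, not the real InfiniTensor types.
using KernelAttrs = std::tuple<int, int, int>; // device, opType, dataType
using PerfRecord = double;                     // e.g. best measured time

struct OpPerfKey {
    std::size_t hash;
    int opType;
    std::vector<int> attrs;
    // std::pair compares lexicographically, so the second member needs
    // operator< too; std::tie yields a lexicographic comparison for free.
    bool operator<(const OpPerfKey &rhs) const {
        return std::tie(hash, opType, attrs) <
               std::tie(rhs.hash, rhs.opType, rhs.attrs);
    }
};

int main() {
    using Key = std::pair<KernelAttrs, OpPerfKey>;
    std::map<Key, PerfRecord> data;
    data[{KernelAttrs{0, 0, 0}, OpPerfKey{42, 0, {1, 2, 3}}}] = 0.5;
    return data.count({KernelAttrs{0, 0, 0}, OpPerfKey{42, 0, {1, 2, 3}}})
               ? 0
               : 1;
}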

View File

@@ -34,22 +34,13 @@ class TensorBaseNode : public Object {
     //     NotCounted,
     // };
-    // // TODO: is more compute state needed?
-    // enum ComputeState {
-    //     NotComputed,
-    //     // Allocated,
-    //     // Initialized,
-    //     // ComputedPartial,
-    //     ComputedFull,
-    // };
   protected:
     int dim;
     DataType dtype;
     vector<WRef<TensorBaseNode>> inputOf;
     WRef<TensorBaseNode> outputOf;
-    // TODO: use a blob instead of vector
+    // TODO: Ref<void> -> Ref<Blob>
     Ref<VType[]> data;
     // ComputeState computed;
     // static int random_seed[256 * 16];
@@ -267,4 +258,4 @@ class TensorBaseNode : public Object {
     //     void printShape();
 };
 } // namespace infini

View File

@@ -4,30 +4,15 @@
 namespace infini {
 class MatmulNode : public OperatorNode {
-  public:
-    struct MatmulArgs : public OpAttrs {
-        int b, m, n, k;
-        // PET assume a row-major tensor layout. transA=false means default
-        // dims, true means A should be transposed before matmul. This is in
-        // oppsite to column-major BLAS.
-        bool transA, transB;
-        ActType act;
-        MatmulArgs(int b, int m, int n, int k, bool transA, bool transB,
-                   ActType act)
-            : b(b), m(m), n(n), k(k), transA(transA), transB(transB), act(act) {
-        }
-        bool operator<(const OpAttrs &rhsGeneric) {
-            auto rhs = dynamic_cast<const MatmulArgs &>(rhsGeneric);
-            return std::tie(b, m, n, k, transA, transB, act) <
-                   std::tie(rhs.b, rhs.m, rhs.n, rhs.k, rhs.transA, rhs.transB,
-                            rhs.act);
-        }
-    };
-
   private:
-    MatmulArgs args;
+    // InfiniTensor assume a row-major tensor layout. transA=false means default
+    // dims, true means A should be transposed before matmul. This is in
+    // oppsite to column-major BLAS.
+    bool transA, transB;
+    ActType act;
+    // Auxiliary attributes
+    int b, m, n, k;
   public:
     MatmulNode(Tensor A, Tensor B, Tensor C, bool transA = false,
@@ -41,19 +26,22 @@ class MatmulNode : public OperatorNode {
     int numOutputs() const override { return 1; }
     Tensor getBias() const { return inputs[2]; }
-    void setAct(ActType act) { this->args.act = act; }
-    ActType getAct() const { return args.act; }
-    bool getTransA() const { return args.transA; }
-    bool getTransB() const { return args.transB; }
-    MatmulArgs getArgs() const { return args; }
-    OpAttrs getOpAttrs() const override { return args; }
+    ActType getAct() const { return act; }
+    bool getTransA() const { return transA; }
+    bool getTransB() const { return transB; }
+    int getB() const { return b; }
+    int getM() const { return m; }
+    int getN() const { return n; }
+    int getK() const { return k; }
+    HashType hashWithShape() const override;
+    OpPerfKey getOpAttrs() const override;
   private:
     // Q: whether to check the output? Since we can build an Op first and then
     // assure output.
-    // Fix 1: make shape inference a static method. But OpAttrs are required.
+    // Fix 1: make shape inference a static method. But OpPerfKey are required.
     bool checkValid(const TensorVec &inputs) const;
 };
 } // namespace infini

View File

@@ -9,10 +9,9 @@ template <typename T> class NaiveMatmul : public Kernel {
         T *A = reinterpret_cast<T *>(op->getInputs(0)->getDataPtr().get());
         T *B = reinterpret_cast<T *>(op->getInputs(1)->getDataPtr().get());
         T *C = reinterpret_cast<T *>(op->getOutput()->getDataPtr().get());
-        const auto args = op->getArgs();
-        IT_ASSERT(args.transA == false && args.transB == false);
-        IT_ASSERT(args.act == ActType::None);
-        const int M = args.m, N = args.n, K = args.k;
+        IT_ASSERT(op->getTransA() == false && op->getTransB() == false);
+        IT_ASSERT(op->getAct() == ActType::None);
+        const int M = op->getM(), N = op->getN(), K = op->getK();
         for (int i = 0; i < M; i++) {
             for (int j = 0; j < N; j++) {
                 C[i * N + j] = 0;
@@ -33,8 +32,8 @@ template <typename T> class NaiveMatmul : public Kernel {
 };
 REGISTER_KERNEL(Device::CPU, OpType::Matmul, DataType::Int32,
-                NaiveMatmul<uint32_t>);
+                NaiveMatmul<uint32_t>, "MatmulNaive_CPU_uint32");
 REGISTER_KERNEL(Device::CPU, OpType::Matmul, DataType::Float32,
-                NaiveMatmul<float>);
+                NaiveMatmul<float>, "MatmulNaive_CPU_float32");
 } // namespace infini

View File

@@ -2,27 +2,24 @@
 namespace infini {
-vector<Shape> MatmulNode::computeShape() const {
-    Shape ret{args.b, args.m, args.n};
-    return {ret};
-}
+vector<Shape> MatmulNode::computeShape() const { return {{b, m, n}}; }
 MatmulNode::MatmulNode(Tensor A, Tensor B, Tensor C, bool transA, bool transB,
                        Tensor bias, ActType act)
-    : OperatorNode(OpType::Matmul, {A, B, bias}, {C}),
-      args(A->getDims()[0], transA ? A->getDims()[2] : A->getDims()[1],
-           transB ? B->getDims()[1] : B->getDims()[2],
-           transA ? A->getDims()[1] : A->getDims()[2], transA, transB, act) {
+    : OperatorNode(OpType::Matmul, {A, B, bias}, {C}), transA(transA),
+      transB(transB), act(act), b(A->getDims()[0]),
+      m(transA ? A->getDims()[2] : A->getDims()[1]),
+      n(transB ? B->getDims()[1] : B->getDims()[2]),
+      k(transA ? A->getDims()[1] : A->getDims()[2]) {
     IT_ASSERT(checkValid(inputs));
 }
 string MatmulNode::toString() const {
     std::ostringstream os;
-    MatmulArgs args = getArgs();
-    os << "Matmul([" << (args.transA ? "A^T" : "A") << ","
-       << (args.transB ? "B^T" : "B") << ",act=" << (int)args.act
-       << "],A=" << inputs[0]->getGuid() << ",B=" << inputs[1]->getGuid()
-       << ",C=" << outputs[0]->getGuid() << ")";
+    os << "Matmul([" << (transA ? "A^T" : "A") << "," << (transB ? "B^T" : "B")
+       << ",act=" << enum_to_underlying(act) << "],A=" << inputs[0]->getGuid()
+       << ",B=" << inputs[1]->getGuid() << ",C=" << outputs[0]->getGuid()
+       << ")";
     return os.str();
 }
@@ -32,8 +29,8 @@ bool MatmulNode::checkValid(const TensorVec &inputs) const {
     //     return false;
     IT_ASSERT(A->getDims().size() == 3 && B->getDims().size() == 3);
     IT_ASSERT(A->getDims()[0] == B->getDims()[0]);
-    IT_ASSERT((args.transA ? A->getDims()[1] : A->getDims()[2]) ==
-              (args.transB ? B->getDims()[2] : B->getDims()[1]));
+    IT_ASSERT((transA ? A->getDims()[1] : A->getDims()[2]) ==
+              (transB ? B->getDims()[2] : B->getDims()[1]));
     // if (A->getDims().size() != 3 || B->getDims().size() != 3) {
     //     return false;
     // }
@@ -46,4 +43,14 @@ bool MatmulNode::checkValid(const TensorVec &inputs) const {
     // }
     return true;
 }
+
+HashType MatmulNode::hashWithShape() const {
+    // TODO: use a real hash
+    return b + m + n + k + transA + transB + enum_to_underlying(act);
+}
+
+OpPerfKey MatmulNode::getOpAttrs() const {
+    return OpPerfKey(hashWithShape(), type,
+                     {b, m, n, k, transA, transB, enum_to_underlying(act)});
+}
 } // namespace infini
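One property of the additive hash above (flagged by its own TODO): it is permutation-invariant, so distinct shapes whose dimensions merely trade places collide. Lookups stay correct because OpPerfKey also stores the full attrs vector and operator== compares it; a collision only costs extra comparisons. A small standalone illustration:

#include <cassert>
#include <cstddef>
#include <vector>

using HashType = std::size_t;

// Mirrors the additive scheme of hashWithShape (transA/transB/act omitted
// for brevity); illustrative only, not the InfiniTensor implementation.
static HashType additiveHash(int b, int m, int n, int k) {
    return b + m + n + k;
}

int main() {
    // (b=1, m=64, n=32, k=16) vs. (b=1, m=16, n=32, k=64): same sum,
    // so the two OpPerfKeys land in the same hash bucket.
    assert(additiveHash(1, 64, 32, 16) == additiveHash(1, 16, 32, 64));
    // The attrs vectors still differ, so operator== tells them apart.
    std::vector<int> a1{1, 64, 32, 16}, a2{1, 16, 32, 64};
    assert(a1 != a2);
    return 0;
}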