Update: OpAttrs -> OpPerfKey

Liyan Zheng 2022-08-09 14:58:45 +08:00
parent b7e2096a26
commit 8b685ae4a6
11 changed files with 127 additions and 90 deletions

View File

@@ -42,7 +42,6 @@ add_subdirectory(3rd-party/nlohmann_json_cmake_fetchcontent)
 include_directories(3rd-party/nlohmann_json_cmake_fetchcontent/single_include)
 if(BUILD_TEST)
-  # TODO: chekc set
   set(BUILD_GMOCK
       OFF
       CACHE BOOL "Do not build gmock" FORCE)

View File

@@ -16,8 +16,6 @@
 namespace infini {
 using std::list;
 using std::map;
-using std::nullopt;
-using std::optional;
 using std::pair;
 using std::set;
 using std::string;
@@ -29,6 +27,7 @@ using std::vector;
 // Aliases
 using dtype = float;
+using HashType = size_t; // compatible with std::hash
 // Metaprogramming utilities
 #define _CAT(A, B) A##B

View File

@@ -4,6 +4,7 @@
 namespace infini {
+// TODO: graph should be attached to a context
 class GraphNode : public Object {
   protected:
     TensorVec tensors;
@@ -29,12 +30,18 @@ class GraphNode : public Object {
         return tensor;
     }
-    void updateConnection();
     void dataMalloc();
+  private:
+    // TODO: updateConnection
+    /**
+     * @brief Add reverse connections and Op relationship in ctor.
+     */
+    void updateConnection();
     // TODO: move to another class
     // bool exportOnnx(const char *path);
     // bool importOnnx(const char *net);
 };
 } // namespace infini

View File

@@ -23,38 +23,46 @@ class Kernel {
 };
 class KernelRegistry {
+  public:
+    using KernelRecord =
+        tuple<Kernel *const, const string, const int>; // Kernel, name, ID
+
+  private:
+    std::map<KernelAttrs, KernelRecord> kernels;
+    int nKernels = 0;
+
   public:
     ~KernelRegistry() {
         for (auto &[k, v] : kernels)
-            delete v;
+            delete std::get<0>(v);
     }
     static KernelRegistry &getInstance() {
         static KernelRegistry instance;
         return instance;
     }
-    bool registerKernel(const KernelAttrs &key, Kernel *kernel) {
-        // TODO: kernels with priority
+    bool registerKernel(const KernelAttrs &key, Kernel *kernel, string name) {
+        // TODO: mutliple kernels support: priority and check name
         IT_ASSERT(kernels.find(key) == kernels.end(),
                   "Kernel already registered");
-        kernels.emplace(key, kernel);
+        kernels.emplace(key, KernelRecord{kernel, name, ++nKernels});
         return true;
     }
     Kernel *getKernel(const KernelAttrs &kernelAttrs) const {
+        return std::get<0>(kernels.at(kernelAttrs));
+    }
+    const KernelRecord &getKernelItem(const KernelAttrs &kernelAttrs) const {
         return kernels.at(kernelAttrs);
     }
-
-  private:
-    std::map<KernelAttrs, Kernel *> kernels;
 };
-#define _REGISTER_KERNEL_1(device, opType, dataType, kernel, cnt)             \
+#define _REGISTER_KERNEL_1(device, opType, dataType, kernel, name, cnt)       \
     namespace infini {                                                        \
     static const bool _CAT(_register_kernel_, cnt) =                          \
         KernelRegistry::getInstance().registerKernel(                         \
-            KernelAttrs{device, opType, dataType}, new kernel());             \
+            KernelAttrs{device, opType, dataType}, new kernel(), name);       \
     }
-#define REGISTER_KERNEL(device, opType, dataType, kernel)                     \
-    _REGISTER_KERNEL_1(device, opType, dataType, kernel, __COUNTER__)
+#define REGISTER_KERNEL(device, opType, dataType, kernel, name)               \
+    _REGISTER_KERNEL_1(device, opType, dataType, kernel, name, __COUNTER__)
 } // namespace infini
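For reference, the new registration flow can be exercised end to end. Below is a standalone sketch of the same registry pattern; Kernel, KernelAttrs, and the sample values are simplified stand-ins for illustration, not the actual InfiniTensor headers:

#include <cassert>
#include <iostream>
#include <map>
#include <string>
#include <tuple>

struct Kernel {
    virtual ~Kernel() = default;
};
using KernelAttrs = std::tuple<int, int, int>; // device, opType, dataType

class KernelRegistry {
  public:
    // Kernel, human-readable name, registration ID
    using KernelRecord =
        std::tuple<Kernel *const, const std::string, const int>;

    ~KernelRegistry() {
        for (auto &[k, v] : kernels)
            delete std::get<0>(v);
    }
    static KernelRegistry &getInstance() {
        static KernelRegistry instance;
        return instance;
    }
    bool registerKernel(const KernelAttrs &key, Kernel *kernel,
                        std::string name) {
        assert(kernels.find(key) == kernels.end()); // no duplicate keys yet
        kernels.emplace(key, KernelRecord{kernel, name, ++nKernels});
        return true;
    }
    const KernelRecord &getKernelItem(const KernelAttrs &key) const {
        return kernels.at(key);
    }

  private:
    std::map<KernelAttrs, KernelRecord> kernels;
    int nKernels = 0;
};

struct NaiveMatmul : Kernel {};

int main() {
    auto &reg = KernelRegistry::getInstance();
    reg.registerKernel(KernelAttrs{0, 0, 0}, new NaiveMatmul(),
                       "MatmulNaive_CPU_float32");
    const auto &[kernel, name, id] = reg.getKernelItem(KernelAttrs{0, 0, 0});
    std::cout << name << " (ID " << id << ", kernel at " << kernel << ")\n";
}

Keeping the name and a monotonically increasing ID in the record is what would later let several kernels per KernelAttrs be told apart, in line with the TODO about priorities above.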

View File

@@ -4,11 +4,16 @@
 namespace infini {
 class Mutator {
+  private:
+    int candidatesLimit;
+    // // Statistical data
+    // int numTotalCandidates;
+
   public:
-    Mutator(){};
+    Mutator(int candidatesLimit) : candidatesLimit(candidatesLimit){};
     virtual ~Mutator(){};
     virtual vector<Graph> run(const Graph &in_graph) = 0;
 };
 } // namespace infini

View File

@@ -94,18 +94,42 @@ enum class ActType {
     Tanh,
 };
-struct OpAttrs {
+struct OpPerfKey {
+    HashType hash;
+    OpType opType;
+    vector<int> attrs;
+
   public:
-    virtual bool operator<(const OpAttrs &rhs) const {
-        IT_ASSERT(typeid(*this) == typeid(rhs), "OpAttrs type mismatch.");
-        // Empty OpAttrs are equal
+    OpPerfKey(HashType hash, OpType opType, vector<int> attrs = {})
+        : hash(hash), opType(opType), attrs(attrs) {}
+    bool operator==(const OpPerfKey &rhs) const {
+        if (hash != rhs.hash)
+            return false;
+        if (opType != rhs.opType)
+            return false;
+        if (attrs != rhs.attrs)
+            return false;
+        return true;
+    }
+    // TODO: remove this function after we use unordered_map in PerfEngine
+    bool operator<(const OpPerfKey &rhs) const {
+        if (hash != rhs.hash)
+            return hash < rhs.hash;
+        if (opType != rhs.opType)
+            return opType < rhs.opType;
+        if (attrs.size() != rhs.attrs.size())
+            return attrs.size() < rhs.attrs.size();
+        for (size_t i = 0; i < attrs.size(); ++i)
+            if (attrs[i] != rhs.attrs[i])
+                return attrs[i] < rhs.attrs[i];
         return false;
     }
-    virtual ~OpAttrs() {}
 };
 class OperatorNode : public Object {
-  public:
+    friend class Kernel;
+
   protected:
     OpType type;
     TensorVec inputs;
@@ -117,7 +141,7 @@ class OperatorNode : public Object {
     OperatorNode(OpType opType, TensorVec inputs, TensorVec outputs)
         : type(opType), inputs(inputs), outputs(outputs) {}
     virtual vector<Shape> computeShape() const = 0;
-    virtual OpAttrs getOpAttrs() const = 0;
+    virtual OpPerfKey getOpAttrs() const = 0;
   public: // check Op type
     bool isLinearOp() const;
@@ -143,6 +167,14 @@ class OperatorNode : public Object {
     virtual int numInputs() const = 0;
     virtual int numOutputs() const = 0;
+    virtual HashType hash() const { IT_TODO_HALT(); }
+    virtual HashType hashWithShape() const { IT_TODO_HALT(); }
 };
 } // namespace infini
+
+namespace std {
+template <> struct hash<infini::OpPerfKey> {
+    size_t operator()(const infini::OpPerfKey &key) const { return key.hash; }
+};
+} // namespace std
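The std::hash specialization simply forwards the precomputed hash field, which is what makes OpPerfKey usable as an unordered_map key once PerfEngine drops the ordered map (see the TODO on operator< above). A minimal standalone sketch, with OpType, HashType, and the sample values as illustrative stand-ins:

#include <cstddef>
#include <iostream>
#include <unordered_map>
#include <vector>

using HashType = std::size_t;
enum class OpType { Matmul }; // stand-in for the real enum

struct OpPerfKey {
    HashType hash;
    OpType opType;
    std::vector<int> attrs;
    bool operator==(const OpPerfKey &rhs) const {
        return hash == rhs.hash && opType == rhs.opType && attrs == rhs.attrs;
    }
};

namespace std {
template <> struct hash<OpPerfKey> {
    size_t operator()(const OpPerfKey &key) const { return key.hash; }
};
} // namespace std

int main() {
    // unordered_map uses the hash to pick a bucket and operator== to
    // resolve collisions; operator< is only needed by the ordered std::map.
    std::unordered_map<OpPerfKey, double> perfData;
    OpPerfKey key{42, OpType::Matmul, {1, 64, 64, 64}};
    perfData[key] = 0.25; // e.g. a measured runtime
    std::cout << perfData.at(key) << "\n";
}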

View File

@@ -6,7 +6,9 @@ namespace infini {
 class PerfEngine {
   public:
-    using Key = std::pair<KernelAttrs, OpAttrs>;
+    // TODO: Key should be OpPerfKey + Context(maybe implicat) to support
+    // multiple candiate kernels.
+    using Key = std::pair<KernelAttrs, OpPerfKey>;
   private:
     map<Key, PerfRecord> data;
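Because Key is a std::pair and std::map compares pairs lexicographically, both KernelAttrs and OpPerfKey must provide operator<; that is why OpPerfKey still carries one. A minimal sketch of the same layout, assuming simplified stand-ins for KernelAttrs and PerfRecord and using std::tie instead of the hand-rolled comparison:

#include <cstddef>
#include <map>
#include <tuple>
#include <utility>
#include <vector>

// Simplified stand-ins, not the real InfiniTensor types.
using KernelAttrs = std::tuple<int, int, int>; // device, opType, dataType
using PerfRecord = double;                     // e.g. best measured time

struct OpPerfKey {
    std::size_t hash;
    int opType;
    std::vector<int> attrs;
    // std::pair compares lexicographically, so the second member needs
    // operator< too; std::tie yields a lexicographic comparison for free.
    bool operator<(const OpPerfKey &rhs) const {
        return std::tie(hash, opType, attrs) <
               std::tie(rhs.hash, rhs.opType, rhs.attrs);
    }
};

int main() {
    using Key = std::pair<KernelAttrs, OpPerfKey>;
    std::map<Key, PerfRecord> data;
    data[{KernelAttrs{0, 0, 0}, OpPerfKey{42, 0, {1, 2, 3}}}] = 0.5;
    return data.count({KernelAttrs{0, 0, 0}, OpPerfKey{42, 0, {1, 2, 3}}})
               ? 0
               : 1;
}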

View File

@@ -34,22 +34,13 @@ class TensorBaseNode : public Object {
     //     NotCounted,
     // };
-    // // TODO: is more compute state needed?
-    // enum ComputeState {
-    //     NotComputed,
-    //     // Allocated,
-    //     // Initialized,
-    //     // ComputedPartial,
-    //     ComputedFull,
-    // };
   protected:
     int dim;
     DataType dtype;
     vector<WRef<TensorBaseNode>> inputOf;
     WRef<TensorBaseNode> outputOf;
-    // TODO: use a blob instead of vector
+    // TODO: Ref<void> -> Ref<Blob>
     Ref<VType[]> data;
     // ComputeState computed;
     // static int random_seed[256 * 16];
@@ -267,4 +258,4 @@ class TensorBaseNode : public Object {
     //     void printShape();
 };
 } // namespace infini

View File

@@ -4,30 +4,15 @@
 namespace infini {
 class MatmulNode : public OperatorNode {
-  public:
-    struct MatmulArgs : public OpAttrs {
-        int b, m, n, k;
-        // PET assume a row-major tensor layout. transA=false means default
-        // dims, true means A should be transposed before matmul. This is in
-        // oppsite to column-major BLAS.
-        bool transA, transB;
-        ActType act;
-        MatmulArgs(int b, int m, int n, int k, bool transA, bool transB,
-                   ActType act)
-            : b(b), m(m), n(n), k(k), transA(transA), transB(transB), act(act) {
-        }
-        bool operator<(const OpAttrs &rhsGeneric) {
-            auto rhs = dynamic_cast<const MatmulArgs &>(rhsGeneric);
-            return std::tie(b, m, n, k, transA, transB, act) <
-                   std::tie(rhs.b, rhs.m, rhs.n, rhs.k, rhs.transA, rhs.transB,
-                            rhs.act);
-        }
-    };
-
   private:
-    MatmulArgs args;
+    // InfiniTensor assume a row-major tensor layout. transA=false means default
+    // dims, true means A should be transposed before matmul. This is in
+    // oppsite to column-major BLAS.
+    bool transA, transB;
+    ActType act;
+    // Auxiliary attributes
+    int b, m, n, k;
   public:
     MatmulNode(Tensor A, Tensor B, Tensor C, bool transA = false,
@@ -41,19 +26,22 @@ class MatmulNode : public OperatorNode {
     int numOutputs() const override { return 1; }
     Tensor getBias() const { return inputs[2]; }
-    void setAct(ActType act) { this->args.act = act; }
-    ActType getAct() const { return args.act; }
-    bool getTransA() const { return args.transA; }
-    bool getTransB() const { return args.transB; }
-    MatmulArgs getArgs() const { return args; }
-    OpAttrs getOpAttrs() const override { return args; }
+    ActType getAct() const { return act; }
+    bool getTransA() const { return transA; }
+    bool getTransB() const { return transB; }
+    int getB() const { return b; }
+    int getM() const { return m; }
+    int getN() const { return n; }
+    int getK() const { return k; }
+    HashType hashWithShape() const override;
+    OpPerfKey getOpAttrs() const override;
   private:
     // Q: whether to check the output? Since we can build an Op first and then
     // assure output.
-    // Fix 1: make shape inference a static method. But OpAttrs are required.
+    // Fix 1: make shape inference a static method. But OpPerfKey are required.
     bool checkValid(const TensorVec &inputs) const;
 };
 } // namespace infini

View File

@@ -9,10 +9,9 @@ template <typename T> class NaiveMatmul : public Kernel {
         T *A = reinterpret_cast<T *>(op->getInputs(0)->getDataPtr().get());
         T *B = reinterpret_cast<T *>(op->getInputs(1)->getDataPtr().get());
         T *C = reinterpret_cast<T *>(op->getOutput()->getDataPtr().get());
-        const auto args = op->getArgs();
-        IT_ASSERT(args.transA == false && args.transB == false);
-        IT_ASSERT(args.act == ActType::None);
-        const int M = args.m, N = args.n, K = args.k;
+        IT_ASSERT(op->getTransA() == false && op->getTransB() == false);
+        IT_ASSERT(op->getAct() == ActType::None);
+        const int M = op->getM(), N = op->getN(), K = op->getK();
         for (int i = 0; i < M; i++) {
             for (int j = 0; j < N; j++) {
                 C[i * N + j] = 0;
@@ -33,8 +32,8 @@ template <typename T> class NaiveMatmul : public Kernel {
 };
 REGISTER_KERNEL(Device::CPU, OpType::Matmul, DataType::Int32,
-                NaiveMatmul<uint32_t>);
+                NaiveMatmul<uint32_t>, "MatmulNaive_CPU_uint32");
 REGISTER_KERNEL(Device::CPU, OpType::Matmul, DataType::Float32,
-                NaiveMatmul<float>);
+                NaiveMatmul<float>, "MatmulNaive_CPU_float32");
 } // namespace infini

View File

@@ -2,27 +2,24 @@
 namespace infini {
-vector<Shape> MatmulNode::computeShape() const {
-    Shape ret{args.b, args.m, args.n};
-    return {ret};
-}
+vector<Shape> MatmulNode::computeShape() const { return {{b, m, n}}; }
 MatmulNode::MatmulNode(Tensor A, Tensor B, Tensor C, bool transA, bool transB,
                        Tensor bias, ActType act)
-    : OperatorNode(OpType::Matmul, {A, B, bias}, {C}),
-      args(A->getDims()[0], transA ? A->getDims()[2] : A->getDims()[1],
-           transB ? B->getDims()[1] : B->getDims()[2],
-           transA ? A->getDims()[1] : A->getDims()[2], transA, transB, act) {
+    : OperatorNode(OpType::Matmul, {A, B, bias}, {C}), transA(transA),
+      transB(transB), act(act), b(A->getDims()[0]),
+      m(transA ? A->getDims()[2] : A->getDims()[1]),
+      n(transB ? B->getDims()[1] : B->getDims()[2]),
+      k(transA ? A->getDims()[1] : A->getDims()[2]) {
     IT_ASSERT(checkValid(inputs));
 }
 string MatmulNode::toString() const {
     std::ostringstream os;
-    MatmulArgs args = getArgs();
-    os << "Matmul([" << (args.transA ? "A^T" : "A") << ","
-       << (args.transB ? "B^T" : "B") << ",act=" << (int)args.act
-       << "],A=" << inputs[0]->getGuid() << ",B=" << inputs[1]->getGuid()
-       << ",C=" << outputs[0]->getGuid() << ")";
+    os << "Matmul([" << (transA ? "A^T" : "A") << "," << (transB ? "B^T" : "B")
+       << ",act=" << enum_to_underlying(act) << "],A=" << inputs[0]->getGuid()
+       << ",B=" << inputs[1]->getGuid() << ",C=" << outputs[0]->getGuid()
+       << ")";
     return os.str();
 }
@@ -32,8 +29,8 @@ bool MatmulNode::checkValid(const TensorVec &inputs) const {
     //     return false;
     IT_ASSERT(A->getDims().size() == 3 && B->getDims().size() == 3);
     IT_ASSERT(A->getDims()[0] == B->getDims()[0]);
-    IT_ASSERT((args.transA ? A->getDims()[1] : A->getDims()[2]) ==
-              (args.transB ? B->getDims()[2] : B->getDims()[1]));
+    IT_ASSERT((transA ? A->getDims()[1] : A->getDims()[2]) ==
+              (transB ? B->getDims()[2] : B->getDims()[1]));
     // if (A->getDims().size() != 3 || B->getDims().size() != 3) {
     //     return false;
     // }
@@ -46,4 +43,14 @@ bool MatmulNode::checkValid(const TensorVec &inputs) const {
     // }
     return true;
 }
+
+HashType MatmulNode::hashWithShape() const {
+    // TODO: use a real hash
+    return b + m + n + k + transA + transB + enum_to_underlying(act);
+}
+
+OpPerfKey MatmulNode::getOpAttrs() const {
+    return OpPerfKey(hashWithShape(), type,
+                     {b, m, n, k, transA, transB, enum_to_underlying(act)});
+}
 } // namespace infini
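One property of the additive hash above (flagged by its own TODO): it is permutation-invariant, so distinct shapes whose dimensions merely trade places collide. Lookups stay correct because OpPerfKey also stores the full attrs vector and operator== compares it; a collision only costs extra comparisons. A small standalone illustration:

#include <cassert>
#include <cstddef>
#include <vector>

using HashType = std::size_t;

// Mirrors the additive scheme of hashWithShape (transA/transB/act omitted
// for brevity); illustrative only, not the InfiniTensor implementation.
static HashType additiveHash(int b, int m, int n, int k) {
    return b + m + n + k;
}

int main() {
    // (b=1, m=64, n=32, k=16) vs. (b=1, m=16, n=32, k=64): same sum,
    // so the two OpPerfKeys land in the same hash bucket.
    assert(additiveHash(1, 64, 32, 16) == additiveHash(1, 16, 32, 64));
    // The attrs vectors still differ, so operator== tells them apart.
    std::vector<int> a1{1, 64, 32, 16}, a2{1, 16, 32, 64};
    assert(a1 != a2);
    return 0;
}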