Update: OpAttrs -> OpPerfKey

Liyan Zheng 2022-08-09 14:58:45 +08:00
parent b7e2096a26
commit 8b685ae4a6
11 changed files with 127 additions and 90 deletions

View File

@@ -42,7 +42,6 @@ add_subdirectory(3rd-party/nlohmann_json_cmake_fetchcontent)
include_directories(3rd-party/nlohmann_json_cmake_fetchcontent/single_include)
if(BUILD_TEST)
# TODO: check set
set(BUILD_GMOCK
OFF
CACHE BOOL "Do not build gmock" FORCE)

View File

@@ -16,8 +16,6 @@
namespace infini {
using std::list;
using std::map;
using std::nullopt;
using std::optional;
using std::pair;
using std::set;
using std::string;
@@ -29,6 +27,7 @@ using std::vector;
// Aliases
using dtype = float;
using HashType = size_t; // compatible with std::hash
// Metaprogramming utilities
#define _CAT(A, B) A##B
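A minimal sketch of why _CAT is paired with a wrapper macro elsewhere (e.g. REGISTER_KERNEL below): macro arguments are expanded before substitution unless they are operands of ##, so __COUNTER__ must pass through one extra macro before it is pasted. The EXAMPLE names here are hypothetical:

// Hypothetical: cnt is fully expanded in _EXAMPLE_1's body (it is not an
// operand of ## there), so _CAT receives a literal number to paste.
#define _EXAMPLE_1(cnt) static const bool _CAT(_example_flag_, cnt) = true;
#define EXAMPLE() _EXAMPLE_1(__COUNTER__)
EXAMPLE() // e.g. static const bool _example_flag_0 = true;
EXAMPLE() // e.g. static const bool _example_flag_1 = true;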

View File

@@ -4,6 +4,7 @@
namespace infini {
// TODO: graph should be attached to a context
class GraphNode : public Object {
protected:
TensorVec tensors;
@@ -29,12 +30,18 @@ class GraphNode : public Object {
return tensor;
}
void updateConnection();
void dataMalloc();
private:
// TODO: updateConnection
/**
* @brief Add reverse connections and Op relationship in ctor.
*/
void updateConnection();
// TODO: move to another class
// bool exportOnnx(const char *path);
// bool importOnnx(const char *net);
};
} // namespace infini

View File

@@ -23,38 +23,46 @@ class Kernel {
};
class KernelRegistry {
public:
using KernelRecord =
tuple<Kernel *const, const string, const int>; // Kernel, name, ID
private:
std::map<KernelAttrs, KernelRecord> kernels;
int nKernels = 0;
public:
~KernelRegistry() {
for (auto &[k, v] : kernels)
delete v;
delete std::get<0>(v);
}
static KernelRegistry &getInstance() {
static KernelRegistry instance;
return instance;
}
bool registerKernel(const KernelAttrs &key, Kernel *kernel) {
// TODO: kernels with priority
bool registerKernel(const KernelAttrs &key, Kernel *kernel, string name) {
// TODO: multiple kernels support: priority and name check
IT_ASSERT(kernels.find(key) == kernels.end(),
"Kernel already registered");
kernels.emplace(key, kernel);
kernels.emplace(key, KernelRecord{kernel, name, ++nKernels});
return true;
}
Kernel *getKernel(const KernelAttrs &kernelAttrs) const {
return std::get<0>(kernels.at(kernelAttrs));
}
const KernelRecord &getKernelItem(const KernelAttrs &kernelAttrs) const {
return kernels.at(kernelAttrs);
}
private:
std::map<KernelAttrs, Kernel *> kernels;
};
#define _REGISTER_KERNEL_1(device, opType, dataType, kernel, cnt) \
#define _REGISTER_KERNEL_1(device, opType, dataType, kernel, name, cnt) \
namespace infini { \
static const bool _CAT(_register_kernel_, cnt) = \
KernelRegistry::getInstance().registerKernel( \
KernelAttrs{device, opType, dataType}, new kernel()); \
KernelAttrs{device, opType, dataType}, new kernel(), name); \
}
#define REGISTER_KERNEL(device, opType, dataType, kernel) \
_REGISTER_KERNEL_1(device, opType, dataType, kernel, __COUNTER__)
#define REGISTER_KERNEL(device, opType, dataType, kernel, name) \
_REGISTER_KERNEL_1(device, opType, dataType, kernel, name, __COUNTER__)
} // namespace infini
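For orientation, a hedged sketch of the lookup side, assuming only the types declared above (the call site and variable names are hypothetical):

// Hypothetical: fetch the kernel registered for CPU/Matmul/Float32 and
// unpack its KernelRecord (kernel pointer, registered name, numeric ID).
KernelAttrs attrs{Device::CPU, OpType::Matmul, DataType::Float32};
Kernel *kernel = KernelRegistry::getInstance().getKernel(attrs);
const auto &[k, name, id] = KernelRegistry::getInstance().getKernelItem(attrs);
// name is the string passed to REGISTER_KERNEL, id the registration order.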

View File

@@ -4,11 +4,16 @@
namespace infini {
class Mutator {
private:
int candidatesLimit;
// // Statistical data
// int numTotalCandidates;
public:
Mutator(){};
Mutator(int candidatesLimit) : candidatesLimit(candidatesLimit){};
virtual ~Mutator(){};
virtual vector<Graph> run(const Graph &in_graph) = 0;
};
} // namespace infini
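A minimal subclass sketch (hypothetical class; only the interface above is assumed) to illustrate the run() contract:

// Hypothetical identity mutator: the only candidate returned is the input
// graph itself, trivially within any candidatesLimit.
class IdentityMutator : public Mutator {
  public:
    explicit IdentityMutator(int limit) : Mutator(limit) {}
    vector<Graph> run(const Graph &in_graph) override { return {in_graph}; }
};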

View File

@@ -94,18 +94,42 @@ enum class ActType {
Tanh,
};
struct OpAttrs {
struct OpPerfKey {
HashType hash;
OpType opType;
vector<int> attrs;
public:
virtual bool operator<(const OpAttrs &rhs) const {
IT_ASSERT(typeid(*this) == typeid(rhs), "OpAttrs type mismatch.");
// Empty OpAttrs are equal
OpPerfKey(HashType hash, OpType opType, vector<int> attrs = {})
: hash(hash), opType(opType), attrs(attrs) {}
bool operator==(const OpPerfKey &rhs) const {
if (hash != rhs.hash)
return false;
if (opType != rhs.opType)
return false;
if (attrs != rhs.attrs)
return false;
return true;
}
// TODO: remove this function after we use unordered_map in PerfEngine
bool operator<(const OpPerfKey &rhs) const {
if (hash != rhs.hash)
return hash < rhs.hash;
if (opType != rhs.opType)
return opType < rhs.opType;
if (attrs.size() != rhs.attrs.size())
return attrs.size() < rhs.attrs.size();
for (size_t i = 0; i < attrs.size(); ++i)
if (attrs[i] != rhs.attrs[i])
return attrs[i] < rhs.attrs[i];
return false;
}
virtual ~OpAttrs() {}
};
class OperatorNode : public Object {
public:
friend class Kernel;
protected:
OpType type;
TensorVec inputs;
@@ -117,7 +141,7 @@ class OperatorNode : public Object {
OperatorNode(OpType opType, TensorVec inputs, TensorVec outputs)
: type(opType), inputs(inputs), outputs(outputs) {}
virtual vector<Shape> computeShape() const = 0;
virtual OpAttrs getOpAttrs() const = 0;
virtual OpPerfKey getOpAttrs() const = 0;
public: // check Op type
bool isLinearOp() const;
@@ -143,6 +167,14 @@ class OperatorNode : public Object {
virtual int numInputs() const = 0;
virtual int numOutputs() const = 0;
virtual HashType hash() const { IT_TODO_HALT(); }
virtual HashType hashWithShape() const { IT_TODO_HALT(); }
};
} // namespace infini
namespace std {
template <> struct hash<infini::OpPerfKey> {
size_t operator()(const infini::OpPerfKey &key) const { return key.hash; }
};
} // namespace std
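With operator== plus the std::hash specialization above, OpPerfKey can already key a std::unordered_map, which is exactly what the TODO on operator< anticipates. A hedged sketch with made-up values:

// Hypothetical: hash-based lookup of per-op performance data. The attrs
// vector follows the matmul layout {b, m, n, k, transA, transB, act}.
#include <unordered_map>
std::unordered_map<infini::OpPerfKey, double> perfMs;
infini::OpPerfKey key(/*hash=*/0x42, infini::OpType::Matmul,
                      {1, 1024, 1024, 1024, 0, 0, 0});
perfMs[key] = 0.25; // e.g. measured kernel time in milliseconds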

View File

@@ -6,7 +6,9 @@ namespace infini {
class PerfEngine {
public:
using Key = std::pair<KernelAttrs, OpAttrs>;
// TODO: Key should be OpPerfKey + Context (maybe implicit) to support
// multiple candidate kernels.
using Key = std::pair<KernelAttrs, OpPerfKey>;
private:
map<Key, PerfRecord> data;
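A sketch of how a caller would compose the new Key (the op variable and its device are hypothetical; PerfEngine's map itself stays private):

// Hypothetical: one PerfRecord per (kernel, op-shape) combination.
PerfEngine::Key key{KernelAttrs{Device::CPU, OpType::Matmul, DataType::Float32},
                    op->getOpAttrs()};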

View File

@@ -34,22 +34,13 @@ class TensorBaseNode : public Object {
// NotCounted,
// };
// // TODO: is more compute state needed?
// enum ComputeState {
// NotComputed,
// // Allocated,
// // Initialized,
// // ComputedPartial,
// ComputedFull,
// };
protected:
int dim;
DataType dtype;
vector<WRef<TensorBaseNode>> inputOf;
WRef<TensorBaseNode> outputOf;
// TODO: use a blob instead of vector
// TODO: Ref<void> -> Ref<Blob>
Ref<VType[]> data;
// ComputeState computed;
// static int random_seed[256 * 16];
@@ -267,4 +258,4 @@ class TensorBaseNode : public Object {
// void printShape();
};
} // namespace infini

View File

@@ -4,30 +4,15 @@
namespace infini {
class MatmulNode : public OperatorNode {
public:
struct MatmulArgs : public OpAttrs {
int b, m, n, k;
// PET assumes a row-major tensor layout. transA=false means the default
// dims; true means A should be transposed before matmul. This is the
// opposite of column-major BLAS.
bool transA, transB;
ActType act;
MatmulArgs(int b, int m, int n, int k, bool transA, bool transB,
ActType act)
: b(b), m(m), n(n), k(k), transA(transA), transB(transB), act(act) {
}
bool operator<(const OpAttrs &rhsGeneric) {
auto rhs = dynamic_cast<const MatmulArgs &>(rhsGeneric);
return std::tie(b, m, n, k, transA, transB, act) <
std::tie(rhs.b, rhs.m, rhs.n, rhs.k, rhs.transA, rhs.transB,
rhs.act);
}
};
private:
MatmulArgs args;
// InfiniTensor assumes a row-major tensor layout. transA=false means the
// default dims; true means A should be transposed before matmul. This is
// the opposite of column-major BLAS.
bool transA, transB;
ActType act;
// Auxiliary attributes
int b, m, n, k;
public:
MatmulNode(Tensor A, Tensor B, Tensor C, bool transA = false,
@@ -41,19 +26,22 @@ class MatmulNode : public OperatorNode {
int numOutputs() const override { return 1; }
Tensor getBias() const { return inputs[2]; }
void setAct(ActType act) { this->args.act = act; }
ActType getAct() const { return args.act; }
bool getTransA() const { return args.transA; }
bool getTransB() const { return args.transB; }
ActType getAct() const { return act; }
bool getTransA() const { return transA; }
bool getTransB() const { return transB; }
int getB() const { return b; }
int getM() const { return m; }
int getN() const { return n; }
int getK() const { return k; }
MatmulArgs getArgs() const { return args; }
OpAttrs getOpAttrs() const override { return args; }
HashType hashWithShape() const override;
OpPerfKey getOpAttrs() const override;
private:
// Q: whether to check the output? Since we can build an Op first and then
// set its output later.
// Fix 1: make shape inference a static method. But OpAttrs are required.
// Fix 1: make shape inference a static method. But an OpPerfKey is required.
bool checkValid(const TensorVec &inputs) const;
};
} // namespace infini
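To make the b/m/n/k bookkeeping concrete, a worked example of the layout conventions (derived from the constructor in matmul.cc below):

// A: [b, m, k] when transA=false, [b, k, m] when transA=true
// B: [b, k, n] when transB=false, [b, n, k] when transB=true
// C: [b, m, n]
// E.g. A = [1, 3, 4], B = [1, 4, 5], transA = transB = false gives
// b=1, m=3, k=4, n=5, so computeShape() returns {{1, 3, 5}}.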

View File

@@ -9,10 +9,9 @@ template <typename T> class NaiveMatmul : public Kernel {
T *A = reinterpret_cast<T *>(op->getInputs(0)->getDataPtr().get());
T *B = reinterpret_cast<T *>(op->getInputs(1)->getDataPtr().get());
T *C = reinterpret_cast<T *>(op->getOutput()->getDataPtr().get());
const auto args = op->getArgs();
IT_ASSERT(args.transA == false && args.transB == false);
IT_ASSERT(args.act == ActType::None);
const int M = args.m, N = args.n, K = args.k;
IT_ASSERT(op->getTransA() == false && op->getTransB() == false);
IT_ASSERT(op->getAct() == ActType::None);
const int M = op->getM(), N = op->getN(), K = op->getK();
for (int i = 0; i < M; i++) {
for (int j = 0; j < N; j++) {
C[i * N + j] = 0;
@@ -33,8 +32,8 @@ template <typename T> class NaiveMatmul : public Kernel {
};
REGISTER_KERNEL(Device::CPU, OpType::Matmul, DataType::Int32,
NaiveMatmul<uint32_t>);
NaiveMatmul<uint32_t>, "MatmulNaive_CPU_uint32");
REGISTER_KERNEL(Device::CPU, OpType::Matmul, DataType::Float32,
NaiveMatmul<float>);
NaiveMatmul<float>, "MatmulNaive_CPU_float32");
} // namespace infini

View File

@@ -2,27 +2,24 @@
namespace infini {
vector<Shape> MatmulNode::computeShape() const {
Shape ret{args.b, args.m, args.n};
return {ret};
}
vector<Shape> MatmulNode::computeShape() const { return {{b, m, n}}; }
MatmulNode::MatmulNode(Tensor A, Tensor B, Tensor C, bool transA, bool transB,
Tensor bias, ActType act)
: OperatorNode(OpType::Matmul, {A, B, bias}, {C}),
args(A->getDims()[0], transA ? A->getDims()[2] : A->getDims()[1],
transB ? B->getDims()[1] : B->getDims()[2],
transA ? A->getDims()[1] : A->getDims()[2], transA, transB, act) {
: OperatorNode(OpType::Matmul, {A, B, bias}, {C}), transA(transA),
transB(transB), act(act), b(A->getDims()[0]),
m(transA ? A->getDims()[2] : A->getDims()[1]),
n(transB ? B->getDims()[1] : B->getDims()[2]),
k(transA ? A->getDims()[1] : A->getDims()[2]) {
IT_ASSERT(checkValid(inputs));
}
string MatmulNode::toString() const {
std::ostringstream os;
MatmulArgs args = getArgs();
os << "Matmul([" << (args.transA ? "A^T" : "A") << ","
<< (args.transB ? "B^T" : "B") << ",act=" << (int)args.act
<< "],A=" << inputs[0]->getGuid() << ",B=" << inputs[1]->getGuid()
<< ",C=" << outputs[0]->getGuid() << ")";
os << "Matmul([" << (transA ? "A^T" : "A") << "," << (transB ? "B^T" : "B")
<< ",act=" << enum_to_underlying(act) << "],A=" << inputs[0]->getGuid()
<< ",B=" << inputs[1]->getGuid() << ",C=" << outputs[0]->getGuid()
<< ")";
return os.str();
}
@@ -32,8 +29,8 @@ bool MatmulNode::checkValid(const TensorVec &inputs) const {
// return false;
IT_ASSERT(A->getDims().size() == 3 && B->getDims().size() == 3);
IT_ASSERT(A->getDims()[0] == B->getDims()[0]);
IT_ASSERT((args.transA ? A->getDims()[1] : A->getDims()[2]) ==
(args.transB ? B->getDims()[2] : B->getDims()[1]));
IT_ASSERT((transA ? A->getDims()[1] : A->getDims()[2]) ==
(transB ? B->getDims()[2] : B->getDims()[1]));
// if (A->getDims().size() != 3 || B->getDims().size() != 3) {
// return false;
// }
@@ -46,4 +43,14 @@ bool MatmulNode::checkValid(const TensorVec &inputs) const {
// }
return true;
}
HashType MatmulNode::hashWithShape() const {
// TODO: use a real hash
return b + m + n + k + transA + transB + enum_to_underlying(act);
}
OpPerfKey MatmulNode::getOpAttrs() const {
return OpPerfKey(hashWithShape(), type,
{b, m, n, k, transA, transB, enum_to_underlying(act)});
}
} // namespace infini
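The TODO above flags the hash as a placeholder: plain summation collides whenever attribute values swap (e.g. m=64, n=128 and m=128, n=64 hash identically). One possible direction, sketched with the well-known golden-ratio combiner (an assumption, not part of this commit):

// Hypothetical replacement body for hashWithShape(): fold each attribute
// into the seed so that order matters (needs <functional> for std::hash).
HashType seed = enum_to_underlying(type);
for (int v : {b, m, n, k, (int)transA, (int)transB,
              (int)enum_to_underlying(act)})
    seed ^= std::hash<int>()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
return seed;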