forked from jiuyuan/InfiniTensor
Update: OpAttrs -> OpPerfKey
This commit is contained in:
parent b7e2096a26
commit 8b685ae4a6
@@ -42,7 +42,6 @@ add_subdirectory(3rd-party/nlohmann_json_cmake_fetchcontent)
include_directories(3rd-party/nlohmann_json_cmake_fetchcontent/single_include)

if(BUILD_TEST)
  # TODO: check set
  set(BUILD_GMOCK
      OFF
      CACHE BOOL "Do not build gmock" FORCE)
@@ -16,8 +16,6 @@
namespace infini {
using std::list;
using std::map;
using std::nullopt;
using std::optional;
using std::pair;
using std::set;
using std::string;
@@ -29,6 +27,7 @@ using std::vector;

// Aliases
using dtype = float;
using HashType = size_t; // compatible with std::hash

// Metaprogramming utilities
#define _CAT(A, B) A##B
@@ -4,6 +4,7 @@

namespace infini {

// TODO: graph should be attached to a context
class GraphNode : public Object {
  protected:
    TensorVec tensors;
@@ -29,12 +30,18 @@ class GraphNode : public Object {
        return tensor;
    }

    void updateConnection();
    void dataMalloc();

  private:
    // TODO: updateConnection
    /**
     * @brief Add reverse connections and Op relationship in ctor.
     */
    void updateConnection();

    // TODO: move to another class
    // bool exportOnnx(const char *path);
    // bool importOnnx(const char *net);
};

} // namespace infini
} // namespace infini
@@ -23,38 +23,46 @@ class Kernel {
};

class KernelRegistry {
  public:
    using KernelRecord =
        tuple<Kernel *const, const string, const int>; // Kernel, name, ID

  private:
    std::map<KernelAttrs, KernelRecord> kernels;
    int nKernels = 0;

  public:
    ~KernelRegistry() {
        for (auto &[k, v] : kernels)
            delete v;
            delete std::get<0>(v);
    }
    static KernelRegistry &getInstance() {
        static KernelRegistry instance;
        return instance;
    }
    bool registerKernel(const KernelAttrs &key, Kernel *kernel) {
        // TODO: kernels with priority
    bool registerKernel(const KernelAttrs &key, Kernel *kernel, string name) {
        // TODO: multiple kernels support: priority and check name
        IT_ASSERT(kernels.find(key) == kernels.end(),
                  "Kernel already registered");
        kernels.emplace(key, kernel);
        kernels.emplace(key, KernelRecord{kernel, name, ++nKernels});
        return true;
    }
    Kernel *getKernel(const KernelAttrs &kernelAttrs) const {
        return std::get<0>(kernels.at(kernelAttrs));
    }
    const KernelRecord &getKernelItem(const KernelAttrs &kernelAttrs) const {
        return kernels.at(kernelAttrs);
    }

  private:
    std::map<KernelAttrs, Kernel *> kernels;
};

#define _REGISTER_KERNEL_1(device, opType, dataType, kernel, cnt) \
#define _REGISTER_KERNEL_1(device, opType, dataType, kernel, name, cnt) \
    namespace infini { \
    static const bool _CAT(_register_kernel_, cnt) = \
        KernelRegistry::getInstance().registerKernel( \
            KernelAttrs{device, opType, dataType}, new kernel()); \
            KernelAttrs{device, opType, dataType}, new kernel(), name); \
    }

#define REGISTER_KERNEL(device, opType, dataType, kernel) \
    _REGISTER_KERNEL_1(device, opType, dataType, kernel, __COUNTER__)
#define REGISTER_KERNEL(device, opType, dataType, kernel, name) \
    _REGISTER_KERNEL_1(device, opType, dataType, kernel, name, __COUNTER__)

} // namespace infini
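For context, a minimal usage sketch of the updated registry, not taken from this commit: the record returned by getKernelItem() now carries the kernel pointer, its registered name, and its numeric ID. The include path and the wrapper function below are assumptions.

    #include <iostream>

    #include "core/kernel.h" // assumed header path for KernelRegistry/KernelAttrs

    // Hypothetical helper: look up the matmul kernel registered later in this
    // commit and print the name stored in its KernelRecord {kernel, name, id}.
    void printMatmulKernelName() {
        using namespace infini;
        auto &registry = KernelRegistry::getInstance();
        const auto &[kernel, name, id] = registry.getKernelItem(
            KernelAttrs{Device::CPU, OpType::Matmul, DataType::Float32});
        std::cout << "kernel #" << id << ": " << name << std::endl;
        (void)kernel; // the Kernel* itself is unused in this sketch
    }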
@@ -4,11 +4,16 @@
namespace infini {

class Mutator {
  private:
    int candidatesLimit;
    // // Statistical data
    // int numTotalCandidates;

  public:
    Mutator(){};
    Mutator(int candidatesLimit) : candidatesLimit(candidatesLimit){};
    virtual ~Mutator(){};

    virtual vector<Graph> run(const Graph &in_graph) = 0;
};

} // namespace infini
} // namespace infini
@@ -94,18 +94,42 @@ enum class ActType {
    Tanh,
};

struct OpAttrs {
struct OpPerfKey {
    HashType hash;
    OpType opType;
    vector<int> attrs;

  public:
    virtual bool operator<(const OpAttrs &rhs) const {
        IT_ASSERT(typeid(*this) == typeid(rhs), "OpAttrs type mismatch.");
        // Empty OpAttrs are equal
    OpPerfKey(HashType hash, OpType opType, vector<int> attrs = {})
        : hash(hash), opType(opType), attrs(attrs) {}
    bool operator==(const OpPerfKey &rhs) const {
        if (hash != rhs.hash)
            return false;
        if (opType != rhs.opType)
            return false;
        if (attrs != rhs.attrs)
            return false;
        return true;
    }

    // TODO: remove this function after we use unordered_map in PerfEngine
    bool operator<(const OpPerfKey &rhs) const {
        if (hash != rhs.hash)
            return hash < rhs.hash;
        if (opType != rhs.opType)
            return opType < rhs.opType;
        if (attrs.size() != rhs.attrs.size())
            return attrs.size() < rhs.attrs.size();
        for (size_t i = 0; i < attrs.size(); ++i)
            if (attrs[i] != rhs.attrs[i])
                return attrs[i] < rhs.attrs[i];
        return false;
    }
    virtual ~OpAttrs() {}
};

class OperatorNode : public Object {
  public:
    friend class Kernel;

  protected:
    OpType type;
    TensorVec inputs;
@@ -117,7 +141,7 @@ class OperatorNode : public Object {
    OperatorNode(OpType opType, TensorVec inputs, TensorVec outputs)
        : type(opType), inputs(inputs), outputs(outputs) {}
    virtual vector<Shape> computeShape() const = 0;
    virtual OpAttrs getOpAttrs() const = 0;
    virtual OpPerfKey getOpAttrs() const = 0;

  public: // check Op type
    bool isLinearOp() const;
@@ -143,6 +167,14 @@ class OperatorNode : public Object {

    virtual int numInputs() const = 0;
    virtual int numOutputs() const = 0;
    virtual HashType hash() const { IT_TODO_HALT(); }
    virtual HashType hashWithShape() const { IT_TODO_HALT(); }
};

} // namespace infini
} // namespace infini

namespace std {
template <> struct hash<infini::OpPerfKey> {
    size_t operator()(const infini::OpPerfKey &key) const { return key.hash; }
};
} // namespace std
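Pairing operator== on OpPerfKey with this std::hash specialization is exactly what std::unordered_map needs, which is what the TODO about PerfEngine refers to. A minimal sketch, assuming operator.h is the header to include; the map and helper below are illustrative, not part of the codebase:

    #include <unordered_map>

    #include "core/operator.h" // assumed header path for OpPerfKey

    // Hypothetical perf table keyed directly by OpPerfKey: lookups hash via
    // key.hash and fall back to OpPerfKey::operator== on collisions.
    std::unordered_map<infini::OpPerfKey, double> perfTimeMs;

    void recordTime(const infini::OpPerfKey &key, double ms) {
        perfTimeMs[key] = ms;
    }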
@@ -6,7 +6,9 @@ namespace infini {

class PerfEngine {
  public:
    using Key = std::pair<KernelAttrs, OpAttrs>;
    // TODO: Key should be OpPerfKey + Context (maybe implicit) to support
    // multiple candidate kernels.
    using Key = std::pair<KernelAttrs, OpPerfKey>;

  private:
    map<Key, PerfRecord> data;
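A sketch of how the new Key would be assembled, with made-up values and assuming the relevant project headers are included; the OpPerfKey constructor arguments (hash, opType, attrs) are the ones introduced above:

    // Illustrative only: a PerfEngine::Key now pairs the kernel dispatch
    // attributes with an operator's OpPerfKey (e.g. the one MatmulNode builds
    // in getOpAttrs() further down in this commit).
    infini::PerfEngine::Key key{
        infini::KernelAttrs{infini::Device::CPU, infini::OpType::Matmul,
                            infini::DataType::Float32},
        infini::OpPerfKey{/*hash=*/42, infini::OpType::Matmul, {1, 128, 128, 64}}};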
@@ -34,22 +34,13 @@ class TensorBaseNode : public Object {
    //     NotCounted,
    // };

    // // TODO: is more compute state needed?
    // enum ComputeState {
    //     NotComputed,
    //     // Allocated,
    //     // Initialized,
    //     // ComputedPartial,
    //     ComputedFull,
    // };

  protected:
    int dim;

    DataType dtype;
    vector<WRef<TensorBaseNode>> inputOf;
    WRef<TensorBaseNode> outputOf;
    // TODO: use a blob instead of vector
    // TODO: Ref<void> -> Ref<Blob>
    Ref<VType[]> data;
    // ComputeState computed;
    // static int random_seed[256 * 16];
@@ -267,4 +258,4 @@ class TensorBaseNode : public Object {
    // void printShape();
};

} // namespace infini
} // namespace infini
@@ -4,30 +4,15 @@
namespace infini {

class MatmulNode : public OperatorNode {
  public:
    struct MatmulArgs : public OpAttrs {
        int b, m, n, k;
        // PET assumes a row-major tensor layout. transA=false means default
        // dims, true means A should be transposed before matmul. This is
        // opposite to column-major BLAS.
        bool transA, transB;
        ActType act;

        MatmulArgs(int b, int m, int n, int k, bool transA, bool transB,
                   ActType act)
            : b(b), m(m), n(n), k(k), transA(transA), transB(transB), act(act) {
        }

        bool operator<(const OpAttrs &rhsGeneric) {
            auto rhs = dynamic_cast<const MatmulArgs &>(rhsGeneric);
            return std::tie(b, m, n, k, transA, transB, act) <
                   std::tie(rhs.b, rhs.m, rhs.n, rhs.k, rhs.transA, rhs.transB,
                            rhs.act);
        }
    };

  private:
    MatmulArgs args;
    // InfiniTensor assumes a row-major tensor layout. transA=false means
    // default dims, true means A should be transposed before matmul. This is
    // opposite to column-major BLAS.
    bool transA, transB;
    ActType act;

    // Auxiliary attributes
    int b, m, n, k;

  public:
    MatmulNode(Tensor A, Tensor B, Tensor C, bool transA = false,
@@ -41,19 +26,22 @@ class MatmulNode : public OperatorNode {
    int numOutputs() const override { return 1; }

    Tensor getBias() const { return inputs[2]; }
    void setAct(ActType act) { this->args.act = act; }
    ActType getAct() const { return args.act; }
    bool getTransA() const { return args.transA; }
    bool getTransB() const { return args.transB; }
    ActType getAct() const { return act; }
    bool getTransA() const { return transA; }
    bool getTransB() const { return transB; }
    int getB() const { return b; }
    int getM() const { return m; }
    int getN() const { return n; }
    int getK() const { return k; }

    MatmulArgs getArgs() const { return args; }
    OpAttrs getOpAttrs() const override { return args; }
    HashType hashWithShape() const override;
    OpPerfKey getOpAttrs() const override;

  private:
    // Q: whether to check the output? Since we can build an Op first and then
    // assure output.
    // Fix 1: make shape inference a static method. But OpAttrs are required.
    // Fix 1: make shape inference a static method. But an OpPerfKey is required.
    bool checkValid(const TensorVec &inputs) const;
};

} // namespace infini
} // namespace infini
@@ -9,10 +9,9 @@ template <typename T> class NaiveMatmul : public Kernel {
        T *A = reinterpret_cast<T *>(op->getInputs(0)->getDataPtr().get());
        T *B = reinterpret_cast<T *>(op->getInputs(1)->getDataPtr().get());
        T *C = reinterpret_cast<T *>(op->getOutput()->getDataPtr().get());
        const auto args = op->getArgs();
        IT_ASSERT(args.transA == false && args.transB == false);
        IT_ASSERT(args.act == ActType::None);
        const int M = args.m, N = args.n, K = args.k;
        IT_ASSERT(op->getTransA() == false && op->getTransB() == false);
        IT_ASSERT(op->getAct() == ActType::None);
        const int M = op->getM(), N = op->getN(), K = op->getK();
        for (int i = 0; i < M; i++) {
            for (int j = 0; j < N; j++) {
                C[i * N + j] = 0;
@@ -33,8 +32,8 @@ template <typename T> class NaiveMatmul : public Kernel {
};

REGISTER_KERNEL(Device::CPU, OpType::Matmul, DataType::Int32,
                NaiveMatmul<uint32_t>);
                NaiveMatmul<uint32_t>, "MatmulNaive_CPU_uint32");
REGISTER_KERNEL(Device::CPU, OpType::Matmul, DataType::Float32,
                NaiveMatmul<float>);
                NaiveMatmul<float>, "MatmulNaive_CPU_float32");

} // namespace infini
@@ -2,27 +2,24 @@

namespace infini {

vector<Shape> MatmulNode::computeShape() const {
    Shape ret{args.b, args.m, args.n};
    return {ret};
}
vector<Shape> MatmulNode::computeShape() const { return {{b, m, n}}; }

MatmulNode::MatmulNode(Tensor A, Tensor B, Tensor C, bool transA, bool transB,
                       Tensor bias, ActType act)
    : OperatorNode(OpType::Matmul, {A, B, bias}, {C}),
      args(A->getDims()[0], transA ? A->getDims()[2] : A->getDims()[1],
           transB ? B->getDims()[1] : B->getDims()[2],
           transA ? A->getDims()[1] : A->getDims()[2], transA, transB, act) {
    : OperatorNode(OpType::Matmul, {A, B, bias}, {C}), transA(transA),
      transB(transB), act(act), b(A->getDims()[0]),
      m(transA ? A->getDims()[2] : A->getDims()[1]),
      n(transB ? B->getDims()[1] : B->getDims()[2]),
      k(transA ? A->getDims()[1] : A->getDims()[2]) {
    IT_ASSERT(checkValid(inputs));
}

string MatmulNode::toString() const {
    std::ostringstream os;
    MatmulArgs args = getArgs();
    os << "Matmul([" << (args.transA ? "A^T" : "A") << ","
       << (args.transB ? "B^T" : "B") << ",act=" << (int)args.act
       << "],A=" << inputs[0]->getGuid() << ",B=" << inputs[1]->getGuid()
       << ",C=" << outputs[0]->getGuid() << ")";
    os << "Matmul([" << (transA ? "A^T" : "A") << "," << (transB ? "B^T" : "B")
       << ",act=" << enum_to_underlying(act) << "],A=" << inputs[0]->getGuid()
       << ",B=" << inputs[1]->getGuid() << ",C=" << outputs[0]->getGuid()
       << ")";
    return os.str();
}
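To make the constructor's dimension derivation concrete with made-up shapes: for row-major A with getDims() == {16, 128, 64} and transA == true, the initializer list yields b = 16, m = getDims()[2] = 64 and k = getDims()[1] = 128; with transA == false it would instead give m = 128 and k = 64, and n is read from B the same way via transB.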
@@ -32,8 +29,8 @@ bool MatmulNode::checkValid(const TensorVec &inputs) const {
    //     return false;
    IT_ASSERT(A->getDims().size() == 3 && B->getDims().size() == 3);
    IT_ASSERT(A->getDims()[0] == B->getDims()[0]);
    IT_ASSERT((args.transA ? A->getDims()[1] : A->getDims()[2]) ==
              (args.transB ? B->getDims()[2] : B->getDims()[1]));
    IT_ASSERT((transA ? A->getDims()[1] : A->getDims()[2]) ==
              (transB ? B->getDims()[2] : B->getDims()[1]));
    // if (A->getDims().size() != 3 || B->getDims().size() != 3) {
    //     return false;
    // }
@@ -46,4 +43,14 @@ bool MatmulNode::checkValid(const TensorVec &inputs) const {
    // }
    return true;
}

HashType MatmulNode::hashWithShape() const {
    // TODO: use a real hash
    return b + m + n + k + transA + transB + enum_to_underlying(act);
}

OpPerfKey MatmulNode::getOpAttrs() const {
    return OpPerfKey(hashWithShape(), type,
                     {b, m, n, k, transA, transB, enum_to_underlying(act)});
}
} // namespace infini
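The additive hash above is the placeholder the TODO refers to. As one possible replacement, sketched here as an assumption rather than anything present in the repository, a boost-style hash_combine over the same fields would avoid most accidental collisions:

    // Sketch of a stronger hash over the same fields; hashCombine is a local
    // helper, not something defined in the repository.
    #include <cstddef>
    #include <functional>

    inline void hashCombine(std::size_t &seed, std::size_t value) {
        // Boost-style mixing: golden-ratio constant plus shifts of the seed.
        seed ^= value + 0x9e3779b9 + (seed << 6) + (seed >> 2);
    }

    std::size_t matmulShapeHash(int b, int m, int n, int k, bool transA,
                                bool transB, int act) {
        std::size_t seed = 0;
        for (int v : {b, m, n, k, int(transA), int(transB), act})
            hashCombine(seed, std::hash<int>{}(v));
        return seed;
    }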