From bc31219bdedb36f9724dbc3c0be8fc409468ec5a Mon Sep 17 00:00:00 2001
From: Liyan Zheng
Date: Mon, 17 Apr 2023 13:24:14 +0800
Subject: [PATCH] Add: exclude compile-time computable operator time

---
 include/core/graph.h      |  3 ++-
 include/core/tensor.h     |  7 +++++--
 src/core/graph.cc         |  5 +++--
 src/core/runtime.cc       | 22 +++++++++++++++++++---
 src/core/tensor.cc        |  6 ++++--
 test/nnet/test_mutator.cc | 10 ++++++----
 6 files changed, 39 insertions(+), 14 deletions(-)

diff --git a/include/core/graph.h b/include/core/graph.h
index dab31d79..00fe2017 100644
--- a/include/core/graph.h
+++ b/include/core/graph.h
@@ -16,7 +16,8 @@ class GraphObj : public Object {
     string toString() const override;
     Runtime getRuntime() const { return runtime; }
 
-    Tensor addTensor(Shape dim, DataType dtype = DataType::Float32);
+    Tensor addTensor(Shape dim, DataType dtype = DataType::Float32,
+                     TensorType tensorType = TensorType::Other);
     Tensor addTensor(const Tensor &tensor);
     TensorVec addTensor(const TensorVec &tensors);
     /**
diff --git a/include/core/tensor.h b/include/core/tensor.h
index a1081e15..bcc99a20 100644
--- a/include/core/tensor.h
+++ b/include/core/tensor.h
@@ -12,13 +12,14 @@ namespace infini {
 // TODO: how to deal with this
 using ShapeElem = int;
 using Shape = vector<ShapeElem>;
+enum class TensorType { Input, Initialized, Other };
 class TensorObj : public TensorBaseObj {
   private:
     Shape shape;
     size_t _size; // Cache of Π(shape).
     Fuid fuid;    // Cloned tensors share the same id. Tensors constructed from
                   // scratch have a new id.
-
+    TensorType tensorType;
     void copyin(const void *ptr, size_t size) {
         runtime->copyBlobFromCPU(getRawDataPtr<void *>(), ptr, size);
     }
@@ -27,7 +28,8 @@ class TensorObj : public TensorBaseObj {
     }
 
   public:
-    TensorObj(Shape shape, DataType dtype, Runtime runtime);
+    TensorObj(Shape shape, DataType dtype, Runtime runtime,
+              TensorType tensorType = TensorType::Other);
     virtual ~TensorObj() {}
     string toString() const override;
 
@@ -39,6 +41,7 @@ class TensorObj : public TensorBaseObj {
     size_t getOffset(const vector<int> &ds) const;
     void dataMalloc();
     UidBaseType getFuid() const { return fuid; }
+    TensorType getTensorType() const { return tensorType; }
 
     void load(std::string file_path);
     void save(std::string file_path);
diff --git a/src/core/graph.cc b/src/core/graph.cc
index f52f8af7..17bcae78 100644
--- a/src/core/graph.cc
+++ b/src/core/graph.cc
@@ -129,8 +129,9 @@ void GraphObj::dataMalloc() {
     }
 }
 
-Tensor GraphObj::addTensor(Shape dim, DataType dtype) {
-    return tensors.emplace_back(make_ref<TensorObj>(dim, dtype, runtime));
+Tensor GraphObj::addTensor(Shape dim, DataType dtype, TensorType tensorType) {
+    return tensors.emplace_back(
+        make_ref<TensorObj>(dim, dtype, runtime, tensorType));
 }
 
 Tensor GraphObj::addTensor(const Tensor &tensor) {
diff --git a/src/core/runtime.cc b/src/core/runtime.cc
index 8151a6f0..3ea0112c 100644
--- a/src/core/runtime.cc
+++ b/src/core/runtime.cc
@@ -65,6 +65,21 @@ double RuntimeObj::getPerfTime(const Graph &graph, bool profiling,
     double totalTime = 0;
     std::map<OpType, double> opTime;
     std::map<OpType, int> opCnt;
+    map<UidBaseType, bool> ctcMap; // compile-time computable
+
+    // Skip static computation
+    bool status = graph->topo_sort();
+    IT_ASSERT(status, "Topological sort failed");
+    for (auto &op : graph->getOperators()) {
+        bool compileTimeComputable = true;
+        for (auto input : op->getInputs()) {
+            // FIXME: propagate the tensor type. Currently only the first
+            // operator after weights is compile-time computable.
+            if (input->getTensorType() != TensorType::Initialized)
+                compileTimeComputable = false;
+        }
+        ctcMap[op->getGuid()] = compileTimeComputable;
+    }
 
     for (auto &op : graph->getOperators()) {
         auto kernelAttrs = KernelAttrs{device, op->getOpType(), op->getDType()};
@@ -73,8 +88,9 @@ double RuntimeObj::getPerfTime(const Graph &graph, bool profiling,
         auto perfData = perfEngine.getPerfData(perfKey);
 
         double time = -1e9;
-        // Tune the kernel if there is no record
-        if (perfData) {
+        if (ctcMap[op->getGuid()]) { // Compile-time computable operators
+            time = 0;
+        } else if (perfData) { // Tune the kernel if there is no record
             time = perfData->time;
         } else if (allowEstimation && op->getOpType() == OpType::MemBound) {
             time = as<MemBoundObj>(op)->getEstimatedTime();
@@ -107,7 +123,7 @@ double RuntimeObj::getPerfTime(const Graph &graph, bool profiling,
         totalTime += time;
         if (profiling) {
            op->print();
-            printf(" op_time %lf\n", time);
+            printf(" op_time %lf\n", time);
             opTime[op->getOpType()] += time;
             opCnt[op->getOpType()]++;
         }
diff --git a/src/core/tensor.cc b/src/core/tensor.cc
index 609b1720..23f56d64 100644
--- a/src/core/tensor.cc
+++ b/src/core/tensor.cc
@@ -8,12 +8,14 @@
 
 namespace infini {
 
-TensorObj::TensorObj(Shape shape_, DataType dtype, Runtime runtime)
+TensorObj::TensorObj(Shape shape_, DataType dtype, Runtime runtime,
+                     TensorType tensorType)
     : TensorBaseObj(shape_.size(), dtype, runtime), shape(std::move(shape_)),
       _size(shape.empty()
                 ? 0
                 : std::accumulate(shape.begin(), shape.end(), 1,
-                                  [](auto acc, auto x) { return acc * x; })) {}
+                                  [](auto acc, auto x) { return acc * x; })),
+      tensorType(tensorType) {}
 
 string TensorObj::toString() const {
     // Convert data pointer to string
diff --git a/test/nnet/test_mutator.cc b/test/nnet/test_mutator.cc
index 07374554..1e2b0623 100644
--- a/test/nnet/test_mutator.cc
+++ b/test/nnet/test_mutator.cc
@@ -5,12 +5,13 @@
 #include "core/search_engine.h"
 #include "cuda/cuda_runtime.h"
 #include "nnet/nmutator.h"
+#include "nnet/test.h"
 #include "operators/conv.h"
 #include "test.h"
 
 namespace infini {
 
-TEST(Mutator, NaiveConvWithInterpreter) {
+TEST(NMutator, NaiveConvWithInterpreter) {
     // verifyNaiveMembound True: subgraph after transformation
     // verifyNaiveMembound False: subgraph of one single membound (eOP)
     Runtime runtime = NativeCpuRuntimeObj::getInstance();
@@ -55,7 +56,7 @@
 }
 
 // FIXME: failed since implicit transpose for DLT
-TEST(Mutator, InfoGAN_TConv_3_correctness) {
+TEST(NMutator, InfoGAN_TConv_3_correctness) {
     const bool useMutatorDirectly = false;
     Runtime runtime = make_ref<CudaRuntimeObj>();
     Graph g = make_ref<GraphObj>(runtime);
@@ -67,8 +68,9 @@ TEST(Mutator, InfoGAN_TConv_3_correctness) {
     // const int n = 1, c = 1, h = 2, w = 2, f = 1, r = 4, s = 4;
     // const int n = 1, c = 2, h = 2, w = 2, f = 2, r = 4, s = 4;
 
-    auto i0 = g->addTensor({n, h, w, f});
-    auto w0 = g->addTensor({f, r, s, c});
+    auto i0 = g->addTensor({n, h, w, f}, DataType::Float32, TensorType::Input);
+    auto w0 =
+        g->addTensor({f, r, s, c}, DataType::Float32, TensorType::Initialized);
     g->addOp<ConvTransposed2dObj>(i0, w0, nullptr, 1, 1, 2, 2, 1, 1);
 
     auto mutator =