Add: exclude compile-time computable operator time

This commit is contained in:
Liyan Zheng 2023-04-17 13:24:14 +08:00
parent edf4e33353
commit bc31219bde
6 changed files with 39 additions and 14 deletions

View File

@ -16,7 +16,8 @@ class GraphObj : public Object {
string toString() const override;
Runtime getRuntime() const { return runtime; }
Tensor addTensor(Shape dim, DataType dtype = DataType::Float32);
Tensor addTensor(Shape dim, DataType dtype = DataType::Float32,
TensorType tensorType = TensorType::Other);
Tensor addTensor(const Tensor &tensor);
TensorVec addTensor(const TensorVec &tensors);
/**

View File

@ -12,13 +12,14 @@ namespace infini {
// TODO: how to deal with this
using ShapeElem = int;
using Shape = vector<ShapeElem>;
enum class TensorType { Input, Initialized, Other };
class TensorObj : public TensorBaseObj {
private:
Shape shape;
size_t _size; // Cache of Π(shape).
Fuid fuid; // Cloned tensors share the same id. Tensors constructed from
// scratch have a new id.
TensorType tensorType;
void copyin(const void *ptr, size_t size) {
runtime->copyBlobFromCPU(getRawDataPtr<void *>(), ptr, size);
}
@ -27,7 +28,8 @@ class TensorObj : public TensorBaseObj {
}
public:
TensorObj(Shape shape, DataType dtype, Runtime runtime);
TensorObj(Shape shape, DataType dtype, Runtime runtime,
TensorType tensorType = TensorType::Other);
virtual ~TensorObj() {}
string toString() const override;
@ -39,6 +41,7 @@ class TensorObj : public TensorBaseObj {
size_t getOffset(const vector<int> &ds) const;
void dataMalloc();
UidBaseType getFuid() const { return fuid; }
TensorType getTensorType() const { return tensorType; }
void load(std::string file_path);
void save(std::string file_path);

View File

@ -129,8 +129,9 @@ void GraphObj::dataMalloc() {
}
}
Tensor GraphObj::addTensor(Shape dim, DataType dtype) {
return tensors.emplace_back(make_ref<TensorObj>(dim, dtype, runtime));
Tensor GraphObj::addTensor(Shape dim, DataType dtype, TensorType tensorType) {
return tensors.emplace_back(
make_ref<TensorObj>(dim, dtype, runtime, tensorType));
}
Tensor GraphObj::addTensor(const Tensor &tensor) {

View File

@ -65,6 +65,21 @@ double RuntimeObj::getPerfTime(const Graph &graph, bool profiling,
double totalTime = 0;
std::map<OpType, double> opTime;
std::map<OpType, int> opCnt;
map<UidBaseType, bool> ctcMap; // compile-time computable
// Skip static computation
bool status = graph->topo_sort();
IT_ASSERT(status, "Topological sort failed");
for (auto &op : graph->getOperators()) {
bool compileTimeComputable = true;
for (auto input : op->getInputs()) {
// FIXME: propagate the tensor type. Currently only the first operator
// after weights is compile-time computable.
if (input->getTensorType() != TensorType::Initialized)
compileTimeComputable = false;
}
ctcMap[op->getGuid()] = compileTimeComputable;
}
for (auto &op : graph->getOperators()) {
auto kernelAttrs = KernelAttrs{device, op->getOpType(), op->getDType()};
@ -73,8 +88,9 @@ double RuntimeObj::getPerfTime(const Graph &graph, bool profiling,
auto perfData = perfEngine.getPerfData(perfKey);
double time = -1e9;
// Tune the kernel if there is no record
if (perfData) {
if (ctcMap[op->getGuid()]) { // Compile-time computable operators
time = 0;
} else if (perfData) { // Tune the kernel if there is no record
time = perfData->time;
} else if (allowEstimation && op->getOpType() == OpType::MemBound) {
time = as<MemBoundObj>(op)->getEstimatedTime();
@ -107,7 +123,7 @@ double RuntimeObj::getPerfTime(const Graph &graph, bool profiling,
totalTime += time;
if (profiling) {
op->print();
printf(" op_time %lf\n", time);
printf(" op_time %lf\n", time);
opTime[op->getOpType()] += time;
opCnt[op->getOpType()]++;
}

View File

@ -8,12 +8,14 @@
namespace infini {
TensorObj::TensorObj(Shape shape_, DataType dtype, Runtime runtime)
TensorObj::TensorObj(Shape shape_, DataType dtype, Runtime runtime,
TensorType tensorType)
: TensorBaseObj(shape_.size(), dtype, runtime), shape(std::move(shape_)),
_size(shape.empty()
? 0
: std::accumulate(shape.begin(), shape.end(), 1,
[](auto acc, auto x) { return acc * x; })) {}
[](auto acc, auto x) { return acc * x; })),
tensorType(tensorType) {}
string TensorObj::toString() const {
// Convert data pointer to string

View File

@ -5,12 +5,13 @@
#include "core/search_engine.h"
#include "cuda/cuda_runtime.h"
#include "nnet/nmutator.h"
#include "nnet/test.h"
#include "operators/conv.h"
#include "test.h"
namespace infini {
TEST(Mutator, NaiveConvWithInterpreter) {
TEST(NMutator, NaiveConvWithInterpreter) {
// verifyNaiveMembound True: subgraph after transformation
// verifyNaiveMembound False: subgraph of one single membound (eOP)
Runtime runtime = NativeCpuRuntimeObj::getInstance();
@ -55,7 +56,7 @@ TEST(Mutator, NaiveConvWithInterpreter) {
}
// FIXME: failed since implicit transpose for DLT
TEST(Mutator, InfoGAN_TConv_3_correctness) {
TEST(NMutator, InfoGAN_TConv_3_correctness) {
const bool useMutatorDirectly = false;
Runtime runtime = make_ref<CudaRuntimeObj>();
Graph g = make_ref<GraphObj>(runtime);
@ -67,8 +68,9 @@ TEST(Mutator, InfoGAN_TConv_3_correctness) {
// const int n = 1, c = 1, h = 2, w = 2, f = 1, r = 4, s = 4;
// const int n = 1, c = 2, h = 2, w = 2, f = 2, r = 4, s = 4;
auto i0 = g->addTensor({n, h, w, f});
auto w0 = g->addTensor({f, r, s, c});
auto i0 = g->addTensor({n, h, w, f}, DataType::Float32, TensorType::Input);
auto w0 =
g->addTensor({f, r, s, c}, DataType::Float32, TensorType::Initialized);
g->addOp<ConvTransposed2dNHWCObj>(i0, w0, nullptr, 1, 1, 2, 2, 1, 1);
auto mutator =