forked from jiuyuan/InfiniTensor
Add: exclude compile-time computable operator time
This commit is contained in:
parent
edf4e33353
commit
bc31219bde
|
@ -16,7 +16,8 @@ class GraphObj : public Object {
|
|||
string toString() const override;
|
||||
Runtime getRuntime() const { return runtime; }
|
||||
|
||||
Tensor addTensor(Shape dim, DataType dtype = DataType::Float32);
|
||||
Tensor addTensor(Shape dim, DataType dtype = DataType::Float32,
|
||||
TensorType tensorType = TensorType::Other);
|
||||
Tensor addTensor(const Tensor &tensor);
|
||||
TensorVec addTensor(const TensorVec &tensors);
|
||||
/**
|
||||
|
|
|
@ -12,13 +12,14 @@ namespace infini {
|
|||
// TODO: how to deal with this
|
||||
using ShapeElem = int;
|
||||
using Shape = vector<ShapeElem>;
|
||||
enum class TensorType { Input, Initialized, Other };
|
||||
class TensorObj : public TensorBaseObj {
|
||||
private:
|
||||
Shape shape;
|
||||
size_t _size; // Cache of Π(shape).
|
||||
Fuid fuid; // Cloned tensors share the same id. Tensors constructed from
|
||||
// scratch have a new id.
|
||||
|
||||
TensorType tensorType;
|
||||
void copyin(const void *ptr, size_t size) {
|
||||
runtime->copyBlobFromCPU(getRawDataPtr<void *>(), ptr, size);
|
||||
}
|
||||
|
@ -27,7 +28,8 @@ class TensorObj : public TensorBaseObj {
|
|||
}
|
||||
|
||||
public:
|
||||
TensorObj(Shape shape, DataType dtype, Runtime runtime);
|
||||
TensorObj(Shape shape, DataType dtype, Runtime runtime,
|
||||
TensorType tensorType = TensorType::Other);
|
||||
virtual ~TensorObj() {}
|
||||
string toString() const override;
|
||||
|
||||
|
@ -39,6 +41,7 @@ class TensorObj : public TensorBaseObj {
|
|||
size_t getOffset(const vector<int> &ds) const;
|
||||
void dataMalloc();
|
||||
UidBaseType getFuid() const { return fuid; }
|
||||
TensorType getTensorType() const { return tensorType; }
|
||||
|
||||
void load(std::string file_path);
|
||||
void save(std::string file_path);
|
||||
|
|
|
@ -129,8 +129,9 @@ void GraphObj::dataMalloc() {
|
|||
}
|
||||
}
|
||||
|
||||
Tensor GraphObj::addTensor(Shape dim, DataType dtype) {
|
||||
return tensors.emplace_back(make_ref<TensorObj>(dim, dtype, runtime));
|
||||
Tensor GraphObj::addTensor(Shape dim, DataType dtype, TensorType tensorType) {
|
||||
return tensors.emplace_back(
|
||||
make_ref<TensorObj>(dim, dtype, runtime, tensorType));
|
||||
}
|
||||
|
||||
Tensor GraphObj::addTensor(const Tensor &tensor) {
|
||||
|
|
|
@ -65,6 +65,21 @@ double RuntimeObj::getPerfTime(const Graph &graph, bool profiling,
|
|||
double totalTime = 0;
|
||||
std::map<OpType, double> opTime;
|
||||
std::map<OpType, int> opCnt;
|
||||
map<UidBaseType, bool> ctcMap; // compile-time computable
|
||||
|
||||
// Skip static computation
|
||||
bool status = graph->topo_sort();
|
||||
IT_ASSERT(status, "Topological sort failed");
|
||||
for (auto &op : graph->getOperators()) {
|
||||
bool compileTimeComputable = true;
|
||||
for (auto input : op->getInputs()) {
|
||||
// FIXME: propagate the tensor type. Currently, only the first operator
|
||||
// after weights are compile-time computable.
|
||||
if (input->getTensorType() != TensorType::Initialized)
|
||||
compileTimeComputable = false;
|
||||
}
|
||||
ctcMap[op->getGuid()] = compileTimeComputable;
|
||||
}
|
||||
|
||||
for (auto &op : graph->getOperators()) {
|
||||
auto kernelAttrs = KernelAttrs{device, op->getOpType(), op->getDType()};
|
||||
|
@ -73,8 +88,9 @@ double RuntimeObj::getPerfTime(const Graph &graph, bool profiling,
|
|||
auto perfData = perfEngine.getPerfData(perfKey);
|
||||
|
||||
double time = -1e9;
|
||||
// Tune the kernel if there is no record
|
||||
if (perfData) {
|
||||
if (ctcMap[op->getGuid()]) { // Compile-time computable operators
|
||||
time = 0;
|
||||
} else if (perfData) { // Tune the kernel if there is no record
|
||||
time = perfData->time;
|
||||
} else if (allowEstimation && op->getOpType() == OpType::MemBound) {
|
||||
time = as<MemBoundObj>(op)->getEstimatedTime();
|
||||
|
@ -107,7 +123,7 @@ double RuntimeObj::getPerfTime(const Graph &graph, bool profiling,
|
|||
totalTime += time;
|
||||
if (profiling) {
|
||||
op->print();
|
||||
printf(" op_time %lf\n", time);
|
||||
printf(" op_time %lf\n", time);
|
||||
opTime[op->getOpType()] += time;
|
||||
opCnt[op->getOpType()]++;
|
||||
}
|
||||
|
|
|
@ -8,12 +8,14 @@
|
|||
|
||||
namespace infini {
|
||||
|
||||
TensorObj::TensorObj(Shape shape_, DataType dtype, Runtime runtime)
|
||||
TensorObj::TensorObj(Shape shape_, DataType dtype, Runtime runtime,
|
||||
TensorType tensorType)
|
||||
: TensorBaseObj(shape_.size(), dtype, runtime), shape(std::move(shape_)),
|
||||
_size(shape.empty()
|
||||
? 0
|
||||
: std::accumulate(shape.begin(), shape.end(), 1,
|
||||
[](auto acc, auto x) { return acc * x; })) {}
|
||||
[](auto acc, auto x) { return acc * x; })),
|
||||
tensorType(tensorType) {}
|
||||
|
||||
string TensorObj::toString() const {
|
||||
// Convert data pointer to string
|
||||
|
|
|
@ -5,12 +5,13 @@
|
|||
#include "core/search_engine.h"
|
||||
#include "cuda/cuda_runtime.h"
|
||||
#include "nnet/nmutator.h"
|
||||
#include "nnet/test.h"
|
||||
#include "operators/conv.h"
|
||||
#include "test.h"
|
||||
|
||||
namespace infini {
|
||||
|
||||
TEST(Mutator, NaiveConvWithInterpreter) {
|
||||
TEST(NMutator, NaiveConvWithInterpreter) {
|
||||
// verifyNaiveMembound True: subgraph after transformation
|
||||
// verifyNaiveMembound False: subgraph of one single membound (eOP)
|
||||
Runtime runtime = NativeCpuRuntimeObj::getInstance();
|
||||
|
@ -55,7 +56,7 @@ TEST(Mutator, NaiveConvWithInterpreter) {
|
|||
}
|
||||
|
||||
// FIXME: fails because of the implicit transpose for DLT
|
||||
TEST(Mutator, InfoGAN_TConv_3_correctness) {
|
||||
TEST(NMutator, InfoGAN_TConv_3_correctness) {
|
||||
const bool useMutatorDirectly = false;
|
||||
Runtime runtime = make_ref<CudaRuntimeObj>();
|
||||
Graph g = make_ref<GraphObj>(runtime);
|
||||
|
@ -67,8 +68,9 @@ TEST(Mutator, InfoGAN_TConv_3_correctness) {
|
|||
// const int n = 1, c = 1, h = 2, w = 2, f = 1, r = 4, s = 4;
|
||||
// const int n = 1, c = 2, h = 2, w = 2, f = 2, r = 4, s = 4;
|
||||
|
||||
auto i0 = g->addTensor({n, h, w, f});
|
||||
auto w0 = g->addTensor({f, r, s, c});
|
||||
auto i0 = g->addTensor({n, h, w, f}, DataType::Float32, TensorType::Input);
|
||||
auto w0 =
|
||||
g->addTensor({f, r, s, c}, DataType::Float32, TensorType::Initialized);
|
||||
g->addOp<ConvTransposed2dNHWCObj>(i0, w0, nullptr, 1, 1, 2, 2, 1, 1);
|
||||
|
||||
auto mutator =
|
||||
|
|
Loading…
Reference in New Issue