Add: NMutator::memboundToJson to export memboundOp

author Liyan Zheng 2023-04-18 09:56:14 +08:00
parent 885a978016
commit 37f3e9cf22
10 changed files with 212 additions and 50 deletions

View File

@@ -12,7 +12,7 @@ namespace infini {
// TODO: how to deal with this
using ShapeElem = int;
using Shape = vector<ShapeElem>;
enum class TensorType { Input, Initialized, Other };
enum class TensorType { Error = 0, Input = 1, Initialized = 2, Other = 3 };
class TensorObj : public TensorBaseObj {
private:
Shape shape;

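The explicit values pin down the integers that later appear in logs (TensorObj::toString below prints the underlying value). A minimal sketch, assuming a cast-style helper like the repository's enum_to_underlying; only the enum itself is taken from this diff:

#include <type_traits>

enum class TensorType { Error = 0, Input = 1, Initialized = 2, Other = 3 };

// Hypothetical stand-in for the helper used in TensorObj::toString.
template <typename T> constexpr auto enum_to_underlying(T e) {
    return static_cast<std::underlying_type_t<T>>(e);
}

static_assert(enum_to_underlying(TensorType::Input) == 1, "Input prints as 1");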
View File

@@ -20,6 +20,7 @@ class Serializer : public Functor<string()> {
string visit_(const Subscript &c) override;
string visit_(const Var &c) override;
string visit_(const Tensor &c) override;
string visit_(const Func &c) override;
string dispatchRoutine(const Routine &c);
Expr buildExprTree(string key);

View File

@@ -104,7 +104,7 @@ enum class NodeType {
FuncNodeType
};
enum class FuncType { Relu, Tanh, PRelu };
enum class FuncType { Relu = 1000, Tanh, PRelu };
#define DEFINE_GETTYPE(CLASS, isScalar_v) \
NodeType getType() const override { return NodeType::CLASS##Type; } \

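Together with the new Serializer::visit_(const Func &) overload above, the explicit FuncType values give each unary function a fixed integer tag once expressions are exported. A minimal round-trip sketch; the helper names and the idea that the serializer stores the raw integer are assumptions, not taken from this diff:

enum class FuncType { Relu = 1000, Tanh, PRelu };

// Hypothetical helpers mirroring how a fixed tag could be written and read back.
constexpr int toTag(FuncType t) { return static_cast<int>(t); }
constexpr FuncType fromTag(int tag) { return static_cast<FuncType>(tag); }

static_assert(fromTag(toTag(FuncType::Tanh)) == FuncType::Tanh, "round trip is lossless");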
View File

@@ -32,6 +32,8 @@ class NMutator : public Mutator {
long long cntStates = 0;
long long cntCandidates = 0;
static void memboundToJson(const Graph &g, const string path);
private:
int maxDepth = 8;
nnet::Expr opToExpression(Operator op);
@@ -57,8 +59,8 @@ class NMutator : public Mutator {
// TODO: recover these rules
// Graph fuseHetConv(nnet::Expr expr, Graph in_graph);
// Graph transformTConv1x1(Operator op);
// Graph transformTConv3x3(Operator op);
Graph transformConvtransposed1x1(Operator _op);
// Graph transformConvtransposed(Operator op);
// Graph transformDialtedConv(Operator op);
// Graph transformConv1x1(Operator op);
// Graph transformConv1xk(Operator op);

View File

@@ -33,6 +33,7 @@ class MemBoundObj : public OperatorObj {
return {expr, hash};
}
double getEstimatedTime() const { return exec_time; }
void saveAsJson(string path) const;
private:
vector<int> getWorkloadVector() const override;

View File

@@ -26,7 +26,8 @@ string TensorObj::toString() const {
ss << "nullptr data";
string ret = "Tensor " + std::to_string(guid) + ", Fuid " +
std::to_string(fuid) + ", shape " + vecToString(shape) +
", dtype " + dtype.toString();
", dtype " + dtype.toString() + ", tensorType " +
std::to_string(enum_to_underlying(tensorType));
vector<UidBaseType> targetGuids;
for (const auto &op : targets)
targetGuids.emplace_back(op.lock()->getGuid());

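With tensorType now part of the string form, a tensor reports its type as the underlying integer. An illustrative line (not captured from a real run), assuming a Float32 input tensor:

Tensor 1, Fuid 1, shape [1, 1, 1, 228], dtype Float32, tensorType 1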
View File

@@ -247,7 +247,8 @@ void init_graph_builder(py::module &m) {
py::class_<NMutator, Ref<NMutator>, Mutator>(m, "NMutator")
.def(py::init<NMutator::Mode>())
.def(py::init<NMutator::Mode, vector<int>>())
.def("run", &NMutator::run);
.def("run", &NMutator::run)
.def_static("memboundToJson", &NMutator::memboundToJson);
py::class_<SearchEngine>(m, "SearchEngine")
.def(py::init<Runtime, Ref<Mutator>>())
.def("run", &SearchEngine::run);

View File

@@ -4,6 +4,7 @@
#include "core/runtime.h"
#include "core/search_engine.h"
#include "cuda/cuda_runtime.h"
#include "ffi/ffi_callback.h"
#include "nnet/nmutator.h"
#include "operators/conv.h"
#include "operators/unary.h"
@@ -23,12 +24,43 @@ Graph getInfoGAN(int batch, Runtime runtime, int nLayers) {
{64, 4, 1, 2, false}, {32, 4, 1, 2, true},
};
Tensor input = g->addTensor({batch, 1, 1, 228});
Tensor input =
g->addTensor({batch, 1, 1, 228}, DataType::Float32, TensorType::Input);
for (int i = 0; i < (int)cs.size() && i < nLayers; ++i) {
auto [channel, kernelSize, pad, stride, tanh] = cs[i];
int f = input->getDims()[3]; // n, h, w, f
auto weight =
g->addTensor({f, kernelSize, kernelSize, channel}); // f, r, s, c
auto weight = g->addTensor({f, kernelSize, kernelSize, channel},
DataType::Float32,
TensorType::Initialized); // f, r, s, c
input = g->addOp<ConvTransposed2dNHWCObj>(input, weight, nullptr, pad,
pad, stride, stride, 1, 1)
->getOutput();
if (tanh) {
input = g->addOp<TanhObj>(input, nullptr)->getOutput();
} else {
input = g->addOp<ReluObj>(input, nullptr)->getOutput();
}
}
return g;
}
Graph getConvtransposedNHWC(Runtime runtime, Shape shape, int layerId) {
IT_ASSERT(0 <= layerId && layerId < 5);
Graph g = make_ref<GraphObj>(runtime);
vector<Tensor> weights;
vector<tuple<int, int, int, int, bool>> cs{
// Channel, kernelSize, pad, stride, isTanh
{448, 2, 0, 1, false}, {256, 4, 1, 2, false}, {128, 4, 1, 2, false},
{64, 4, 1, 2, false}, {32, 4, 1, 2, true},
};
Tensor input = g->addTensor(shape, DataType::Float32, TensorType::Input);
for (int i = layerId; i < layerId + 1; ++i) {
auto [channel, kernelSize, pad, stride, tanh] = cs[i];
int f = input->getDims()[3]; // n, h, w, f
auto weight = g->addTensor({f, kernelSize, kernelSize, channel},
DataType::Float32,
TensorType::Initialized); // f, r, s, c
input = g->addOp<ConvTransposed2dNHWCObj>(input, weight, nullptr, pad,
pad, stride, stride, 1, 1)
->getOutput();
@@ -50,6 +82,77 @@ void printGraph(Graph g) {
}
}
Graph optimizeGraph(Graph g, Runtime runtime, bool tuning) {
Runtime cpu = NativeCpuRuntimeObj::getInstance();
Graph gCpu = make_ref<GraphObj>(cpu);
auto mutator =
make_ref<NMutator>(NMutator::Mode::RuleBased,
vector<int>{3, 2, 2, 2, 2, 5, 8, 8, 6, 91, 90});
vector<Graph> bestGraphs;
SearchEngine searchEngine(runtime, mutator);
bestGraphs.emplace_back(searchEngine.run(g));
g->topo_sort();
dbg(g, bestGraphs[0], bestGraphs.size());
g->print();
g->dataMalloc();
map<UidBaseType, Tensor> fuidToInputTensor;
for (auto t : g->getInputs()) {
IT_ASSERT(fuidToInputTensor.count(t->getFuid()) == 0);
fuidToInputTensor[t->getFuid()] = t;
}
auto gen = RandomGenerator(-0.1, 0.1, 0);
for (auto t : g->getInputs()) {
t->setData(gen);
}
for (auto t : g->getOutputs()) {
t->setData(ZeroGenerator());
}
runtime->run(g);
dbg("Baseline graph");
printGraph(g);
dbg(runtime->getPerfTime(g, true));
for (size_t i = 0; i < bestGraphs.size(); i++) {
auto bestGraphCpu = bestGraphs[i];
auto bestGraph =
make_ref<GraphObj>(runtime, bestGraphCpu->getOperators());
bestGraph->topo_sort();
bestGraph->dataMalloc();
// Initialize inputs with random data
for (auto t : bestGraph->getInputs()) {
t->copyData(fuidToInputTensor[t->getFuid()]);
}
// Initialize outputs with zeros
for (auto t : bestGraph->getOutputs()) {
t->setData(ZeroGenerator());
}
dbg(bestGraph);
dbg(bestGraph->getOutputs());
if (tuning) {
runtime->run(bestGraph, true); // Tune kernels
runtime->run(bestGraph, false); // Execute transformed graph
auto go0 = gCpu->cloneTensor(g->getOutputs()[0]);
auto bgo0 = gCpu->cloneTensor(bestGraph->getOutputs()[0]);
// EXPECT_TRUE(go0->equalData(bgo0, 1e-3));
dbg(go0->equalData(bgo0, 1e-3));
dbg(runtime->getPerfTime(bestGraph, true));
}
dbg("Best graph");
printGraph(bestGraph);
return bestGraph;
}
return nullptr;
}
vector<Tensor> runInfoGAN(int nLayers) {
Runtime cuda = make_ref<CudaRuntimeObj>();
Runtime cpu = NativeCpuRuntimeObj::getInstance();
@@ -122,6 +225,7 @@ vector<Tensor> runInfoGAN(int nLayers) {
dbg("Best graph");
printGraph(bestGraph);
callback::exportONNX(bestGraph, "best_graph.onnx"); // Debug
return {g->getOutputs()[0], bestGraph->getOutputs()[0]};
}
return {};

View File

@@ -78,16 +78,14 @@ void NMutator::runSingleOp(Graph in_graph, std::vector<Graph> &out_graphs) {
OpVec computeOps = in_graph->getComputeOps();
IT_ASSERT(computeOps.size() == 1);
/* if (infini::Graph g = transformTConv1x1(computeOps[0])) {
// if (infini::Graph g = transformTConv1x1(computeOps[0])) {
// out_graphs.emplace_back(g);
// return;
// }
// // Commented for debug, not implemented yet
// // if (infini::Graph g = transformTConv3x3(computeOps[0])) {
// // Graph graph = new Graph(g->getOperators());
// // out_graphs.emplace_back(graph);
// // return;
// // }
if (Graph g = transformConvtransposed1x1(computeOps[0])) {
out_graphs.emplace_back(g);
return;
}
// if (infini::Graph g = transformDialtedConv(computeOps[0])) {
// out_graphs.emplace_back(g);
// return;
@@ -519,43 +517,82 @@ double NMutator::memboundTime(const Shape &dims) {
// return nullptr;
// }
// Graph NMutator::transformTConv3x3(Operator op) {
// if (auto tconvOp = dynamic_cast<ConvTransOp *>(op)) {
// dbg(tconvOp->getInputs()[1]->getDims());
// if (tconvOp->getPh() == 1 && tconvOp->getSh() == 2 &&
// tconvOp->getInputs()[1]->getDims()[0] == 3 &&
// tconvOp->getInputs()[1]->getDims()[1] == 3) {
// auto g = new infini::Graph();
// auto inputDims = tconvOp->getInputs(0)->getDims();
// auto weightDims = tconvOp->getInputs(1)->getDims();
// auto outputDims = tconvOp->getOutput()->getDims();
// // NHWF
// auto newA = g->tensor(
// {inputDims[0] * inputDims[1] * inputDims[2], inputDims[3]});
// // RSFC
// auto newW = g->tensor(
// {weightDims[0] * weightDims[1] * weightDims[3],
// weightDims[2]});
// auto newO =
// g->tensor({inputDims[0] * inputDims[1] * inputDims[2],
Graph NMutator::transformConvtransposed1x1(Operator _op) {
auto op = as<ConvTransposed2dNHWCObj>(_op);
if (!op)
return nullptr;
const auto &A = op->getInputs()[0];
const auto &W = op->getInputs()[1];
const auto &[n, c, h, w, f, r, s] = op->getNCHWFRS();
const auto &[ph, pw, sh, sw, dh, dw] = op->getPadStrideDilation();
const Shape inputDims = op->getInputs(0)->getDims();
const Shape weightDims = op->getInputs(1)->getDims();
const Shape outputDims = op->getOutput()->getDims();
const DataType dtype = A->getDType();
IT_ASSERT_TODO(op->getNumGroups() == 1);
if (h != 1 || w != 1)
return {};
IT_ASSERT_TODO(ph == pw);
IT_ASSERT_TODO(tie(sh, sw) == tuple(1, 1));
IT_ASSERT_TODO(tie(dh, dw) == tuple(1, 1));
auto g = make_ref<GraphObj>(runtime);
// NHWF
auto newA = g->addTensor(
{inputDims[0] * inputDims[1] * inputDims[2], inputDims[3]}, dtype);
// FRSC
auto newW = g->addTensor(
{weightDims[0], weightDims[1] * weightDims[2] * weightDims[3]}, dtype);
g->addOpWithOutputs<ReshapeObj>(g->cloneTensor(A), newA, newA->getDims());
g->addOpWithOutputs<ReshapeObj>(g->cloneTensor(W), newW, newW->getDims());
Tensor newO = g->addOp<MatmulObj>(newA, newW, nullptr, 0, 0)->getOutput();
g->addOpWithOutputs<ReshapeObj>(newO, g->cloneTensor(op->getOutput()),
op->getOutput()->getDims());
return g;
}
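A short sketch of why this rewrite holds (illustrative only; the sizes below are hypothetical and assume zero padding and no output padding, which the first InfoGAN layer satisfies): with a 1x1 input, unit stride and unit dilation, every output element of the transposed convolution is a dot product over the f input channels, so the operator collapses to a single [n*1*1, f] x [f, r*s*c] matmul framed by reshapes. The element counts agree:

#include <cassert>

int main() {
    // Hypothetical sizes matching the first InfoGAN layer; illustration only.
    int n = 16, r = 2, s = 2, c = 448;
    int pad = 0, stride = 1, dilation = 1;
    // Standard transposed-convolution output size for a 1x1 input
    // (output padding assumed zero): (in - 1)*stride - 2*pad + dilation*(k - 1) + 1.
    int oh = (1 - 1) * stride - 2 * pad + dilation * (r - 1) + 1; // == r
    int ow = (1 - 1) * stride - 2 * pad + dilation * (s - 1) + 1; // == s
    assert(oh == r && ow == s);
    // So the output holds n*r*s*c values, exactly what the
    // [n*1*1, f] x [f, r*s*c] matmul produces; a reshape recovers the NHWC output.
    assert(1LL * n * oh * ow * c == 1LL * n * (r * s * c));
    return 0;
}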
// Graph NMutator::transformConvtransposed(Operator _op) {
// auto op = as<ConvTransposed2dNHWCObj>(_op);
// if (!op)
// return nullptr;
// const auto &AT = op->getInputs()[0];
// const auto &KT = op->getInputs()[1];
// const auto &[n, c, h, w, f, r, s] = op->getNCHWFRS();
// const auto &[ph, pw, sh, sw, dh, dw] = op->getPadStrideDilation();
// IT_ASSERT_TODO(op->getNumGroups() == 1);
// if (r != 4)
// return {};
// IT_ASSERT_TODO(ph == pw);
// IT_ASSERT_TODO(tie(sh, sw) == tuple(2, 2));
// IT_ASSERT_TODO(tie(dh, dw) == tuple(1, 1));
// auto g = make_ref<Graph>();
// // TODO: implement transformation rules
// // How to efficiently write an expression...
// auto inputDims = op->getInputs(0)->getDims();
// auto weightDims = op->getInputs(1)->getDims();
// auto outputDims = op->getOutput()->getDims();
// // NHWF
// auto newA =
// g->tensor({inputDims[0] * inputDims[1] * inputDims[2],
// inputDims[3]});
// // RSFC
// auto newW = g->tensor(
// {weightDims[0] * weightDims[1] * weightDims[3], weightDims[2]});
// auto newO = g->tensor({inputDims[0] * inputDims[1] * inputDims[2],
// weightDims[0] * weightDims[1] * weightDims[3]});
// g->reshape(tconvOp->getInputs(0), newA);
// g->reshape(tconvOp->getInputs(1), newW);
// g->matmul(newA, newW, newO, 0, 1);
// // g->reshape(newO, tconvOp->getOutput());
// tconvOp->print();
// dbg(newO->size() * 4, tconvOp->getOutput()->size() * 9);
// assert(newO->size() * 4 == tconvOp->getOutput()->size() * 9);
// g->membound(
// {newO}, {tconvOp->getOutput()}, {}, nullptr,
// g->reshape(op->getInputs(0), newA);
// g->reshape(op->getInputs(1), newW);
// g->matmul(newA, newW, newO, 0, 1);
// // g->reshape(newO, tconvOp->getOutput());
// tconvOp->print();
// dbg(newO->size() * 4, tconvOp->getOutput()->size() * 9);
// assert(newO->size() * 4 == tconvOp->getOutput()->size() * 9);
// g->membound({newO}, {tconvOp->getOutput()}, {}, nullptr,
// memboundTime(newO->size() + tconvOp->getOutput()->size()),
// "TConv3x3 reduce");
// g->updateConnection();
// Graph graph = new Graph(g->getOperators());
// return graph;
// }
// }
// return nullptr;
// g->updateConnection();
// Graph graph = new Graph(g->getOperators());
// return graph;
// }
// Graph NMutator::transformTConv1x1(Operator op) {
@@ -711,4 +748,13 @@ NMutator::generateUnaryExpr(const Operator &op) {
NameNToTensorT{{"T", op->getInputs()[0]}}};
}
void NMutator::memboundToJson(const Graph &g, const string path) {
for (auto &_op : g->getOperators()) {
if (auto op = as<MemBoundObj>(_op)) {
op->saveAsJson(path + "/" + "membound_" +
std::to_string(op->getGuid()) + ".json");
}
}
}
} // namespace infini
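A minimal usage sketch, not part of the diff: it assumes the code sits next to the getInfoGAN helper shown earlier (inside namespace infini), that the searched graph actually contains MemBoundObj operators (this depends on the rules applied), and that the output directory already exists; "dumps" is a made-up name. Only APIs that appear elsewhere in this commit are used:

void exportMembounds() {
    Runtime runtime = make_ref<CudaRuntimeObj>();
    Graph g = getInfoGAN(1, runtime, 5);
    auto mutator = make_ref<NMutator>(
        NMutator::Mode::RuleBased,
        vector<int>{3, 2, 2, 2, 2, 5, 8, 8, 6, 91, 90});
    SearchEngine searchEngine(runtime, mutator);
    Graph best = searchEngine.run(g);
    // One membound_<guid>.json is written per MemBoundObj in the graph.
    NMutator::memboundToJson(best, "dumps");
}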

View File

@@ -2,6 +2,7 @@
#include "nnet/Visitor/CheckOOBVisitor.h"
#include "nnet/Visitor/HashVisitor.h"
#include "nnet/Visitor/MergeMemboundMutator.h"
#include "nnet/Visitor/Serializer.h"
namespace infini {
@@ -83,4 +84,9 @@ bool MemBoundObj::checkOOB(nnet::Expr expr) {
nnet::as<nnet::RangeOpNode>(expr));
}
void MemBoundObj::saveAsJson(string path) const {
bool status = nnet::Serializer().serialize(expr, path);
IT_ASSERT(status);
}
} // namespace infini