Add conv2bgemm and fix mutator::runtime

Liyan Zheng 2023-04-24 13:06:24 +08:00
parent 079985bc8c
commit 1e46750159
7 changed files with 66 additions and 43 deletions

View File

@@ -20,8 +20,10 @@ class NMutator : public Mutator {
const std::vector<int> derivationRules;
public:
NMutator(Mode mode = Mode::Normal);
NMutator(Mode mode, const std::vector<int> &derivationRules);
NMutator(Mode mode = Mode::Normal,
Runtime runtime = NativeCpuRuntimeObj::getInstance());
NMutator(Mode mode, const std::vector<int> &derivationRules,
Runtime runtime = NativeCpuRuntimeObj::getInstance());
~NMutator();
vector<Graph> run(const Graph &in_graph) override;
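The new signatures let callers inject a runtime while keeping the old behaviour as the default. Below is a minimal standalone sketch of the same pattern; `RuntimeObj`, `Mutator`, and the `cpuRuntime()` default here are simplified stand-ins for the repository's classes (the `Mode` parameter is omitted), not the real API.

```cpp
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>

struct RuntimeObj {
    std::string name;
    explicit RuntimeObj(std::string n) : name(std::move(n)) {}
};
using Runtime = std::shared_ptr<RuntimeObj>;

// Stand-in for NativeCpuRuntimeObj::getInstance(): a process-wide CPU default.
Runtime cpuRuntime() {
    static Runtime instance = std::make_shared<RuntimeObj>("cpu");
    return instance;
}

struct Mutator {
    int candidatesLimit;
    Runtime runtime;
    Mutator(int limit, Runtime rt)
        : candidatesLimit(limit), runtime(std::move(rt)) {}
};

struct NMutator : Mutator {
    std::vector<int> derivationRules;
    // Default argument mirrors the header change: callers that do not care
    // about device placement still get the CPU runtime.
    explicit NMutator(Runtime rt = cpuRuntime()) : Mutator(10, std::move(rt)) {}
    NMutator(std::vector<int> rules, Runtime rt = cpuRuntime())
        : Mutator(10, std::move(rt)), derivationRules(std::move(rules)) {}
};

int main() {
    NMutator defaulted; // falls back to the CPU runtime
    NMutator ruleBased({3, 2, 2, 5, 8, 8, 6, 90},
                       std::make_shared<RuntimeObj>("cuda"));
    std::cout << defaulted.runtime->name << " " << ruleBased.runtime->name
              << "\n"; // cpu cuda
}
```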

View File

@@ -102,8 +102,6 @@ double RuntimeObj::getPerfTime(const Graph &graph, bool profiling,
double time = -1e9;
if (ctcMap[op->getGuid()]) { // Compile-time computable operators
time = 0;
} else if (op->getOpType() == OpType::Reshape) {
time = 0;
} else if (op->getOpType() == OpType::MemBound && ignoreMemboundOp) {
time = 0;
} else if (op->getOpType() == OpType::MemBound && allowEstimation) {
@@ -136,8 +134,10 @@ double RuntimeObj::getPerfTime(const Graph &graph, bool profiling,
t->freeData();
}
if (op->getOpType() != OpType::Transpose &&
op->getOpType() != OpType::ReduceMean)
// FIXME: ignore transpose when necessary
// op->getOpType() != OpType::Transpose &&
// op->getOpType() != OpType::ReduceMean
if (op->getOpType() != OpType::Reshape)
totalTime += time;
if (profiling) {
op->print();
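Taken together, these two hunks change how Reshape is treated in `getPerfTime`: it is no longer short-circuited to zero cost up front, but its measured time is excluded when the per-operator times are summed (the previous Transpose/ReduceMean exclusion is commented out for now). A small self-contained sketch of that accumulation rule, using an illustrative `OpType` enum and time table rather than the repository's types:

```cpp
#include <iostream>
#include <unordered_map>
#include <vector>

enum class OpType { Conv, Matmul, Reshape, Transpose, MemBound };

// Sum per-operator times, skipping operator kinds treated as free.
double getPerfTime(const std::vector<OpType> &ops,
                   const std::unordered_map<OpType, double> &timeMs) {
    double totalTime = 0;
    for (OpType op : ops) {
        double time = timeMs.count(op) ? timeMs.at(op) : 0;
        // Reshape only rewrites metadata, so its measured time does not
        // count toward the graph total.
        if (op != OpType::Reshape)
            totalTime += time;
    }
    return totalTime;
}

int main() {
    std::vector<OpType> graph{OpType::Conv, OpType::Reshape, OpType::Matmul};
    std::unordered_map<OpType, double> timeMs{
        {OpType::Conv, 1.5}, {OpType::Reshape, 0.2}, {OpType::Matmul, 0.8}};
    std::cout << getPerfTime(graph, timeMs) << " ms\n"; // prints 2.3 ms
}
```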

View File

@@ -351,6 +351,12 @@ std::vector<Graph> SearchEngine::searchMutation(const MetaGraph &metaGraph) {
// mutatedGraphs.resize(2);
// if (mutatedGraphs.size() >= 2)
// mutatedGraphs = {mutatedGraphs[1]};
constexpr bool chooseBestMutation = false;
if (chooseBestMutation && mutatedGraphs.size() >= 2) {
std::sort(mutatedGraphs.begin(), mutatedGraphs.end(),
graphTimeComparer);
mutatedGraphs = {mutatedGraphs[0]};
}
for (auto graph : graphs) {
for (auto mutatedGraph : mutatedGraphs) {
std::vector<Operator> ops;
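The new block adds an optional pruning step, left disabled by `chooseBestMutation = false`: sort the candidate graphs by estimated time and keep only the fastest. A standalone sketch of the same selection logic, with a toy `Candidate` record standing in for `Graph` and `graphTimeComparer`:

```cpp
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

struct Candidate {
    std::string name;
    double estimatedMs; // estimated runtime of the rewritten graph
};

int main() {
    std::vector<Candidate> mutatedGraphs{
        {"original", 3.1}, {"conv2gemm", 2.4}, {"conv2bgemm", 1.9}};

    constexpr bool chooseBestMutation = true; // the commit leaves this false
    if (chooseBestMutation && mutatedGraphs.size() >= 2) {
        std::sort(mutatedGraphs.begin(), mutatedGraphs.end(),
                  [](const Candidate &a, const Candidate &b) {
                      return a.estimatedMs < b.estimatedMs;
                  });
        mutatedGraphs = {mutatedGraphs.front()}; // keep only the fastest
    }
    std::cout << mutatedGraphs[0].name << "\n"; // conv2bgemm
}
```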

View File

@@ -155,7 +155,6 @@ double CudaRuntimeObj::timeWithCudaGraph(Graph graph, int rounds) {
auto tvm_device = tvm::runtime::DeviceAPI::Get(tvm_device_id);
tvm_device->SetStream(tvm_device_id, getStream());
#endif
beginCudaGraphStreamCapture();
for (auto &[op, kernel, perfData] : kernels) {
if (perfData)
@@ -174,8 +173,8 @@ double CudaRuntimeObj::timeWithCudaGraph(Graph graph, int rounds) {
[&, cudaGraphInstance = cudaGraphInstance, stream = getStream()]() {
checkCudaError(cudaGraphLaunch(cudaGraphInstance, stream));
},
[&, stream = getStream()]() { cudaStreamSynchronize(stream); }, 1000,
1000);
[&, stream = getStream()]() { cudaStreamSynchronize(stream); }, rounds,
rounds);
}
} // namespace infini
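The hard-coded 1000/1000 is replaced by the caller-supplied `rounds`, so short timing runs become possible. Below is a CPU-only sketch of a `timeit`-style helper with the same shape, assuming the two integer arguments are warm-up rounds and timing rounds; `std::chrono` stands in for CUDA graph launches and stream synchronization.

```cpp
#include <chrono>
#include <cmath>
#include <functional>
#include <iostream>

double timeit(const std::function<void()> &run,
              const std::function<void()> &sync, int warmupRounds,
              int timingRounds) {
    for (int i = 0; i < warmupRounds; ++i)
        run();
    sync();
    auto begin = std::chrono::steady_clock::now();
    for (int i = 0; i < timingRounds; ++i)
        run();
    sync(); // make sure all timed work has finished
    auto end = std::chrono::steady_clock::now();
    std::chrono::duration<double, std::milli> total = end - begin;
    return total.count() / timingRounds; // average ms per round
}

int main() {
    int rounds = 50; // caller-controlled, like timeWithCudaGraph(graph, rounds)
    double ms = timeit([] { volatile double x = std::sqrt(2.0); (void)x; },
                       [] { /* nothing to synchronize on the CPU */ },
                       rounds, rounds);
    std::cout << ms << " ms per launch\n";
}
```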

View File

@@ -16,6 +16,7 @@
#include "operators/reshape.h"
#include "operators/softmax.h"
#include "operators/transpose.h"
#include "operators/pooling.h"
#include "operators/unary.h"
#include "test.h"
#include <pybind11/stl.h>
@@ -366,11 +367,11 @@ Graph optimizeGraph(Graph g, Runtime _runtime, bool tuning, NMutator::Mode mode,
Ref<NMutator> mutator;
if (mode == NMutator::Mode::Normal) {
dbg(mode);
mutator = make_ref<NMutator>(mode);
mutator = make_ref<NMutator>(mode, runtime);
} else if (mode == NMutator::Mode::RuleBased) {
dbg(mode, rules);
IT_ASSERT_TODO(rules.size() > 0);
mutator = make_ref<NMutator>(mode, rules);
mutator = make_ref<NMutator>(mode, rules, runtime);
} else
IT_TODO_HALT();
vector<Graph> bestGraphs;

View File

@@ -22,12 +22,15 @@
namespace infini {
NMutator::NMutator(Mode mode) : Mutator(10), mode{mode} {
NMutator::NMutator(Mode mode, Runtime runtime)
: Mutator(10, runtime), mode{mode} {
IT_ASSERT(mode != Mode::RuleBased, "Specify rules for the RuleBased mode.");
}
NMutator::NMutator(Mode mode, const std::vector<int> &derivationRules)
: Mutator(10), mode{Mode::RuleBased}, derivationRules{derivationRules} {
NMutator::NMutator(Mode mode, const std::vector<int> &derivationRules,
Runtime runtime)
: Mutator(10, runtime), mode{Mode::RuleBased}, derivationRules{
derivationRules} {
IT_ASSERT(mode == Mode::RuleBased);
}
@@ -93,37 +96,29 @@ void NMutator::runSingleOp(Graph in_graph, std::vector<Graph> &out_graphs) {
IT_ASSERT(computeOps.size() == 1);
if (Graph g = transformConvtransposed1x1(computeOps[0])) {
out_graphs.emplace_back(g);
return;
}
if (Graph g = transformConv1x1(computeOps[0])) {
out_graphs.emplace_back(g);
return;
}
if (infini::Graph g = transformConv1xk(computeOps[0])) {
out_graphs.emplace_back(g);
return;
}
if (Graph g = transformG2bmm(computeOps[0])) {
out_graphs.emplace_back(g);
return;
}
if (Graph g = transformGbmm(computeOps[0])) {
out_graphs.emplace_back(g);
return;
}
if (infini::Graph g = transformDialtedConv(computeOps[0])) {
out_graphs.emplace_back(g);
return;
}
if (infini::Graph g = transformConvToGEMMReduce(computeOps[0])) {
out_graphs.emplace_back(g);
return;
}
if (infini::Graph g = transformConvTranposeToGEMMReduce(computeOps[0])) {
out_graphs.emplace_back(g);
return;
}
return;
const set<OpType> opSet{OpType::Conv, OpType::ConvTransNHWC, OpType::G2BMM,
OpType::GBMM};
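Removing the early `return` statements changes `runSingleOp` from "stop at the first applicable rewrite" to "collect every applicable rewrite as a candidate graph". A minimal sketch of that control flow, with illustrative transform lambdas and a string standing in for `Graph`:

```cpp
#include <functional>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

using Graph = std::string; // stand-in for the repository's Graph handle

int main() {
    // Each transform either produces a rewritten graph or declines (nullopt).
    std::vector<std::function<std::optional<Graph>()>> transforms{
        [] { return std::optional<Graph>{}; }, // not applicable to this op
        [] { return std::optional<Graph>{"conv2gemm"}; },
        [] { return std::optional<Graph>{"conv2bgemm"}; },
    };

    std::vector<Graph> outGraphs;
    for (auto &transform : transforms)
        if (auto g = transform()) // no early return: keep collecting
            outGraphs.emplace_back(*g);

    for (auto &g : outGraphs)
        std::cout << g << "\n"; // conv2gemm, conv2bgemm
}
```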
@@ -735,25 +730,45 @@ Graph NMutator::transformConv1x1(Operator _op) {
g->addOp<MatmulObj>(B, A, nullptr, 0, 0)->getOutput(); // [F, N*H*W]
g->addOpWithOutputs<ReshapeObj>(O, g->cloneTensor(op->getOutput()),
op->getOutput()->getDims());
} else {
auto A = g->addOp<TransposeObj>(g->cloneTensor(op->getInputs(0)),
nullptr, vector{1, 0, 2, 3})
->getOutput(); // [C,N,H,W]
A = g->addOp<ReshapeObj>(A, nullptr,
vector{shapeA[1], shapeA[0] * shapeA[2] *
shapeA[3]}) // [C, N*H*W]
        // } else { // Transpose + Matmul + Transpose
        //     auto A = g->addOp<TransposeObj>(g->cloneTensor(op->getInputs(0)),
        //                                     nullptr, vector{1, 0, 2, 3})
        //                  ->getOutput(); // [C,N,H,W]
        //     A = g->addOp<ReshapeObj>(A, nullptr,
        //                              vector{shapeA[1],
        //                                     shapeA[0] * shapeA[2] * shapeA[3]})
        //             ->getOutput(); // [C, N*H*W]
        //     auto B = g->addOp<ReshapeObj>(g->cloneTensor(op->getInputs(1)),
        //                                   nullptr,
        //                                   vector{shapeW[0], shapeW[1]})
        //                  ->getOutput(); // [F, C]
        //     auto O =
        //         g->addOp<MatmulObj>(B, A, nullptr, 0, 0)->getOutput(); // [F, NHW]
        //     O = g->addOp<ReshapeObj>(
        //              O, nullptr, Shape{shapeO[1], shapeO[0], shapeO[2], shapeO[3]})
        //             ->getOutput(); // [F, NHW]
        //     O = g->addOpWithOutputs<TransposeObj>(
        //              O, g->cloneTensor(op->getOutput()), vector{1, 0, 2, 3})
        //             ->getOutput(); // [F, N*H*W]
} else { // BGemm
auto A =
g->addOp<ReshapeObj>(g->cloneTensor(op->getInputs(0)), nullptr,
vector{shapeA[0], shapeA[1],
shapeA[2] * shapeA[3]}) // [N, C, H*W]
->getOutput();
auto B =
g->addOp<ReshapeObj>(g->cloneTensor(op->getInputs(1)), nullptr,
vector{1, shapeW[0], shapeW[1]}) // [1, F, C]
->getOutput();
auto B = g->addOp<ReshapeObj>(g->cloneTensor(op->getInputs(1)), nullptr,
vector{shapeW[0], shapeW[1]}) // [F, C]
->getOutput();
auto O =
g->addOp<MatmulObj>(B, A, nullptr, 0, 0)->getOutput(); // [F, NHW]
O = g->addOp<ReshapeObj>(
O, nullptr, Shape{shapeO[1], shapeO[0], shapeO[2], shapeO[3]})
->getOutput(); // [F, NHW]
O = g->addOpWithOutputs<TransposeObj>(
O, g->cloneTensor(op->getOutput()), vector{1, 0, 2, 3})
->getOutput(); // [F, N*H*W]
g->addOp<MatmulObj>(B, A, nullptr, 0, 0)->getOutput(); // [F, N*H*W]
g->addOpWithOutputs<ReshapeObj>(O, g->cloneTensor(op->getOutput()),
op->getOutput()->getDims());
}
return g;
}
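The new BGemm branch lowers a 1x1 convolution (stride 1, no padding) to a batched matmul: the NCHW input is viewed as [N, C, H*W], the weight as [1, F, C] broadcast over the batch, and the product [N, F, H*W] is reshaped back to the output dims. Because NCHW is already contiguous as [N][C][H*W], both reshapes are pure views, which is what makes this cheaper than the transpose-based path commented out above. A standalone numerical check of that equivalence with plain loops (small hand-picked sizes, no repository types):

```cpp
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
    const int N = 2, C = 3, F = 4, H = 5, W = 5;
    std::vector<float> x(N * C * H * W), w(F * C);
    std::vector<float> direct(N * F * H * W, 0.0f), bgemm(N * F * H * W, 0.0f);
    for (std::size_t i = 0; i < x.size(); ++i) x[i] = 0.01f * i;
    for (std::size_t i = 0; i < w.size(); ++i) w[i] = 0.1f * i;

    // Direct 1x1 convolution on NCHW input:
    // out[n][f][h][w] = sum_c w[f][c] * x[n][c][h][w]
    for (int n = 0; n < N; ++n)
        for (int f = 0; f < F; ++f)
            for (int c = 0; c < C; ++c)
                for (int p = 0; p < H * W; ++p)
                    direct[(n * F + f) * H * W + p] +=
                        w[f * C + c] * x[(n * C + c) * H * W + p];

    // BGemm view: A = reshape(x, [N, C, H*W]), B = reshape(w, [1, F, C])
    // broadcast over the batch; each batch is an [F, C] x [C, H*W] GEMM.
    auto gemm = [](const float *B, const float *A, float *O, int M, int K,
                   int P) {
        for (int m = 0; m < M; ++m)     // O[M x P] = B[M x K] * A[K x P]
            for (int p = 0; p < P; ++p) {
                float acc = 0;
                for (int k = 0; k < K; ++k)
                    acc += B[m * K + k] * A[k * P + p];
                O[m * P + p] = acc;
            }
    };
    for (int n = 0; n < N; ++n)
        gemm(w.data(), x.data() + n * C * H * W, bgemm.data() + n * F * H * W,
             F, C, H * W);

    float maxDiff = 0.0f;
    for (std::size_t i = 0; i < direct.size(); ++i)
        maxDiff = std::max(maxDiff, std::fabs(direct[i] - bgemm[i]));
    std::printf("max |direct - bgemm| = %g\n", maxDiff); // 0
}
```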

View File

@@ -167,14 +167,14 @@ if __name__ == "__main__":
for original_g, name in graphs:
print(f"=== {name}")
# save_onnx(original_g, f"hkz_orig_{name}.onnx")
# save_onnx(original_g, f"orig_{name}.onnx")
# original_g = ft.convertNCHWtoNHWCModel(runtime, original_g)
# save_onnx(original_g, f"hkz_dlt_{name}.onnx")
# save_onnx(dlt_g, f"dlt_{name}.onnx")
# exit()
# run_and_evaluate(runtime, original_g)
g = ft.optimizeGraph(original_g, runtime, False, ft.NMutatorMode.RuleBased,
[1, 7, 7, 2, 8, 6, 6]) # G2BMM/GBMM
# g = ft.optimizeGraph(original_g, runtime, False, ft.NMutatorMode.RuleBased,
# [3, 2, 2, 5, 8, 8, 6, 90]) # Conv2conv
# g = ft.optimizeGraph(original_g, runtime, False, ft.NMutatorMode.Normal)