Add: export with random weights

Liyan Zheng 2023-04-30 22:25:07 +08:00
parent df2534d209
commit f47a411095
5 changed files with 68 additions and 6 deletions

View File

@ -68,6 +68,7 @@ class NMutator : public Mutator {
Graph transformConvtransposed1x1(Operator _op);
// Graph transformConvtransposed(Operator op);
vector<Graph> transformConv1x1(Operator op);
vector<Graph> transformConv3x3ONNX(Operator op);
Graph transformG2bmm(Operator op);
Graph transformGbmm(Operator op);
Graph transformDialtedConv(Operator _op);

View File

@ -25,6 +25,7 @@ from onnx.shape_inference import infer_shapes
from onnx.numpy_helper import to_array
from typing import Dict, List, Any, Tuple, Sequence, Union, Optional
from functools import reduce
import numpy as np
class OnnxStub:
@ -644,6 +645,12 @@ class OnnxStub:
name = f"input{self.count_in}_{tensor.guid()}"
else:
name = f"weight{self.count_in}_{tensor.guid()}"
shape = tensor.shape()
print('shape=', shape)
data = np.random.randn(*shape)
self.initializers.append(
make_tensor(name, TensorProto.FLOAT, shape, data)
)
self.names[tensor] = name
if init is not None:
init.name = name
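
The added lines above synthesize a random FLOAT initializer for every weight tensor at export time, presumably so exported models can be run and benchmarked without trained weights. A standalone sketch of the same idea, with illustrative names; note that onnx.helper.make_tensor stores values as a flat list, so flattening explicitly is the safe form across onnx versions:

    import numpy as np
    from onnx import TensorProto
    from onnx.helper import make_tensor

    def random_initializer(name: str, shape):
        # N(0, 1) noise standing in for trained weights; flatten because
        # TensorProto keeps float_data as a flat list.
        data = np.random.randn(*shape).astype(np.float32)
        return make_tensor(name, TensorProto.FLOAT, shape, data.flatten())

    init = random_initializer("weight0_123", [512, 512, 3, 3])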
@ -819,7 +826,8 @@ class OnnxStub:
shape,
)
)
ctx.push_node(make_node(ty.name, inputs, outputs, name))
ctx.push_node(make_node(ty.name, inputs,
outputs, name, allowzero=0))
elif ty == backend.OpType.Concat:
axis = backend.concat_axis_of(op)
ctx.push_node(make_node(ty.name, inputs, outputs, name, axis=axis))
@ -887,8 +895,21 @@ class OnnxStub:
ctx.push_data_input(name, "max", TensorProto.FLOAT, [], [])
)
ctx.push_node(make_node(ty.name, inputs, outputs, name))
elif ty in [backend.OpType.ConvTransNHWC, backend.OpType.GBMM,
backend.OpType.G2BMM, backend.OpType.Any]:
elif ty == backend.OpType.Any:
kernel_name = backend.any_kernelName_of(op)
normal_op = kernel_name != 'Reduce3x3Offset_hint'
ctx.push_node(
make_node(
ty.name if normal_op else 'Reduce3x3OffsetPlugin',
inputs,
outputs,
name,
kernelName=kernel_name,
domain="nnet" if normal_op else None,
)
)
elif ty in [backend.OpType.ConvTransNHWC, backend.OpType.GBMM,
backend.OpType.G2BMM]:
ctx.push_node(
make_node(
ty.name,
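A condensed sketch of the Any-op export branch added above, assuming ty.name is "Any" here (node and attribute names mirror the diff; tensor names are placeholders):

    from onnx.helper import make_node

    def export_any(kernel_name, inputs, outputs, name):
        if kernel_name != "Reduce3x3Offset_hint":
            # generic path: node in the custom "nnet" domain, concrete
            # kernel selected by a string attribute
            return make_node("Any", inputs, outputs, name,
                             kernelName=kernel_name, domain="nnet")
        # special path: a plugin-style node in the default domain,
        # presumably for an external runtime to pick up
        return make_node("Reduce3x3OffsetPlugin", inputs, outputs, name,
                         kernelName=kernel_name)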

View File

@ -3,6 +3,7 @@
#include "core/search_engine.h"
#include "nnet/nmutator.h"
#include "nnet/test_models.h"
#include "operators/any.h"
#include "operators/batch_norm.h"
#include "operators/concat.h"
#include "operators/conv.h"
@ -254,6 +255,11 @@ static vector<int64_t> pad_pads_of(Operator op) {
return ans;
}
static string any_kernelName_of(Operator op) {
IT_ASSERT(op->getOpType() == OpType::Any);
return as<AnyObj>(op)->getKernelName();
}
static vector<int> transpose_permute_of(Operator op) {
IT_ASSERT(op->getOpType() == OpType::Transpose);
return dynamic_cast<const TransposeObj *>(op.get())->getPermute();
@ -294,6 +300,7 @@ void export_functions(py::module &m) {
.FUNCTION(split_axis_of)
.FUNCTION(gather_axis_of)
.FUNCTION(membound_expr_of)
.FUNCTION(any_kernelName_of)
.def("membound_hash_of",
[](Operator op) { return as<MemBoundObj>(op)->getHash(); });
#undef FUNCTION
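
With the binding in place, the exporter reads the kernel name off an Any operator like the other attribute getters; a minimal usage sketch, assuming op is a handle to an Any operator:

    kernel_name = backend.any_kernelName_of(op)  # e.g. "Reduce3x3Offset_hint"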

View File

@ -74,7 +74,7 @@ void any_kernel_mapping(vector<float *> inputs, vector<float *> outputs,
IT_ASSERT(outputs.size() == 1);
conv3x3ToReduce(attr[0], attr[1], attr[2], attr[3], inputs[0],
outputs[0], inputs.size() > 1 ? inputs[1] : nullptr);
} else if (kernelName == "FakeOp") {
} else if (kernelName == "FakeOp" || kernelName == "Reduce3x3Offset_hint") {
} else {
std::cout << "Unimplemented AnyOp cuda kernel: " << kernelName
<< std::endl;

View File

@ -101,6 +101,8 @@ void NMutator::runSingleOp(Graph in_graph, std::vector<Graph> &out_graphs) {
out_graphs.emplace_back(g);
for (auto g : transformConv1xk(computeOps[0]))
out_graphs.emplace_back(g);
for (auto g : transformConv3x3ONNX(computeOps[0]))
out_graphs.emplace_back(g);
if (Graph g = transformG2bmm(computeOps[0])) {
out_graphs.emplace_back(g);
}
@ -995,7 +997,7 @@ Graph NMutator::eliminateVertically(const Graph &inputGraph) {
// Reorder operators: move computation operators to the head
for (int i = ops.size() - 2; i >= 0; --i) {
for (int j = i; j < int(ops.size()) - 1; ++j) {
bool swapable = false;
bool swapable = false;
auto [aIsC, aEw, aLRS] = classifyOperator(ops[j]);
auto [bIsC, bEw, bLRS] = classifyOperator(ops[j + 1]);
// check swapable conditions:
@ -1004,7 +1006,7 @@ Graph NMutator::eliminateVertically(const Graph &inputGraph) {
// (aLRS && bLRS): last dim fixed
if ((!aIsC && bIsC) && ((aEw && bEw) || (aLRS && bLRS)))
swapable = true;
if (swapable) {
if (swapable) {
swap(ops[j], ops[j + 1]);
}
}
@ -1036,6 +1038,7 @@ Graph NMutator::eliminateVertically(const Graph &inputGraph) {
continue;
}
}
// Operator-level fusion
// Any+Relu -> Any(activation=1)
if (i + 1 < int(ops.size())) {
@ -1194,4 +1197,34 @@ Tensor NMutator::splitTransposeMerge(Graph g, Tensor A, int dim, int chunkSize,
return A3;
};
vector<Graph> NMutator::transformConv3x3ONNX(Operator _op) {
vector<Graph> ret;
auto op = as<ConvObj>(_op);
if (!op)
return ret;
const auto &[n, c, h, w, f, r, s] = op->getNCHWFRS();
const auto &[ph, pw, sh, sw, dh, dw] = op->getPadStrideDilation();
if (tuple{n, c, h, w, f, r, s} != tuple{1, 512, 7, 7, 512, 3, 3} ||
tuple{ph, pw, sh, sw, dh, dw} != tuple{1, 1, 1, 1, 1, 1})
return ret;
auto g = make_ref<GraphObj>(runtime);
auto A = g->cloneTensor(op->getInputs(0));
auto W = g->cloneTensor(op->getInputs(1)); // [F, C, R, S]
auto O = g->cloneTensor(op->getOutput());
A = g->addOp<ReshapeObj>(A, nullptr, vector<int>{c, h * w})
->getOutput(); // [C, H*W]
W = g->addOp<ReshapeObj>(W, nullptr, vector<int>{f * r * s, c})
->getOutput(); // [F*R*S, C] (intended layout: [F,R,S,C])
auto O0 = g->addOp<MatmulObj>(W, A, nullptr, 0, 0) // Original: W x A
->getOutput(); // [F*R*S, H*W]
vector<int> args{};
const string kernelName = "Reduce3x3Offset_hint";
// const string kernelName = "FakeOp";
auto O3 = g->addOpWithOutputs<AnyObj>(vector{O0}, vector{g->cloneTensor(O)},
kernelName, args);
hasTunedKernel = true; // enforce the transformation
ret.emplace_back(g);
return ret;
}
} // namespace infini
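
The transformation above rewrites the 512x7x7, 3x3/pad-1 convolution as one GEMM plus a Reduce3x3Offset kernel: the GEMM computes all nine 1x1-conv partial results per filter at once, and the reduction sums them at spatial offsets (r-1, s-1). A numpy sketch of the arithmetic (not the repo's kernel) that checks this identity; it permutes W to [F,R,S,C] explicitly, whereas the C++ above uses a plain Reshape, which only matters numerically, not for benchmarking with the random weights this commit exports:

    import numpy as np

    C, F, H, W = 4, 5, 7, 7
    x = np.random.randn(C, H, W).astype(np.float32)     # N = 1
    w = np.random.randn(F, C, 3, 3).astype(np.float32)

    # One GEMM: [F*3*3, C] @ [C, H*W] -> nine 1x1-conv results per filter
    m = w.transpose(0, 2, 3, 1).reshape(F * 9, C) @ x.reshape(C, H * W)
    m = m.reshape(F, 3, 3, H, W)

    # Offset reduction: accumulate plane (r, s) shifted by (r-1, s-1)
    mp = np.pad(m, ((0, 0), (0, 0), (0, 0), (1, 1), (1, 1)))
    y = sum(mp[:, r, s, r:r + H, s:s + W]
            for r in range(3) for s in range(3))

    # Reference: direct 3x3 convolution with padding 1
    xp = np.pad(x, ((0, 0), (1, 1), (1, 1)))
    ref = np.zeros((F, H, W), dtype=np.float32)
    for f in range(F):
        for i in range(H):
            for j in range(W):
                ref[f, i, j] = np.sum(w[f] * xp[:, i:i + 3, j:j + 3])
    assert np.allclose(y, ref, atol=1e-4)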