forked from jiuyuan/InfiniTensor
Add: export with random weight

commit f47a411095, parent df2534d209
@@ -68,6 +68,7 @@ class NMutator : public Mutator {
     Graph transformConvtransposed1x1(Operator _op);
     // Graph transformConvtransposed(Operator op);
     vector<Graph> transformConv1x1(Operator op);
+    vector<Graph> transformConv3x3ONNX(Operator op);
     Graph transformG2bmm(Operator op);
     Graph transformGbmm(Operator op);
     Graph transformDialtedConv(Operator _op);
@@ -25,6 +25,7 @@ from onnx.shape_inference import infer_shapes
 from onnx.numpy_helper import to_array
 from typing import Dict, List, Any, Tuple, Sequence, Union, Optional
 from functools import reduce
+import numpy as np


 class OnnxStub:
@@ -644,6 +645,12 @@ class OnnxStub:
                 name = f"input{self.count_in}_{tensor.guid()}"
             else:
                 name = f"weight{self.count_in}_{tensor.guid()}"
+                shape = tensor.shape()
+                print('shape=', shape)
+                data = np.random.randn(*shape)
+                self.initializers.append(
+                    make_tensor(name, TensorProto.FLOAT, shape, data)
+                )
             self.names[tensor] = name
             if init != None:
                 init.name = name
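The block above fills exported weight tensors with random values so that a graph whose parameters carry no real data still yields a loadable ONNX model. A minimal standalone sketch of the same idea, assuming only onnx and numpy (the tensor name and shape are illustrative):

    # Build a FLOAT initializer filled with random data (illustrative names).
    import numpy as np
    from onnx import TensorProto
    from onnx.helper import make_tensor

    shape = [512, 512, 3, 3]                      # hypothetical weight shape
    data = np.random.randn(*shape).astype(np.float32)
    init = make_tensor("weight0_example", TensorProto.FLOAT, shape, data.flatten())

Flattening the array before handing it to make_tensor avoids relying on make_tensor's own handling of multi-dimensional inputs.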
@@ -819,7 +826,8 @@ class OnnxStub:
                        shape,
                    )
                )
-                ctx.push_node(make_node(ty.name, inputs, outputs, name))
+                ctx.push_node(make_node(ty.name, inputs,
+                                        outputs, name, allowzero=0))
            elif ty == backend.OpType.Concat:
                axis = backend.concat_axis_of(op)
                ctx.push_node(make_node(ty.name, inputs, outputs, name, axis=axis))
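The Reshape node now carries allowzero=0 explicitly; under ONNX opset 14 and later this spells out the default behaviour where a 0 in the shape tensor copies the corresponding input dimension instead of meaning an empty dimension. A hedged sketch of building such a node (tensor and node names are illustrative):

    from onnx.helper import make_node

    # Reshape node with the opset-14 allowzero attribute written out.
    node = make_node("Reshape", ["data", "shape"], ["reshaped"], "reshape0", allowzero=0)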
@@ -887,8 +895,21 @@ class OnnxStub:
                    ctx.push_data_input(name, "max", TensorProto.FLOAT, [], [])
                )
                ctx.push_node(make_node(ty.name, inputs, outputs, name))
-            elif ty in [backend.OpType.ConvTransNHWC, backend.OpType.GBMM,
-                        backend.OpType.G2BMM, backend.OpType.Any]:
+            elif ty == backend.OpType.Any:
+                kernel_name = backend.any_kernelName_of(op)
+                normal_op = kernel_name != 'Reduce3x3Offset_hint'
+                ctx.push_node(
+                    make_node(
+                        ty.name if normal_op else 'Reduce3x3OffsetPlugin',
+                        inputs,
+                        outputs,
+                        name,
+                        kernelName=kernel_name,
+                        domain="nnet" if normal_op else None,
+                    )
+                )
+            elif ty in [backend.OpType.ConvTransNHWC, backend.OpType.GBMM,
+                        backend.OpType.G2BMM]:
                ctx.push_node(
                    make_node(
                        ty.name,
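Any operators are now exported with their kernel name attached as a node attribute, and most of them are placed in a custom "nnet" domain, the ONNX mechanism for carrying non-standard op types; the Reduce3x3Offset_hint kernel is instead mapped to a 'Reduce3x3OffsetPlugin' node type. A minimal sketch of emitting such a custom-domain node (all names are illustrative):

    from onnx.helper import make_node

    # An "Any" node in a custom domain, carrying the kernel name as a string attribute.
    node = make_node("Any", ["x"], ["y"], "any0",
                     domain="nnet", kernelName="SomeKernel_hint")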
@@ -3,6 +3,7 @@
 #include "core/search_engine.h"
 #include "nnet/nmutator.h"
 #include "nnet/test_models.h"
+#include "operators/any.h"
 #include "operators/batch_norm.h"
 #include "operators/concat.h"
 #include "operators/conv.h"
@@ -254,6 +255,11 @@ static vector<int64_t> pad_pads_of(Operator op) {
     return ans;
 }

+static string any_kernelName_of(Operator op) {
+    IT_ASSERT(op->getOpType() == OpType::Any);
+    return as<AnyObj>(op)->getKernelName();
+}
+
 static vector<int> transpose_permute_of(Operator op) {
     IT_ASSERT(op->getOpType() == OpType::Transpose);
     return dynamic_cast<const TransposeObj *>(op.get())->getPermute();
@@ -294,6 +300,7 @@ void export_functions(py::module &m) {
         .FUNCTION(split_axis_of)
         .FUNCTION(gather_axis_of)
         .FUNCTION(membound_expr_of)
+        .FUNCTION(any_kernelName_of)
         .def("membound_hash_of",
              [](Operator op) { return as<MemBoundObj>(op)->getHash(); });
 #undef FUNCTION
@@ -74,7 +74,7 @@ void any_kernel_mapping(vector<float *> inputs, vector<float *> outputs,
         IT_ASSERT(outputs.size() == 1);
         conv3x3ToReduce(attr[0], attr[1], attr[2], attr[3], inputs[0],
                         outputs[0], inputs.size() > 1 ? inputs[1] : nullptr);
-    } else if (kernelName == "FakeOp") {
+    } else if (kernelName == "FakeOp" || kernelName == "Reduce3x3Offset_hint") {
     } else {
         std::cout << "Unimplemented AnyOp cuda kernel: " << kernelName
                   << std::endl;
@@ -101,6 +101,8 @@ void NMutator::runSingleOp(Graph in_graph, std::vector<Graph> &out_graphs) {
         out_graphs.emplace_back(g);
     for (auto g : transformConv1xk(computeOps[0]))
         out_graphs.emplace_back(g);
+    for (auto g : transformConv3x3ONNX(computeOps[0]))
+        out_graphs.emplace_back(g);
     if (Graph g = transformG2bmm(computeOps[0])) {
         out_graphs.emplace_back(g);
     }
@@ -995,7 +997,7 @@ Graph NMutator::eliminateVertically(const Graph &inputGraph) {
     // Reorder operators: move computatation operators to the head
     for (int i = ops.size() - 2; i >= 0; --i) {
         for (int j = i; j < int(ops.size()) - 1; ++j) {
-            bool swapable = false;
+            bool swapable = false;
             auto [aIsC, aEw, aLRS] = classifyOperator(ops[j]);
             auto [bIsC, bEw, bLRS] = classifyOperator(ops[j + 1]);
             // check swapable conditions:
@@ -1004,7 +1006,7 @@ Graph NMutator::eliminateVertically(const Graph &inputGraph) {
             // (aLRS && bLRS): last dim fixed
             if ((!aIsC && bIsC) && ((aEw && bEw) || (aLRS && bLRS)))
                 swapable = true;
-            if (swapable) {
+            if (swapable) {
                 swap(ops[j], ops[j + 1]);
             }
         }
@@ -1036,6 +1038,7 @@ Graph NMutator::eliminateVertically(const Graph &inputGraph) {
                 continue;
             }
         }
+
         // Operator-level fusion
         // Any+Relu -> Any(activation=1)
         if (i + 1 < int(ops.size())) {
@@ -1194,4 +1197,34 @@ Tensor NMutator::splitTransposeMerge(Graph g, Tensor A, int dim, int chunkSize,
     return A3;
 };

+vector<Graph> NMutator::transformConv3x3ONNX(Operator _op) {
+    vector<Graph> ret;
+    auto op = as<ConvObj>(_op);
+    if (!op)
+        return ret;
+    const auto &[n, c, h, w, f, r, s] = op->getNCHWFRS();
+    const auto &[ph, pw, sh, sw, dh, dw] = op->getPadStrideDilation();
+    if (tuple{n, c, h, w, f, r, s} != tuple{1, 512, 7, 7, 512, 3, 3} ||
+        tuple{ph, pw, sh, sw, dh, dw} != tuple{1, 1, 1, 1, 1, 1})
+        return ret;
+    auto g = make_ref<GraphObj>(runtime);
+    auto A = g->cloneTensor(op->getInputs(0));
+    auto W = g->cloneTensor(op->getInputs(1)); // [F, C, R, S]
+    auto O = g->cloneTensor(op->getOutput());
+    A = g->addOp<ReshapeObj>(A, nullptr, vector<int>{c, h * w})
+            ->getOutput(); // [C, H*W]
+    W = g->addOp<ReshapeObj>(W, nullptr, vector<int>{f * r * s, c})
+            ->getOutput(); // [F,R,S,C]
+    auto O0 = g->addOp<MatmulObj>(W, A, nullptr, 0, 0) // Orignal: W X A
+                  ->getOutput(); // [F*R*S, H*W]
+    vector<int> args{};
+    const string kernelName = "Reduce3x3Offset_hint";
+    // const string kernelName = "FakeOp";
+    auto O3 = g->addOpWithOutputs<AnyObj>(vector{O0}, vector{g->cloneTensor(O)},
+                                          kernelName, args);
+    hasTunedKernel = true; // enforce the transformation
+    ret.emplace_back(g);
+    return ret;
+}
+
 } // namespace infini
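transformConv3x3ONNX rewrites the specific 1x512x7x7, 512x3x3 convolution as a single Matmul followed by a custom Reduce3x3Offset_hint Any op (exported above as 'Reduce3x3OffsetPlugin'). One plausible reading of that reduce step, sketched in NumPy with small illustrative sizes and a weight layout chosen for clarity (it need not match the kernel's actual memory layout): the matmul produces a partial sum for every (filter, offset) pair at every pixel, and the reduction shifts each offset's plane back into place and accumulates, which reproduces a 3x3 same-padded convolution.

    import numpy as np

    # Small stand-ins for the real sizes (1, 512, 7, 7, 512, 3, 3).
    C, H, W, F, R, S = 4, 7, 7, 5, 3, 3
    A = np.random.randn(C, H * W)            # input feature map, [C, H*W]
    Wt = np.random.randn(F, R, S, C)         # weights, [F, R, S, C] for clarity

    # One matmul: partial sums for every (f, r, s) at every spatial position.
    P = (Wt.reshape(F * R * S, C) @ A).reshape(F, R, S, H, W)

    # Offset-wise reduction: shift each (r, s) plane by (r-1, s-1) and accumulate.
    O = np.zeros((F, H, W))
    for r in range(R):
        for s in range(S):
            dy, dx = r - 1, s - 1
            ys, ye = max(0, -dy), min(H, H - dy)   # output rows whose shifted
            xs, xe = max(0, -dx), min(W, W - dx)   # source stays in bounds
            O[:, ys:ye, xs:xe] += P[:, r, s, ys + dy:ye + dy, xs + dx:xe + dx]

    # Cross-check against a naive 3x3 convolution with zero padding 1.
    A_img = A.reshape(C, H, W)
    ref = np.zeros((F, H, W))
    for f in range(F):
        for r in range(R):
            for s in range(S):
                for y in range(H):
                    for x in range(W):
                        yy, xx = y + r - 1, x + s - 1
                        if 0 <= yy < H and 0 <= xx < W:
                            ref[f, y, x] += Wt[f, r, s] @ A_img[:, yy, xx]
    assert np.allclose(O, ref)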