From 8dd9f30ef41df77f88e73bb0f695a4fc7b4c540a Mon Sep 17 00:00:00 2001 From: YdrMaster Date: Tue, 27 Jun 2023 17:37:21 +0800 Subject: [PATCH 1/2] =?UTF-8?q?fix:=20=E6=AD=A3=E7=A1=AE=E5=AF=BC=E5=85=A5?= =?UTF-8?q?=20conv=20=E7=9A=84=20bias?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: YdrMaster --- include/core/graph_handler.h | 4 +- pyinfinitensor/src/pyinfinitensor/onnx.py | 56 +++++------------------ pyinfinitensor/tests/test_onnx.py | 2 + src/core/graph_handler.cc | 10 ++-- src/operators/conv.cc | 16 +++---- 5 files changed, 30 insertions(+), 58 deletions(-) diff --git a/include/core/graph_handler.h b/include/core/graph_handler.h index dd3eedaa..0acae0c6 100644 --- a/include/core/graph_handler.h +++ b/include/core/graph_handler.h @@ -45,8 +45,8 @@ class GraphHandlerObj { inline OpVec operators() { return g->getOperators(); } - Tensor conv(Tensor input, Tensor weight, Tensor output, int ph, int pw, - int sh, int sw, int dh, int dw); + Tensor conv(Tensor input, Tensor weight, Tensor bias, Tensor output, int ph, + int pw, int sh, int sw, int dh, int dw); Tensor convTransposed2d(Tensor input, Tensor weight, Tensor output, int ph, int pw, int sh, int sw, int dh, int dw, int oph, int opw); diff --git a/pyinfinitensor/src/pyinfinitensor/onnx.py b/pyinfinitensor/src/pyinfinitensor/onnx.py index 41b80b3c..b045cc29 100644 --- a/pyinfinitensor/src/pyinfinitensor/onnx.py +++ b/pyinfinitensor/src/pyinfinitensor/onnx.py @@ -84,50 +84,18 @@ class OnnxStub: else: adapt = node.input[0] - if len(node.input) > 2: - bias = "{}-bias".format(node.output[0]) - reshape = "{}-reshape".format(node.output[0]) - tensors[bias] = self.handler.conv( - tensors[adapt], - tensors[node.input[1]], - None, - p[0], - p[1], - s[0], - s[1], - d[0], - d[1], - ) - tensors[reshape] = self.handler.reshape( - tensors[node.input[2]], - None, - [ - 1, - reduce( - lambda acc, x: acc * x, - _search_shape(model, node.input[2]), - ), - 1, - 1, - ], - ) 
- tensors[node.output[0]] = self.handler.add( - tensors[bias], - tensors[reshape], - tensors.get(node.output[0]), - ) - else: - tensors[node.output[0]] = self.handler.conv( - tensors[adapt], - tensors[node.input[1]], - tensors.get(node.output[0]), - p[0], - p[1], - s[0], - s[1], - d[0], - d[1], - ) + tensors[node.output[0]] = self.handler.conv( + tensors[adapt], + tensors[node.input[1]], + tensors[node.input[2]] if len(node.input) > 2 else None, + tensors.get(node.output[0]), + p[0], + p[1], + s[0], + s[1], + d[0], + d[1], + ) elif node.op_type == "ConvTranspose": attributes = _parse_attribute( node, diff --git a/pyinfinitensor/tests/test_onnx.py b/pyinfinitensor/tests/test_onnx.py index fd589eeb..fad54dce 100644 --- a/pyinfinitensor/tests/test_onnx.py +++ b/pyinfinitensor/tests/test_onnx.py @@ -43,6 +43,8 @@ class TestStringMethods(unittest.TestCase): model = OnnxStub(onnx.load(model_file), backend.cpu_runtime()).to_onnx( "new" ) + # with open("modified.onnx", "wb") as f: + # f.write(model.SerializeToString()) model = infer_shapes(model) def test_tensor(self): diff --git a/src/core/graph_handler.cc b/src/core/graph_handler.cc index 6e36d0c3..72889cc4 100644 --- a/src/core/graph_handler.cc +++ b/src/core/graph_handler.cc @@ -23,16 +23,18 @@ Tensor GraphHandlerObj::tensor(Shape dims, int dtype) { return g->addTensor(std::move(dims), dtype_repr_convert(dtype)); } -Tensor GraphHandlerObj::conv(Tensor input, Tensor weight, Tensor output, int ph, - int pw, int sh, int sw, int dh, int dw) { +Tensor GraphHandlerObj::conv(Tensor input, Tensor weight, Tensor bias, + Tensor output, int ph, int pw, int sh, int sw, + int dh, int dw) { if (output) { g->addOpWithOutputs(std::move(input), std::move(weight), - output, ph, pw, sh, sw, dh, dw); + output, ph, pw, sh, sw, dh, dw, bias, + ActType::None); return output; } else { return g ->addOp(std::move(input), std::move(weight), output, ph, - pw, sh, sw, dh, dw) + pw, sh, sw, dh, dw, bias, ActType::None) ->getOutput(); } } diff --git 
a/src/operators/conv.cc b/src/operators/conv.cc index 0300698a..6880bcad 100644 --- a/src/operators/conv.cc +++ b/src/operators/conv.cc @@ -64,10 +64,10 @@ void ConvObj::setAuxilaryAttributes(PaddingMode mode) { ConvObj::ConvObj(GraphObj *graph, Tensor input, Tensor weight, Tensor output, int ph, int pw, int sh, int sw, int dh, int dw, Tensor bias, ActType act) - : ConvBaseObj(OpType::Conv, {input, weight}, output, ph, pw, sh, sw, dh, dw, - input, weight, act) { - if (bias) - IT_TODO_HALT(); + : ConvBaseObj(OpType::Conv, + bias ? TensorVec{input, weight, bias} + : TensorVec{input, weight}, + output, ph, pw, sh, sw, dh, dw, input, weight, act) { setAuxilaryAttributes(PaddingMode::Other); IT_ASSERT(checkValid(graph)); } @@ -75,10 +75,10 @@ ConvObj::ConvObj(GraphObj *graph, Tensor input, Tensor weight, Tensor output, ConvObj::ConvObj(GraphObj *graph, Tensor input, Tensor weight, Tensor output, PaddingMode mode, int sh, int sw, int dh, int dw, Tensor bias, ActType act) - : ConvBaseObj(OpType::Conv, {input, weight}, output, mode, sh, sw, dh, dw, - input, weight, act) { - if (bias) - IT_TODO_HALT(); + : ConvBaseObj(OpType::Conv, + bias ? 
TensorVec{input, weight, bias} + : TensorVec{input, weight}, + output, mode, sh, sw, dh, dw, input, weight, act) { setAuxilaryAttributes(mode); IT_ASSERT(checkValid(graph)); } From 34ac0c654101c28152c2d41ae0b624946c3e796e Mon Sep 17 00:00:00 2001 From: whjthu Date: Wed, 28 Jun 2023 00:19:31 +0800 Subject: [PATCH 2/2] bug fix --- include/core/graph_handler.h | 2 +- include/operators/conv.h | 2 +- src/apps/model_surgeon.cc | 27 +++++++++++++-------------- src/core/operator.cc | 15 ++++++++++++--- src/kernels/bang/conv.cc | 7 ++++--- src/operators/conv.cc | 2 -- 6 files changed, 31 insertions(+), 24 deletions(-) diff --git a/include/core/graph_handler.h b/include/core/graph_handler.h index 0acae0c6..100b7174 100644 --- a/include/core/graph_handler.h +++ b/include/core/graph_handler.h @@ -109,7 +109,7 @@ class GraphHandlerObj { inline void data_malloc() { g->dataMalloc(); } - inline void run() { g->getRuntime()->run(g); } + inline void run() { g->getRuntime()->run(g, true); } }; } // namespace infini diff --git a/include/operators/conv.h b/include/operators/conv.h index a3459716..74249223 100644 --- a/include/operators/conv.h +++ b/include/operators/conv.h @@ -111,7 +111,7 @@ class ConvBaseObj : public OperatorObj { auto getNCHWFRS() const { return tuple(n, c, h, w, f, r, s); } auto getPadStrideDilation() const { return tuple(ph, pw, sh, sw, dh, dw); } int getChannelPerGroup() const { - if (type == OpType::ConvTransNHWC) { + if (type == OpType::ConvNHWC || type == OpType::ConvTransNHWC) { return inputs[1]->getDims()[3]; } else { return inputs[1]->getDims()[1]; diff --git a/src/apps/model_surgeon.cc b/src/apps/model_surgeon.cc index 6fa41668..dc2bea68 100644 --- a/src/apps/model_surgeon.cc +++ b/src/apps/model_surgeon.cc @@ -3,10 +3,12 @@ #include "nnet/dbg.h" #include "operators/concat.h" #include "operators/conv.h" +#include "operators/element_wise.h" #include "operators/matmul.h" #include "operators/pooling.h" #include "operators/reshape.h" #include 
"operators/transpose.h" +#include "operators/unary.h" #ifdef USE_BANG #include "bang/bang_runtime.h" @@ -57,7 +59,6 @@ Graph convertNCHWtoNHWCModel(Graph inG) { if (inTensor->hasData()) { tensors[uid] = g->addTensor(runWeightComputation(rt, inTensor)); - } else { Shape s = inTensor->getDims(); tensors[uid] = g->addTensor(vector{s[0], s[2], s[3], s[1]}, @@ -92,15 +93,6 @@ Graph convertNCHWtoNHWCModel(Graph inG) { g->addOpWithOutputs( inputs[0], inputs[1], outputs[0], ph, pw, sh, sw, dh, dw, oph, opw, group, bias, cOp->getAct()); - } else if (const auto &pOp = as(op)) { - auto t = g->addOp(inputs[0], nullptr, - vector{0, 2, 3, 1}) - ->getOutput(); - auto tt = g->addTensor(op->getOutput()->getDims(), - op->getOutput()->getDType()); - g->cloneOperator(op, {t}, {tt}); - g->addOpWithOutputs(tt, outputs[0], - vector{0, 3, 1, 2}); } else if (const auto &ccOp = as(op)) { int axis = ccOp->getDim(); axis = vector{0, 3, 1, 2}[axis]; @@ -115,6 +107,10 @@ Graph convertNCHWtoNHWCModel(Graph inG) { outputs[0]->getDims()); } else if (const auto &mmOp = as(op)) { g->cloneOperator(mmOp, inputs, outputs); + } else if (const auto &uOp = as(op)) { + g->cloneOperator(uOp, inputs, outputs); + } else if (const auto &eOp = as(op)) { + g->cloneOperator(eOp, inputs, outputs); } else { dbg(op); for (auto &t : inputs) { @@ -125,14 +121,17 @@ Graph convertNCHWtoNHWCModel(Graph inG) { if (t->getDims().size() != 4) IT_TODO_HALT(); } + // FIXME: the weights for these operators should not be processed auto t = g->addOp(inputs[0], nullptr, - vector{0, 2, 3, 1}) + vector{0, 3, 1, 2}) ->getOutput(); - auto tt = g->addTensor(op->getOutput()->getDims(), - op->getOutput()->getDType()); + t->dataMalloc(); + auto s = op->getOutput()->getDims(); + auto tt = g->addTensor(s, op->getOutput()->getDType()); + tt->dataMalloc(); g->cloneOperator(op, {t}, {tt}); g->addOpWithOutputs(tt, outputs[0], - vector{0, 3, 1, 2}); + vector{0, 2, 3, 1}); } } return g; diff --git a/src/core/operator.cc b/src/core/operator.cc 
index f842e08e..bfc5186a 100644 --- a/src/core/operator.cc +++ b/src/core/operator.cc @@ -84,12 +84,18 @@ HashType OperatorObj::hash() const { bool OperatorObj::checkValid(GraphObj *graph) { auto optShapes = inferShape(); - if (!optShapes) // shape inference failed + if (!optShapes) { // shape inference failed + std::cout << "CheckValid: Shape inference failed." << std::endl; + std::cout << toString() << std::endl; return false; + } const vector &shapes = *optShapes; - if (shapes.size() != outputs.size()) + if (shapes.size() != outputs.size()) { + std::cout << "CheckValid: Shape inference result is not consistent with outputs number." << std::endl; + std::cout << toString() << std::endl; return false; + } if (graph) { // if graph != nullptr, outputs should be created auto dataTypes = inferDataType(); for (size_t i = 0; i < outputs.size(); i++) { @@ -98,8 +104,11 @@ bool OperatorObj::checkValid(GraphObj *graph) { } } else { // if outputs have been created, check their shapes for (size_t i = 0; i < shapes.size(); ++i) { - if (shapes[i] != outputs[i]->getDims()) + if (shapes[i] != outputs[i]->getDims()) { + std::cout << "CheckValid: Shape inference result is not consistent with outputs[" << i << "]." 
<< std::endl; + std::cout << toString() << std::endl; return false; + } } } return true; diff --git a/src/kernels/bang/conv.cc b/src/kernels/bang/conv.cc index 700f8ca9..a2aa0eca 100644 --- a/src/kernels/bang/conv.cc +++ b/src/kernels/bang/conv.cc @@ -31,8 +31,9 @@ class ConvCnnl : public BangKernelWithoutConfig { int inputs0Array[4] = {n, h, w, c}; int inputs1[4] = {f, c, r, s}; int inputs1Array[4] = {f, r, s, c}; - int output[4] = {n, c, h, w}; - int outputArray[4] = {n, h, w, c}; + auto oShape = op->getOutput()->getDims(); + int output[4] = {oShape[0], oShape[1], oShape[2], oShape[3]}; + int outputArray[4] = {output[0], output[3], output[1], output[2]}; if (op->getOpType() == OpType::Conv) { cnnlTensorDescriptor_t aInDesc, aDesc, bInDesc, bDesc, cInDesc, @@ -146,7 +147,7 @@ class ConvCnnl : public BangKernelWithoutConfig { checkCnnlError(cnnlCreateTensorDescriptor(&cDesc)); checkCnnlError(cnnlSetTensorDescriptor( - cDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_FLOAT, 4, outputArray)); + cDesc, CNNL_LAYOUT_NHWC, CNNL_DTYPE_FLOAT, 4, output)); cnnlConvolutionForwardAlgo_t algo; cnnlGetConvolutionForwardAlgorithm( diff --git a/src/operators/conv.cc b/src/operators/conv.cc index 6880bcad..39bac17e 100644 --- a/src/operators/conv.cc +++ b/src/operators/conv.cc @@ -135,8 +135,6 @@ ConvNHWCObj::ConvNHWCObj(GraphObj *graph, Tensor input, Tensor weight, int dw, Tensor bias, ActType act) : ConvBaseObj(OpType::ConvNHWC, {input, weight}, output, ph, pw, sh, sw, dh, dw, input, weight, act) { - if (bias) - IT_TODO_HALT(); setAuxilaryAttributes(PaddingMode::Other); IT_ASSERT(checkValid(graph)); }