add support for ConvNHWC operator and a model converter for different layouts

whjthu 2023-06-25 07:19:20 +08:00
parent 19d7dc871d
commit aaeadf9211
15 changed files with 263 additions and 22 deletions

View File

@@ -98,7 +98,7 @@ set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -UNDEBUG")
# Source files
file(GLOB_RECURSE SRC src/ffi/*.cc src/core/*.cc src/kernels/cpu/*.cc src/nnet/*.cc src/operators/*.cc src/utils/*.cc)
file(GLOB_RECURSE SRC src/apps/*.cc src/ffi/*.cc src/core/*.cc src/kernels/cpu/*.cc src/nnet/*.cc src/operators/*.cc src/utils/*.cc)
if(USE_CUDA)
file(GLOB_RECURSE SRC_CUDA src/cuda/*.cc src/cuda/*.cu src/kernels/cuda/*.cc src/kernels/cuda/*.cu)

include/core/app.h (new file, 9 lines)
View File

@@ -0,0 +1,9 @@
#pragma once
#include "core/graph.h"
#include "core/runtime.h"
namespace infini {
Graph convertNCHWtoNHWCModel(Runtime runtime, Graph inG);
} // namespace infini
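
As a quick orientation, a minimal C++ sketch of how the new entry point could be driven (the runtime and the source NCHW graph are assumed to exist already; runAsNHWC is a hypothetical helper, not part of this commit):

#include "core/app.h"

namespace infini {
// Hypothetical helper: convert an NCHW graph to NHWC and execute it.
void runAsNHWC(Runtime runtime, Graph nchwGraph) {
    Graph nhwcGraph = convertNCHWtoNHWCModel(runtime, nchwGraph);
    // The converter bails out with nullptr if it meets a tensor that is not 4-D.
    IT_ASSERT(nhwcGraph != nullptr);
    nhwcGraph->dataMalloc();  // allocate memory for every tensor in the new graph
    runtime->run(nhwcGraph);  // execute with the layout-converted operators
}
} // namespace infini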

View File

@@ -16,7 +16,8 @@ class GraphObj : public Object {
string toString() const override;
Runtime getRuntime() const { return runtime; }
Tensor addTensor(Shape dim, DataType dtype = DataType::Float32);
Tensor addTensor(Shape dim, DataType dtype = DataType::Float32,
TensorType tensorType = TensorType::Other);
Tensor addTensor(const Tensor &tensor);
TensorVec addTensor(const TensorVec &tensors);
/**

View File

@@ -49,6 +49,11 @@ class GraphHandlerObj {
Tensor convTransposed2d(Tensor input, Tensor weight, Tensor output, int ph,
int pw, int sh, int sw, int dh, int dw, int oph,
int opw);
Tensor convNHWC(Tensor input, Tensor weight, Tensor output, int ph, int pw,
int sh, int sw, int dh, int dw);
Tensor convTransposed2dNHWC(Tensor input, Tensor weight, Tensor output,
int ph, int pw, int sh, int sw, int dh, int dw,
int oph, int opw);
Tensor matmul(Tensor a, Tensor b, Tensor y, bool transA, bool transB,
Tensor bias, ActType act);
Tensor batchNorm(Tensor input, Tensor output, Tensor mean, Tensor var,

View File

@@ -11,6 +11,7 @@ enum class OpType {
Matmul,
ConvTrans,
ConvTransNHWC,
ConvNHWC,
G2BMM,
GBMM,
Pad,
@@ -121,6 +122,8 @@ class OpRegistry {
FOP(ConvBackwardData);
FOP(Matmul);
FOP(ConvTrans);
FOP(ConvTransNHWC);
FOP(ConvNHWC);
FOP(G2BMM);
FOP(GBMM);
FOP(Pad);
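
Assuming the FOP macro expands to the usual case/return pair inside OpRegistry::getOpName, the new entries keep the printable name in sync with the enum; a tiny sketch:

// ConvNHWC and ConvTransNHWC now round-trip through the registry like any other op type.
IT_ASSERT(OpRegistry::getOpName(OpType::ConvNHWC) == "ConvNHWC");
IT_ASSERT(OpRegistry::getOpName(OpType::ConvTransNHWC) == "ConvTransNHWC");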

View File

@@ -12,13 +12,14 @@ namespace infini {
// TODO: how to deal with this
using ShapeElem = int;
using Shape = vector<ShapeElem>;
enum class TensorType { Error = 0, Input = 1, Initialized = 2, Other = 3 };
class TensorObj : public TensorBaseObj {
private:
Shape shape;
size_t _size; // Cache of Π(shape).
Fuid fuid; // Cloned tensors share the same id. Tensors constructed from
// scratch have a new id.
TensorType tensorType;
void copyin(const void *ptr, size_t size) {
runtime->copyBlobFromCPU(getRawDataPtr<void *>(), ptr, size);
}
@@ -27,7 +28,8 @@ class TensorObj : public TensorBaseObj {
}
public:
TensorObj(Shape shape, DataType dtype, Runtime runtime);
TensorObj(Shape shape, DataType dtype, Runtime runtime,
TensorType tensorType = TensorType::Other);
virtual ~TensorObj() {}
string toString() const override;
@@ -39,6 +41,7 @@ class TensorObj : public TensorBaseObj {
size_t getOffset(const vector<int> &ds) const;
void dataMalloc();
UidBaseType getFuid() const { return fuid; }
TensorType getTensorType() const { return tensorType; }
void load(std::string file_path);
void save(std::string file_path);
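
A small sketch of how the new tag flows through graph construction (the runtime is assumed; shapes are illustrative). Tensors keep their role so that later passes, such as the NCHW-to-NHWC converter, can propagate it into the rebuilt graph:

auto g = make_ref<GraphObj>(runtime);
// Tag graph inputs and trained weights when they are created ...
Tensor x = g->addTensor({1, 3, 224, 224}, DataType::Float32, TensorType::Input);
Tensor w = g->addTensor({64, 3, 7, 7}, DataType::Float32, TensorType::Initialized);
// ... and query the tag later, e.g. when rebuilding the graph in another layout.
IT_ASSERT(x->getTensorType() == TensorType::Input);
IT_ASSERT(w->getTensorType() == TensorType::Initialized);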

View File

@@ -149,6 +149,25 @@ class ConvObj : public ConvBaseObj {
void setAuxilaryAttributes(PaddingMode mode) override;
};
class ConvNHWCObj : public ConvBaseObj {
public:
ConvNHWCObj(GraphObj *graph, Tensor input, Tensor weight, Tensor output,
int ph, int pw, int sh = 1, int sw = 1, int dh = 1, int dw = 1,
Tensor bias = nullptr, ActType act = ActType::None);
// Constructors for setting padding mode
ConvNHWCObj(GraphObj *graph, Tensor input, Tensor weight, Tensor output,
PaddingMode mode = PaddingMode::Same, int sh = 1, int sw = 1,
int dh = 1, int dw = 1, Tensor bias = nullptr,
ActType act = ActType::None);
OP_CLONE(ConvNHWCObj);
optional<vector<Shape>> inferShape(const TensorVec &inputs) const override;
int getNumGroups() const override { return c / getChannelPerGroup(); }
private:
void setAuxilaryAttributes(PaddingMode mode) override;
};
class ConvBackwardFilterObj : public ConvBaseObj {
private:
ActType act;
@@ -220,6 +239,7 @@ class ConvTransposed2dNHWCObj : public ConvBaseObj {
optional<vector<Shape>> inferShape(const TensorVec &inputs) const override;
int getNumGroups() const override { return group; }
std::pair<int, int> getOutputPadding() const { return {oph, opw}; }
private:
void setAuxilaryAttributes(PaddingMode mode) override;
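
ConvNHWCObj mirrors ConvObj but reads the input as NHWC and the weight with channels last (FRSC). A sketch of adding one to a graph (graph setup assumed; the null output lets inferShape create the result tensor):

// input:  {N, H, W, C} = {1, 224, 224, 3}
// weight: {F, R, S, C} = {64, 7, 7, 3}
Tensor input = g->addTensor({1, 224, 224, 3}, DataType::Float32);
Tensor weight = g->addTensor({64, 7, 7, 3}, DataType::Float32);
// ph = pw = 3, sh = sw = 2, dh = dw = 1; the inferred output shape is {1, 112, 112, 64}.
auto conv = g->addOp<ConvNHWCObj>(input, weight, nullptr, 3, 3, 2, 2, 1, 1);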

View File

@@ -19,7 +19,7 @@ class ReshapeObj : public OperatorObj {
* @param output The output tensor.
* @param dims The shape of the output tensor.
*/
ReshapeObj(GraphObj *graph, Tensor input, Tensor output, Shape dims);
ReshapeObj(GraphObj *graph, Tensor input, Tensor output, Shape dims = {});
OP_CLONE(ReshapeObj);
optional<vector<Shape>> inferShape(const TensorVec &inputs) const override;

src/apps/model_surgeon.cc (new file, 67 lines)
View File

@@ -0,0 +1,67 @@
#include "core/graph.h"
#include "core/runtime.h"
#include "nnet/dbg.h"
#include "operators/conv.h"
#include "operators/pooling.h"
#include "operators/reshape.h"
namespace infini {
Graph convertNCHWtoNHWCModel(Runtime runtime, Graph inG) {
// Construct new graph
// IT_ASSERT(inG->getInputs().size() == 1);
IT_ASSERT(inG->getOutputs().size() == 1);
bool status = inG->topo_sort();
IT_ASSERT(status);
auto g = make_ref<GraphObj>(runtime);
map<UidBaseType, Tensor> tensors;
for (const auto &t : inG->getTensors())
if (t->getDims().size() != 4)
return nullptr;
auto getTensor = [&g, &tensors](const Tensor &inTensor) {
auto uid = inTensor->getGuid();
if (auto it = tensors.find(uid); it == tensors.end()) {
Shape s = inTensor->getDims();
s = vector{s[0], s[2], s[3], s[1]};
tensors[uid] = g->addTensor(s, inTensor->getDType(),
inTensor->getTensorType());
}
return tensors[uid];
};
for (auto op : inG->getOperators()) {
TensorVec inputs, outputs;
for (auto &t : op->getInputs())
inputs.emplace_back(getTensor(t));
for (auto &t : op->getOutputs())
outputs.emplace_back(getTensor(t));
if (auto cOp = as<ConvObj>(op)) {
const auto &[ph, pw, sh, sw, dh, dw] = cOp->getPadStrideDilation();
auto bias =
cOp->getBias() ? g->cloneTensor(cOp->getBias()) : nullptr;
g->addOpWithOutputs<ConvNHWCObj>(inputs[0], inputs[1], outputs[0],
ph, pw, sh, sw, dh, dw, bias,
cOp->getAct());
} else if (const auto &cOp = as<ConvTransposed2dObj>(op)) {
const auto &[ph, pw, sh, sw, dh, dw] = cOp->getPadStrideDilation();
const auto &[oph, opw] = cOp->getOutputPadding();
auto group = cOp->getNumGroups();
auto bias =
cOp->getBias() ? g->cloneTensor(cOp->getBias()) : nullptr;
g->addOpWithOutputs<ConvTransposed2dNHWCObj>(
inputs[0], inputs[1], outputs[0], ph, pw, sh, sw, dh, dw, oph,
opw, group, bias, cOp->getAct());
} else if (const auto &cOp = as<MaxPoolObj>(op)) {
auto t = g->addOp<ReshapeObj>(inputs[0], nullptr,
cOp->getInputs(0)->getDims())
->getOutput();
auto tt = g->addTensor(cOp->getOutput()->getDims(),
cOp->getOutput()->getDType());
g->cloneOperator(op, {t}, {tt});
g->addOpWithOutputs<ReshapeObj>(tt, outputs[0]);
} else {
dbg(op);
g->cloneOperator(op, inputs, outputs);
}
}
return g;
}
} // namespace infini
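
An end-to-end sketch of the converter (only convertNCHWtoNHWCModel is introduced by this commit; the small single-output NCHW graph and the runtime are illustrative):

// Build a tiny NCHW graph: one 3x3 convolution with padding 1.
Graph nchw = make_ref<GraphObj>(runtime);
Tensor i = nchw->addTensor({1, 3, 32, 32}, DataType::Float32, TensorType::Input);
Tensor w = nchw->addTensor({8, 3, 3, 3}, DataType::Float32, TensorType::Initialized);
nchw->addOp<ConvObj>(i, w, nullptr, 1, 1); // output {1, 8, 32, 32}

// Rebuild the whole graph in NHWC: every 4-D tensor is permuted
// {N, C, H, W} -> {N, H, W, C} and Conv is replaced by ConvNHWC.
Graph nhwc = convertNCHWtoNHWCModel(runtime, nchw);
IT_ASSERT(nhwc != nullptr);
// The converted convolution output now has shape {1, 32, 32, 8}.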

View File

@@ -129,8 +129,9 @@ void GraphObj::dataMalloc() {
}
}
Tensor GraphObj::addTensor(Shape dim, DataType dtype) {
return tensors.emplace_back(make_ref<TensorObj>(dim, dtype, runtime));
Tensor GraphObj::addTensor(Shape dim, DataType dtype, TensorType tensorType) {
return tensors.emplace_back(
make_ref<TensorObj>(dim, dtype, runtime, tensorType));
}
Tensor GraphObj::addTensor(const Tensor &tensor) {

View File

@@ -55,6 +55,39 @@ Tensor GraphHandlerObj::convTransposed2d(Tensor input, Tensor weight,
}
}
Tensor GraphHandlerObj::convNHWC(Tensor input, Tensor weight, Tensor output,
int ph, int pw, int sh, int sw, int dh,
int dw) {
if (output) {
g->addOpWithOutputs<ConvNHWCObj>(std::move(input), std::move(weight),
output, ph, pw, sh, sw, dh, dw);
return output;
} else {
return g
->addOp<ConvNHWCObj>(std::move(input), std::move(weight), output,
ph, pw, sh, sw, dh, dw)
->getOutput();
}
}
Tensor GraphHandlerObj::convTransposed2dNHWC(Tensor input, Tensor weight,
Tensor output, int ph, int pw,
int sh, int sw, int dh, int dw,
int oph, int opw) {
if (output) {
g->addOpWithOutputs<ConvTransposed2dNHWCObj>(
std::move(input), std::move(weight), output, ph, pw, sh, sw, dh, dw,
oph, opw);
return output;
} else {
return g
->addOp<ConvTransposed2dNHWCObj>(std::move(input),
std::move(weight), output, ph, pw,
sh, sw, dh, dw, oph, opw)
->getOutput();
}
}
Tensor GraphHandlerObj::matmul(Tensor a, Tensor b, Tensor y, bool transA,
bool transB, Tensor bias, ActType act) {
if (y) {
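
Both new handler entries follow the existing pattern: with an explicit output tensor the operator is attached via addOpWithOutputs, otherwise the output is inferred, created, and returned. A sketch of the latter case, as the ONNX frontend would typically use it (handler and tensors assumed):

// Passing a null output lets the handler infer and create the result tensor.
Tensor out = handler->convNHWC(input, weight, nullptr,
                               /*ph=*/1, /*pw=*/1, /*sh=*/1, /*sw=*/1,
                               /*dh=*/1, /*dw=*/1);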

View File

@@ -25,7 +25,8 @@ bool OperatorObj::isConcatOp() const { return type == OpType::Concat; }
bool OperatorObj::isComputeOp() const {
return type == OpType::Conv || type == OpType::Matmul ||
type == OpType::ConvTrans || type == OpType::ConvTransNHWC ||
type == OpType::G2BMM || type == OpType::GBMM;
type == OpType::G2BMM || type == OpType::GBMM ||
type == OpType::ConvNHWC;
}
bool OperatorObj::isTransposeOp() const { return type == OpType::Transpose; }
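
With ConvNHWC added to the predicate, any pass that filters compute-bound operators picks it up automatically; a hypothetical sketch (graph g assumed):

vector<Operator> computeOps;
for (const auto &op : g->getOperators())
    if (op->isComputeOp()) // now also true for ConvNHWC
        computeOps.emplace_back(op);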

View File

@@ -8,12 +8,14 @@
namespace infini {
TensorObj::TensorObj(Shape shape_, DataType dtype, Runtime runtime)
TensorObj::TensorObj(Shape shape_, DataType dtype, Runtime runtime,
TensorType tensorType)
: TensorBaseObj(shape_.size(), dtype, runtime), shape(std::move(shape_)),
_size(shape.empty()
? 0
: std::accumulate(shape.begin(), shape.end(), 1,
[](auto acc, auto x) { return acc * x; })) {}
: std::accumulate(shape.begin(), shape.end(), 1lu,
[](auto acc, auto x) { return acc * x; })),
tensorType(tensorType) {}
string TensorObj::toString() const {
// Convert data pointer to string
@@ -24,8 +26,8 @@ string TensorObj::toString() const {
ss << "nullptr data";
string ret = "Tensor " + std::to_string(guid) + ", Fuid " +
std::to_string(fuid) + ", shape " + vecToString(shape) +
", dtype " + dtype.toString() + ", " + runtime->toString() +
", " + ss.str() + "\n";
", dtype " + dtype.toString() + ", tensorType " +
std::to_string(enum_to_underlying(tensorType));
vector<UidBaseType> targetGuids;
for (const auto &op : targets)
targetGuids.emplace_back(op.lock()->getGuid());
@@ -34,6 +36,7 @@ string TensorObj::toString() const {
else
ret += ", source None";
ret += ", targets " + vecToString(targetGuids);
ret += ", " + runtime->toString() + ", " + ss.str();
return ret;
}

View File

@@ -1,3 +1,4 @@
#include "core/app.h"
#include "core/graph_handler.h"
#include "operators/batch_norm.h"
#include "operators/concat.h"
@@ -63,6 +64,8 @@ void export_values(py::module &m) {
.VALUE(OpType, Conv)
.VALUE(OpType, Matmul)
.VALUE(OpType, ConvTrans)
.VALUE(OpType, ConvTransNHWC)
.VALUE(OpType, ConvNHWC)
.VALUE(OpType, G2BMM)
.VALUE(OpType, GBMM)
.VALUE(OpType, Pad)
@@ -132,19 +135,34 @@ static Ref<RuntimeObj> intelcpu_runtime() { return make_ref<MklRuntimeObj>(); }
#endif
static std::tuple<int, int, int, int, int, int> conv_attrs_of(Operator op) {
IT_ASSERT(op->getOpType() == OpType::Conv);
auto conv = dynamic_cast<const ConvObj *>(op.get());
return std::make_tuple(conv->getPh(), conv->getPw(), conv->getDh(),
conv->getDw(), conv->getSh(), conv->getSw());
IT_ASSERT(op->getOpType() == OpType::Conv ||
op->getOpType() == OpType::ConvNHWC);
auto conv = dynamic_cast<const ConvBaseObj *>(op.get());
return std::make_tuple(conv->getPh(), conv->getPw(), conv->getSh(),
conv->getSw(), conv->getDh(), conv->getDw());
}
static std::tuple<int, int, int, int, int, int, int, int>
conv_trans_attrs_of(Operator op) {
IT_ASSERT(op->getOpType() == OpType::ConvTrans);
auto conv = dynamic_cast<const ConvTransposed2dObj *>(op.get());
auto [oph, opw] = conv->getOutputPadding();
return std::make_tuple(conv->getPh(), conv->getPw(), conv->getDh(),
conv->getDw(), conv->getSh(), conv->getSw(), oph,
IT_ASSERT(op->getOpType() == OpType::ConvTrans ||
op->getOpType() == OpType::ConvTransNHWC);
auto conv = dynamic_cast<const ConvBaseObj *>(op.get());
int oph, opw;
if (op->getOpType() == OpType::ConvTrans) {
auto _conv = dynamic_cast<const ConvTransposed2dObj *>(op.get());
auto output_pad = _conv->getOutputPadding();
oph = output_pad.first;
opw = output_pad.second;
} else {
auto _conv = dynamic_cast<const ConvTransposed2dNHWCObj *>(op.get());
auto output_pad = _conv->getOutputPadding();
oph = output_pad.first;
opw = output_pad.second;
}
return std::make_tuple(conv->getPh(), conv->getPw(), conv->getSh(),
conv->getSw(), conv->getDh(), conv->getDw(), oph,
opw);
}
@@ -294,6 +312,9 @@ void init_graph_builder(py::module &m) {
.def("tensor", &Handler::tensor, policy::move)
.def("conv", &Handler::conv, policy::move)
.def("convTransposed2d", &Handler::convTransposed2d, policy::move)
.def("convNHWC", &Handler::convNHWC, policy::move)
.def("convtransposed2dNHWC", &Handler::convTransposed2dNHWC,
policy::move)
.def("matmul", &Handler::matmul, policy::move)
.def("batchNorm", &Handler::batchNorm, policy::move)
.def("maxPool", &Handler::maxPool, policy::move)
@@ -328,6 +349,10 @@ void init_graph_builder(py::module &m) {
.def("run", &Handler::run, policy::automatic);
}
void load_apps(py::module &m) {
m.def("convertNCHWtoNHWCModel", &convertNCHWtoNHWCModel);
}
} // namespace infini
PYBIND11_MODULE(backend, m) {
@@ -335,4 +360,5 @@ PYBIND11_MODULE(backend, m) {
infini::export_values(m);
infini::export_functions(m);
infini::init_graph_builder(m);
infini::load_apps(m);
}

View File

@@ -114,6 +114,75 @@ optional<vector<Shape>> ConvObj::inferShape(const TensorVec &inputs) const {
return {{{on, oc, oh, ow}}};
}
void ConvNHWCObj::setAuxilaryAttributes(PaddingMode mode) {
const Tensor &input = inputs[0];
const Tensor &weight = inputs[1];
n = input->getDims()[0], c = input->getDims()[3], h = input->getDims()[1],
w = input->getDims()[2], f = weight->getDims()[0], r = weight->getDims()[1],
s = weight->getDims()[2];
if (mode == PaddingMode::Same) {
int oh = h / sh;
int ow = w / sw;
ph = (h - oh * sh + (r - sh) * dh) / 2;
pw = (w - ow * sw + (s - sw) * dw) / 2;
} else if (mode == PaddingMode::Valid) {
ph = pw = 0;
}
}
ConvNHWCObj::ConvNHWCObj(GraphObj *graph, Tensor input, Tensor weight,
Tensor output, int ph, int pw, int sh, int sw, int dh,
int dw, Tensor bias, ActType act)
: ConvBaseObj(OpType::ConvNHWC, {input, weight}, output, ph, pw, sh, sw, dh,
dw, input, weight, act) {
if (bias)
IT_TODO_HALT();
setAuxilaryAttributes(PaddingMode::Other);
IT_ASSERT(checkValid(graph));
}
ConvNHWCObj::ConvNHWCObj(GraphObj *graph, Tensor input, Tensor weight,
Tensor output, PaddingMode mode, int sh, int sw,
int dh, int dw, Tensor bias, ActType act)
: ConvBaseObj(OpType::ConvNHWC, {input, weight}, output, mode, sh, sw, dh,
dw, input, weight, act) {
if (bias)
IT_TODO_HALT();
setAuxilaryAttributes(mode);
IT_ASSERT(checkValid(graph));
}
optional<vector<Shape>> ConvNHWCObj::inferShape(const TensorVec &inputs) const {
const auto &input = inputs[0], &weight = inputs[1];
auto n = input->getDims()[0];
auto h = input->getDims()[1];
auto w = input->getDims()[2];
auto f = weight->getDims()[0];
auto r = weight->getDims()[1];
auto s = weight->getDims()[2];
int on = n, oc = f;
int oh = 0, ow = 0;
// For NHWC+FRSC layout, C of input is divisible by C of weight
if (input->getDims()[3] % weight->getDims()[3] != 0)
return {};
// Set padding size
if (padding == PaddingMode::Other) {
oh = (h - (r - sh) * dh + ph * 2) / sh;
ow = (w - (s - sw) * dw + pw * 2) / sw;
} else if (padding == PaddingMode::Same) {
oh = h / sh;
ow = w / sw;
// ph = (h - oh * sh + (r - sh) * dh) / 2;
// pw = (w - ow * sw + (s - sw) * dw) / 2;
} else if (padding == PaddingMode::Valid) {
int ph = 0;
int pw = 0;
oh = (h - (r - sh) * dh + ph * 2) / sh;
ow = (w - (s - sw) * dw + pw * 2) / sw;
}
return {{{on, oh, ow, oc}}};
}
ConvTransposed2dObj::ConvTransposed2dObj(GraphObj *graph, Tensor input,
Tensor weight, Tensor output, int ph,
int pw, int sh, int sw, int dh, int dw,
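
To make the ConvNHWCObj::inferShape arithmetic above concrete, a worked example under PaddingMode::Other (numbers chosen for illustration):

// NHWC input {1, 56, 56, 64}, FRSC weight {128, 3, 3, 64},
// ph = pw = 1, sh = sw = 1, dh = dw = 1:
//   oh = (h - (r - sh) * dh + ph * 2) / sh = (56 - 2 + 2) / 1 = 56
//   ow = (w - (s - sw) * dw + pw * 2) / sw = (56 - 2 + 2) / 1 = 56
// inferShape therefore returns {{1, 56, 56, 128}}: the spatial dims keep their
// NHWC positions and the channel count becomes oc = f = 128.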