add support for ConvNHWC operator and model converter for different layout

Author: whjthu
Date: 2023-06-25 07:19:20 +08:00
Parent: 19d7dc871d
Commit: aaeadf9211

15 changed files with 263 additions and 22 deletions
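As a quick orientation, the converter added by this commit is a single free function declared in include/core/app.h. Below is a minimal C++ usage sketch; the wrapper function toNHWC is hypothetical and only convertNCHWtoNHWCModel comes from this commit:

#include "core/app.h"

namespace infini {
// Hypothetical wrapper: returns the NHWC version of an NCHW graph, or the
// original graph unchanged when conversion is not possible (the converter
// returns nullptr if the model contains a tensor whose rank is not 4).
Graph toNHWC(Runtime runtime, Graph nchwGraph) {
    Graph nhwcGraph = convertNCHWtoNHWCModel(runtime, nchwGraph);
    return nhwcGraph ? nhwcGraph : nchwGraph;
}
} // namespace infini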

@@ -98,7 +98,7 @@ set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -UNDEBUG")
 # Source files
-file(GLOB_RECURSE SRC src/ffi/*.cc src/core/*.cc src/kernels/cpu/*.cc src/nnet/*.cc src/operators/*.cc src/utils/*.cc)
+file(GLOB_RECURSE SRC src/apps/*.cc src/ffi/*.cc src/core/*.cc src/kernels/cpu/*.cc src/nnet/*.cc src/operators/*.cc src/utils/*.cc)
 if(USE_CUDA)
   file(GLOB_RECURSE SRC_CUDA src/cuda/*.cc src/cuda/*.cu src/kernels/cuda/*.cc src/kernels/cuda/*.cu)

include/core/app.h (new file)
@@ -0,0 +1,9 @@
+#pragma once
+#include "core/graph.h"
+#include "core/runtime.h"
+namespace infini {
+Graph convertNCHWtoNHWCModel(Runtime runtime, Graph inG);
+} // namespace infini

@@ -16,7 +16,8 @@ class GraphObj : public Object {
     string toString() const override;
     Runtime getRuntime() const { return runtime; }
-    Tensor addTensor(Shape dim, DataType dtype = DataType::Float32);
+    Tensor addTensor(Shape dim, DataType dtype = DataType::Float32,
+                     TensorType tensorType = TensorType::Other);
     Tensor addTensor(const Tensor &tensor);
     TensorVec addTensor(const TensorVec &tensors);
     /**

@@ -49,6 +49,11 @@ class GraphHandlerObj {
     Tensor convTransposed2d(Tensor input, Tensor weight, Tensor output, int ph,
                             int pw, int sh, int sw, int dh, int dw, int oph,
                             int opw);
+    Tensor convNHWC(Tensor input, Tensor weight, Tensor output, int ph, int pw,
+                    int sh, int sw, int dh, int dw);
+    Tensor convTransposed2dNHWC(Tensor input, Tensor weight, Tensor output,
+                                int ph, int pw, int sh, int sw, int dh, int dw,
+                                int oph, int opw);
     Tensor matmul(Tensor a, Tensor b, Tensor y, bool transA, bool transB,
                   Tensor bias, ActType act);
     Tensor batchNorm(Tensor input, Tensor output, Tensor mean, Tensor var,

@@ -11,6 +11,7 @@ enum class OpType {
     Matmul,
     ConvTrans,
     ConvTransNHWC,
+    ConvNHWC,
     G2BMM,
     GBMM,
     Pad,
@@ -121,6 +122,8 @@ class OpRegistry {
         FOP(ConvBackwardData);
         FOP(Matmul);
         FOP(ConvTrans);
+        FOP(ConvTransNHWC);
+        FOP(ConvNHWC);
         FOP(G2BMM);
         FOP(GBMM);
         FOP(Pad);

@@ -12,13 +12,14 @@ namespace infini {
 // TODO: how to deal with this
 using ShapeElem = int;
 using Shape = vector<ShapeElem>;
+enum class TensorType { Error = 0, Input = 1, Initialized = 2, Other = 3 };
 class TensorObj : public TensorBaseObj {
   private:
     Shape shape;
     size_t _size; // Cache of Π(shape).
     Fuid fuid;    // Cloned tensors share the same id. Tensors constructed from
                   // scratch have a new id.
+    TensorType tensorType;
     void copyin(const void *ptr, size_t size) {
         runtime->copyBlobFromCPU(getRawDataPtr<void *>(), ptr, size);
     }
@@ -27,7 +28,8 @@ class TensorObj : public TensorBaseObj {
     }
   public:
-    TensorObj(Shape shape, DataType dtype, Runtime runtime);
+    TensorObj(Shape shape, DataType dtype, Runtime runtime,
+              TensorType tensorType = TensorType::Other);
     virtual ~TensorObj() {}
     string toString() const override;
@@ -39,6 +41,7 @@ class TensorObj : public TensorBaseObj {
     size_t getOffset(const vector<int> &ds) const;
     void dataMalloc();
     UidBaseType getFuid() const { return fuid; }
+    TensorType getTensorType() const { return tensorType; }
     void load(std::string file_path);
     void save(std::string file_path);

@@ -149,6 +149,25 @@ class ConvObj : public ConvBaseObj {
     void setAuxilaryAttributes(PaddingMode mode) override;
 };
+class ConvNHWCObj : public ConvBaseObj {
+  public:
+    ConvNHWCObj(GraphObj *graph, Tensor input, Tensor weight, Tensor output,
+                int ph, int pw, int sh = 1, int sw = 1, int dh = 1, int dw = 1,
+                Tensor bias = nullptr, ActType act = ActType::None);
+    // Constructors for setting padding mode
+    ConvNHWCObj(GraphObj *graph, Tensor input, Tensor weight, Tensor output,
+                PaddingMode mode = PaddingMode::Same, int sh = 1, int sw = 1,
+                int dh = 1, int dw = 1, Tensor bias = nullptr,
+                ActType act = ActType::None);
+    OP_CLONE(ConvNHWCObj);
+    optional<vector<Shape>> inferShape(const TensorVec &inputs) const override;
+    int getNumGroups() const override { return c / getChannelPerGroup(); }
+
+  private:
+    void setAuxilaryAttributes(PaddingMode mode) override;
+};
 class ConvBackwardFilterObj : public ConvBaseObj {
   private:
     ActType act;
@@ -220,6 +239,7 @@ class ConvTransposed2dNHWCObj : public ConvBaseObj {
     optional<vector<Shape>> inferShape(const TensorVec &inputs) const override;
     int getNumGroups() const override { return group; }
+    std::pair<int, int> getOutputPadding() const { return {oph, opw}; }
   private:
     void setAuxilaryAttributes(PaddingMode mode) override;

@@ -19,7 +19,7 @@ class ReshapeObj : public OperatorObj {
      * @param output The output tensor.
      * @param dims The shape of the output tensor.
      */
-    ReshapeObj(GraphObj *graph, Tensor input, Tensor output, Shape dims);
+    ReshapeObj(GraphObj *graph, Tensor input, Tensor output, Shape dims = {});
     OP_CLONE(ReshapeObj);
     optional<vector<Shape>> inferShape(const TensorVec &inputs) const override;

src/apps/model_surgeon.cc (new file)
@@ -0,0 +1,67 @@
+#include "core/graph.h"
+#include "core/runtime.h"
+#include "nnet/dbg.h"
+#include "operators/conv.h"
+#include "operators/pooling.h"
+#include "operators/reshape.h"
+
+namespace infini {
+
+Graph convertNCHWtoNHWCModel(Runtime runtime, Graph inG) {
+    // Construct new graph
+    // IT_ASSERT(inG->getInputs().size() == 1);
+    IT_ASSERT(inG->getOutputs().size() == 1);
+    bool status = inG->topo_sort();
+    IT_ASSERT(status);
+    auto g = make_ref<GraphObj>(runtime);
+    map<UidBaseType, Tensor> tensors;
+    for (const auto &t : inG->getTensors())
+        if (t->getDims().size() != 4)
+            return nullptr;
+    auto getTensor = [&g, &tensors](const Tensor &inTensor) {
+        auto uid = inTensor->getGuid();
+        if (auto it = tensors.find(uid); it == tensors.end()) {
+            Shape s = inTensor->getDims();
+            s = vector{s[0], s[2], s[3], s[1]};
+            tensors[uid] = g->addTensor(s, inTensor->getDType(),
+                                        inTensor->getTensorType());
+        }
+        return tensors[uid];
+    };
+    for (auto op : inG->getOperators()) {
+        TensorVec inputs, outputs;
+        for (auto &t : op->getInputs())
+            inputs.emplace_back(getTensor(t));
+        for (auto &t : op->getOutputs())
+            outputs.emplace_back(getTensor(t));
+        if (auto cOp = as<ConvObj>(op)) {
+            const auto &[ph, pw, sh, sw, dh, dw] = cOp->getPadStrideDilation();
+            auto bias =
+                cOp->getBias() ? g->cloneTensor(cOp->getBias()) : nullptr;
+            g->addOpWithOutputs<ConvNHWCObj>(inputs[0], inputs[1], outputs[0],
+                                             ph, pw, sh, sw, dh, dw, bias,
+                                             cOp->getAct());
+        } else if (const auto &cOp = as<ConvTransposed2dObj>(op)) {
+            const auto &[ph, pw, sh, sw, dh, dw] = cOp->getPadStrideDilation();
+            const auto &[oph, opw] = cOp->getOutputPadding();
+            auto group = cOp->getNumGroups();
+            auto bias =
+                cOp->getBias() ? g->cloneTensor(cOp->getBias()) : nullptr;
+            g->addOpWithOutputs<ConvTransposed2dNHWCObj>(
+                inputs[0], inputs[1], outputs[0], ph, pw, sh, sw, dh, dw, oph,
+                opw, group, bias, cOp->getAct());
+        } else if (const auto &cOp = as<MaxPoolObj>(op)) {
+            auto t = g->addOp<ReshapeObj>(inputs[0], nullptr,
+                                          cOp->getInputs(0)->getDims())
+                         ->getOutput();
+            auto tt = g->addTensor(cOp->getOutput()->getDims(),
+                                   cOp->getOutput()->getDType());
+            g->cloneOperator(op, {t}, {tt});
+            g->addOpWithOutputs<ReshapeObj>(tt, outputs[0]);
+        } else {
+            dbg(op);
+            g->cloneOperator(op, inputs, outputs);
+        }
+    }
+    return g;
+}
+
+} // namespace infini
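For reference, the shape rewrite inside getTensor above is a plain NCHW-to-NHWC permutation. A self-contained sketch of that permutation, with an assumed example shape:

#include <cassert>
#include <vector>

int main() {
    // Mirrors `s = vector{s[0], s[2], s[3], s[1]}` above: {n, c, h, w} -> {n, h, w, c}.
    std::vector<int> nchw{1, 3, 224, 224}; // assumed example shape
    std::vector<int> nhwc{nchw[0], nchw[2], nchw[3], nchw[1]};
    assert((nhwc == std::vector<int>{1, 224, 224, 3}));
    return 0;
}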

@@ -129,8 +129,9 @@ void GraphObj::dataMalloc() {
     }
 }
-Tensor GraphObj::addTensor(Shape dim, DataType dtype) {
-    return tensors.emplace_back(make_ref<TensorObj>(dim, dtype, runtime));
+Tensor GraphObj::addTensor(Shape dim, DataType dtype, TensorType tensorType) {
+    return tensors.emplace_back(
+        make_ref<TensorObj>(dim, dtype, runtime, tensorType));
 }
 Tensor GraphObj::addTensor(const Tensor &tensor) {

@@ -55,6 +55,39 @@ Tensor GraphHandlerObj::convTransposed2d(Tensor input, Tensor weight,
     }
 }
+Tensor GraphHandlerObj::convNHWC(Tensor input, Tensor weight, Tensor output,
+                                 int ph, int pw, int sh, int sw, int dh,
+                                 int dw) {
+    if (output) {
+        g->addOpWithOutputs<ConvNHWCObj>(std::move(input), std::move(weight),
+                                         output, ph, pw, sh, sw, dh, dw);
+        return output;
+    } else {
+        return g
+            ->addOp<ConvNHWCObj>(std::move(input), std::move(weight), output,
+                                 ph, pw, sh, sw, dh, dw)
+            ->getOutput();
+    }
+}
+
+Tensor GraphHandlerObj::convTransposed2dNHWC(Tensor input, Tensor weight,
+                                             Tensor output, int ph, int pw,
+                                             int sh, int sw, int dh, int dw,
+                                             int oph, int opw) {
+    if (output) {
+        g->addOpWithOutputs<ConvTransposed2dNHWCObj>(
+            std::move(input), std::move(weight), output, ph, pw, sh, sw, dh, dw,
+            oph, opw);
+        return output;
+    } else {
+        return g
+            ->addOp<ConvTransposed2dNHWCObj>(std::move(input),
+                                             std::move(weight), output, ph, pw,
+                                             sh, sw, dh, dw, oph, opw)
+            ->getOutput();
+    }
+}
+
 Tensor GraphHandlerObj::matmul(Tensor a, Tensor b, Tensor y, bool transA,
                                bool transB, Tensor bias, ActType act) {
     if (y) {

@@ -25,7 +25,8 @@ bool OperatorObj::isConcatOp() const { return type == OpType::Concat; }
 bool OperatorObj::isComputeOp() const {
     return type == OpType::Conv || type == OpType::Matmul ||
            type == OpType::ConvTrans || type == OpType::ConvTransNHWC ||
-           type == OpType::G2BMM || type == OpType::GBMM;
+           type == OpType::G2BMM || type == OpType::GBMM ||
+           type == OpType::ConvNHWC;
 }
 bool OperatorObj::isTransposeOp() const { return type == OpType::Transpose; }

@@ -8,12 +8,14 @@
 namespace infini {
-TensorObj::TensorObj(Shape shape_, DataType dtype, Runtime runtime)
+TensorObj::TensorObj(Shape shape_, DataType dtype, Runtime runtime,
+                     TensorType tensorType)
     : TensorBaseObj(shape_.size(), dtype, runtime), shape(std::move(shape_)),
       _size(shape.empty()
                 ? 0
-                : std::accumulate(shape.begin(), shape.end(), 1,
-                                  [](auto acc, auto x) { return acc * x; })) {}
+                : std::accumulate(shape.begin(), shape.end(), 1lu,
+                                  [](auto acc, auto x) { return acc * x; })),
+      tensorType(tensorType) {}
 string TensorObj::toString() const {
     // Convert data pointer to string
@@ -24,8 +26,8 @@ string TensorObj::toString() const {
         ss << "nullptr data";
     string ret = "Tensor " + std::to_string(guid) + ", Fuid " +
                  std::to_string(fuid) + ", shape " + vecToString(shape) +
-                 ", dtype " + dtype.toString() + ", " + runtime->toString() +
-                 ", " + ss.str() + "\n";
+                 ", dtype " + dtype.toString() + ", tensorType " +
+                 std::to_string(enum_to_underlying(tensorType));
     vector<UidBaseType> targetGuids;
     for (const auto &op : targets)
         targetGuids.emplace_back(op.lock()->getGuid());
@@ -34,6 +36,7 @@ string TensorObj::toString() const {
     else
         ret += ", source None";
     ret += ", targets " + vecToString(targetGuids);
+    ret += ", " + runtime->toString() + ", " + ss.str();
     return ret;
 }

@@ -1,3 +1,4 @@
+#include "core/app.h"
 #include "core/graph_handler.h"
 #include "operators/batch_norm.h"
 #include "operators/concat.h"
@@ -63,6 +64,8 @@ void export_values(py::module &m) {
         .VALUE(OpType, Conv)
         .VALUE(OpType, Matmul)
        .VALUE(OpType, ConvTrans)
+        .VALUE(OpType, ConvTransNHWC)
+        .VALUE(OpType, ConvNHWC)
         .VALUE(OpType, G2BMM)
         .VALUE(OpType, GBMM)
         .VALUE(OpType, Pad)
@@ -132,19 +135,34 @@ static Ref<RuntimeObj> intelcpu_runtime() { return make_ref<MklRuntimeObj>(); }
 #endif
 static std::tuple<int, int, int, int, int, int> conv_attrs_of(Operator op) {
-    IT_ASSERT(op->getOpType() == OpType::Conv);
-    auto conv = dynamic_cast<const ConvObj *>(op.get());
-    return std::make_tuple(conv->getPh(), conv->getPw(), conv->getDh(),
-                           conv->getDw(), conv->getSh(), conv->getSw());
+    IT_ASSERT(op->getOpType() == OpType::Conv ||
+              op->getOpType() == OpType::ConvNHWC);
+    auto conv = dynamic_cast<const ConvBaseObj *>(op.get());
+    return std::make_tuple(conv->getPh(), conv->getPw(), conv->getSh(),
+                           conv->getSw(), conv->getDh(), conv->getDw());
 }
 static std::tuple<int, int, int, int, int, int, int, int>
 conv_trans_attrs_of(Operator op) {
-    IT_ASSERT(op->getOpType() == OpType::ConvTrans);
-    auto conv = dynamic_cast<const ConvTransposed2dObj *>(op.get());
-    auto [oph, opw] = conv->getOutputPadding();
-    return std::make_tuple(conv->getPh(), conv->getPw(), conv->getDh(),
-                           conv->getDw(), conv->getSh(), conv->getSw(), oph,
+    IT_ASSERT(op->getOpType() == OpType::ConvTrans ||
+              op->getOpType() == OpType::ConvTransNHWC);
+    auto conv = dynamic_cast<const ConvBaseObj *>(op.get());
+    int oph, opw;
+
+    if (op->getOpType() == OpType::ConvTrans) {
+        auto _conv = dynamic_cast<const ConvTransposed2dObj *>(op.get());
+        auto output_pad = _conv->getOutputPadding();
+        oph = output_pad.first;
+        opw = output_pad.second;
+    } else {
+        auto _conv = dynamic_cast<const ConvTransposed2dNHWCObj *>(op.get());
+        auto output_pad = _conv->getOutputPadding();
+        oph = output_pad.first;
+        opw = output_pad.second;
+    }
+    return std::make_tuple(conv->getPh(), conv->getPw(), conv->getSh(),
+                           conv->getSw(), conv->getDh(), conv->getDw(), oph,
                            opw);
 }
@@ -294,6 +312,9 @@ void init_graph_builder(py::module &m) {
         .def("tensor", &Handler::tensor, policy::move)
         .def("conv", &Handler::conv, policy::move)
         .def("convTransposed2d", &Handler::convTransposed2d, policy::move)
+        .def("convNHWC", &Handler::convNHWC, policy::move)
+        .def("convtransposed2dNHWC", &Handler::convTransposed2dNHWC,
+             policy::move)
         .def("matmul", &Handler::matmul, policy::move)
         .def("batchNorm", &Handler::batchNorm, policy::move)
         .def("maxPool", &Handler::maxPool, policy::move)
@@ -328,6 +349,10 @@ void init_graph_builder(py::module &m) {
         .def("run", &Handler::run, policy::automatic);
 }
+
+void load_apps(py::module &m) {
+    m.def("convertNCHWtoNHWCModel", &convertNCHWtoNHWCModel);
+}
+
 } // namespace infini
 PYBIND11_MODULE(backend, m) {
@@ -335,4 +360,5 @@ PYBIND11_MODULE(backend, m) {
     infini::export_values(m);
     infini::export_functions(m);
     infini::init_graph_builder(m);
+    infini::load_apps(m);
 }

@@ -114,6 +114,75 @@ optional<vector<Shape>> ConvObj::inferShape(const TensorVec &inputs) const {
     return {{{on, oc, oh, ow}}};
 }
+
+void ConvNHWCObj::setAuxilaryAttributes(PaddingMode mode) {
+    const Tensor &input = inputs[0];
+    const Tensor &weight = inputs[1];
+    n = input->getDims()[0], c = input->getDims()[3], h = input->getDims()[1],
+    w = input->getDims()[2], f = weight->getDims()[0], r = weight->getDims()[1],
+    s = weight->getDims()[2];
+    if (mode == PaddingMode::Same) {
+        int oh = h / sh;
+        int ow = w / sw;
+        ph = (h - oh * sh + (r - sh) * dh) / 2;
+        pw = (w - ow * sw + (s - sw) * dw) / 2;
+    } else if (mode == PaddingMode::Valid) {
+        ph = pw = 0;
+    }
+}
+
+ConvNHWCObj::ConvNHWCObj(GraphObj *graph, Tensor input, Tensor weight,
+                         Tensor output, int ph, int pw, int sh, int sw, int dh,
+                         int dw, Tensor bias, ActType act)
+    : ConvBaseObj(OpType::ConvNHWC, {input, weight}, output, ph, pw, sh, sw, dh,
+                  dw, input, weight, act) {
+    if (bias)
+        IT_TODO_HALT();
+    setAuxilaryAttributes(PaddingMode::Other);
+    IT_ASSERT(checkValid(graph));
+}
+
+ConvNHWCObj::ConvNHWCObj(GraphObj *graph, Tensor input, Tensor weight,
+                         Tensor output, PaddingMode mode, int sh, int sw,
+                         int dh, int dw, Tensor bias, ActType act)
+    : ConvBaseObj(OpType::ConvNHWC, {input, weight}, output, mode, sh, sw, dh,
+                  dw, input, weight, act) {
+    if (bias)
+        IT_TODO_HALT();
+    setAuxilaryAttributes(mode);
+    IT_ASSERT(checkValid(graph));
+}
+
+optional<vector<Shape>> ConvNHWCObj::inferShape(const TensorVec &inputs) const {
+    const auto &input = inputs[0], &weight = inputs[1];
+    auto n = input->getDims()[0];
+    auto h = input->getDims()[1];
+    auto w = input->getDims()[2];
+    auto f = weight->getDims()[0];
+    auto r = weight->getDims()[1];
+    auto s = weight->getDims()[2];
+    int on = n, oc = f;
+    int oh = 0, ow = 0;
+    // For the NHWC+FRSC layout, C of the input must be divisible by C of the weight
+    if (input->getDims()[3] % weight->getDims()[3] != 0)
+        return {};
+    // Set padding size
+    if (padding == PaddingMode::Other) {
+        oh = (h - (r - sh) * dh + ph * 2) / sh;
+        ow = (w - (s - sw) * dw + pw * 2) / sw;
+    } else if (padding == PaddingMode::Same) {
+        oh = h / sh;
+        ow = w / sw;
+        // ph = (h - oh * sh + (r - sh) * dh) / 2;
+        // pw = (w - ow * sw + (s - sw) * dw) / 2;
+    } else if (padding == PaddingMode::Valid) {
+        int ph = 0;
+        int pw = 0;
+        oh = (h - (r - sh) * dh + ph * 2) / sh;
+        ow = (w - (s - sw) * dw + pw * 2) / sw;
+    }
+    return {{{on, oh, ow, oc}}};
+}
+
 ConvTransposed2dObj::ConvTransposed2dObj(GraphObj *graph, Tensor input,
                                          Tensor weight, Tensor output, int ph,
                                          int pw, int sh, int sw, int dh, int dw,
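To illustrate the PaddingMode::Other branch of ConvNHWCObj::inferShape above, a worked example with assumed values (input NHWC {1, 32, 32, 16}, weight FRSC {64, 3, 3, 16}, ph = pw = 1, sh = sw = dh = dw = 1):

    oh = (h - (r - sh) * dh + ph * 2) / sh = (32 - (3 - 1) * 1 + 1 * 2) / 1 = 32
    ow = (w - (s - sw) * dw + pw * 2) / sw = (32 - (3 - 1) * 1 + 1 * 2) / 1 = 32

so the inferred output shape {on, oh, ow, oc} is {1, 32, 32, 64}.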