add support for ConvNHWC operator and model converter for different layout

Author: whjthu
Date: 2023-06-25 07:19:20 +08:00
Parent: 19d7dc871d
Commit: aaeadf9211

15 changed files with 263 additions and 22 deletions
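As a quick orientation, the converter added by this commit is a single free function declared in include/core/app.h. Below is a minimal C++ usage sketch; the wrapper function toNHWC is hypothetical and only convertNCHWtoNHWCModel comes from this commit:

#include "core/app.h"

namespace infini {
// Hypothetical wrapper: returns the NHWC version of an NCHW graph, or the
// original graph unchanged when conversion is not possible (the converter
// returns nullptr if the model contains a tensor whose rank is not 4).
Graph toNHWC(Runtime runtime, Graph nchwGraph) {
    Graph nhwcGraph = convertNCHWtoNHWCModel(runtime, nchwGraph);
    return nhwcGraph ? nhwcGraph : nchwGraph;
}
} // namespace infini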

@@ -98,7 +98,7 @@ set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -UNDEBUG")
 # Source files
-file(GLOB_RECURSE SRC src/ffi/*.cc src/core/*.cc src/kernels/cpu/*.cc src/nnet/*.cc src/operators/*.cc src/utils/*.cc)
+file(GLOB_RECURSE SRC src/apps/*.cc src/ffi/*.cc src/core/*.cc src/kernels/cpu/*.cc src/nnet/*.cc src/operators/*.cc src/utils/*.cc)
 if(USE_CUDA)
   file(GLOB_RECURSE SRC_CUDA src/cuda/*.cc src/cuda/*.cu src/kernels/cuda/*.cc src/kernels/cuda/*.cu)

include/core/app.h (new file)
@@ -0,0 +1,9 @@
+#pragma once
+#include "core/graph.h"
+#include "core/runtime.h"
+namespace infini {
+Graph convertNCHWtoNHWCModel(Runtime runtime, Graph inG);
+} // namespace infini

@@ -16,7 +16,8 @@ class GraphObj : public Object {
     string toString() const override;
     Runtime getRuntime() const { return runtime; }
-    Tensor addTensor(Shape dim, DataType dtype = DataType::Float32);
+    Tensor addTensor(Shape dim, DataType dtype = DataType::Float32,
+                     TensorType tensorType = TensorType::Other);
     Tensor addTensor(const Tensor &tensor);
     TensorVec addTensor(const TensorVec &tensors);
     /**

@@ -49,6 +49,11 @@ class GraphHandlerObj {
     Tensor convTransposed2d(Tensor input, Tensor weight, Tensor output, int ph,
                             int pw, int sh, int sw, int dh, int dw, int oph,
                             int opw);
+    Tensor convNHWC(Tensor input, Tensor weight, Tensor output, int ph, int pw,
+                    int sh, int sw, int dh, int dw);
+    Tensor convTransposed2dNHWC(Tensor input, Tensor weight, Tensor output,
+                                int ph, int pw, int sh, int sw, int dh, int dw,
+                                int oph, int opw);
     Tensor matmul(Tensor a, Tensor b, Tensor y, bool transA, bool transB,
                   Tensor bias, ActType act);
     Tensor batchNorm(Tensor input, Tensor output, Tensor mean, Tensor var,

@@ -11,6 +11,7 @@ enum class OpType {
     Matmul,
     ConvTrans,
     ConvTransNHWC,
+    ConvNHWC,
     G2BMM,
     GBMM,
     Pad,
@@ -121,6 +122,8 @@ class OpRegistry {
         FOP(ConvBackwardData);
         FOP(Matmul);
         FOP(ConvTrans);
+        FOP(ConvTransNHWC);
+        FOP(ConvNHWC);
         FOP(G2BMM);
         FOP(GBMM);
         FOP(Pad);

@@ -12,13 +12,14 @@ namespace infini {
 // TODO: how to deal with this
 using ShapeElem = int;
 using Shape = vector<ShapeElem>;
+enum class TensorType { Error = 0, Input = 1, Initialized = 2, Other = 3 };
 class TensorObj : public TensorBaseObj {
   private:
     Shape shape;
     size_t _size; // Cache of Π(shape).
     Fuid fuid;    // Cloned tensors share the same id. Tensors constructed from
                   // scratch have a new id.
+    TensorType tensorType;
     void copyin(const void *ptr, size_t size) {
         runtime->copyBlobFromCPU(getRawDataPtr<void *>(), ptr, size);
     }
@@ -27,7 +28,8 @@ class TensorObj : public TensorBaseObj {
     }
   public:
-    TensorObj(Shape shape, DataType dtype, Runtime runtime);
+    TensorObj(Shape shape, DataType dtype, Runtime runtime,
+              TensorType tensorType = TensorType::Other);
     virtual ~TensorObj() {}
     string toString() const override;
@@ -39,6 +41,7 @@ class TensorObj : public TensorBaseObj {
     size_t getOffset(const vector<int> &ds) const;
     void dataMalloc();
     UidBaseType getFuid() const { return fuid; }
+    TensorType getTensorType() const { return tensorType; }
     void load(std::string file_path);
     void save(std::string file_path);

@@ -149,6 +149,25 @@ class ConvObj : public ConvBaseObj {
     void setAuxilaryAttributes(PaddingMode mode) override;
 };
+class ConvNHWCObj : public ConvBaseObj {
+  public:
+    ConvNHWCObj(GraphObj *graph, Tensor input, Tensor weight, Tensor output,
+                int ph, int pw, int sh = 1, int sw = 1, int dh = 1, int dw = 1,
+                Tensor bias = nullptr, ActType act = ActType::None);
+    // Constructors for setting padding mode
+    ConvNHWCObj(GraphObj *graph, Tensor input, Tensor weight, Tensor output,
+                PaddingMode mode = PaddingMode::Same, int sh = 1, int sw = 1,
+                int dh = 1, int dw = 1, Tensor bias = nullptr,
+                ActType act = ActType::None);
+    OP_CLONE(ConvNHWCObj);
+    optional<vector<Shape>> inferShape(const TensorVec &inputs) const override;
+    int getNumGroups() const override { return c / getChannelPerGroup(); }
+
+  private:
+    void setAuxilaryAttributes(PaddingMode mode) override;
+};
 class ConvBackwardFilterObj : public ConvBaseObj {
   private:
     ActType act;
@@ -220,6 +239,7 @@ class ConvTransposed2dNHWCObj : public ConvBaseObj {
     optional<vector<Shape>> inferShape(const TensorVec &inputs) const override;
     int getNumGroups() const override { return group; }
+    std::pair<int, int> getOutputPadding() const { return {oph, opw}; }
   private:
     void setAuxilaryAttributes(PaddingMode mode) override;

@@ -19,7 +19,7 @@ class ReshapeObj : public OperatorObj {
      * @param output The output tensor.
      * @param dims The shape of the output tensor.
      */
-    ReshapeObj(GraphObj *graph, Tensor input, Tensor output, Shape dims);
+    ReshapeObj(GraphObj *graph, Tensor input, Tensor output, Shape dims = {});
     OP_CLONE(ReshapeObj);
     optional<vector<Shape>> inferShape(const TensorVec &inputs) const override;

src/apps/model_surgeon.cc (new file)
@@ -0,0 +1,67 @@
+#include "core/graph.h"
+#include "core/runtime.h"
+#include "nnet/dbg.h"
+#include "operators/conv.h"
+#include "operators/pooling.h"
+#include "operators/reshape.h"
+
+namespace infini {
+
+Graph convertNCHWtoNHWCModel(Runtime runtime, Graph inG) {
+    // Construct new graph
+    // IT_ASSERT(inG->getInputs().size() == 1);
+    IT_ASSERT(inG->getOutputs().size() == 1);
+    bool status = inG->topo_sort();
+    IT_ASSERT(status);
+    auto g = make_ref<GraphObj>(runtime);
+    map<UidBaseType, Tensor> tensors;
+    for (const auto &t : inG->getTensors())
+        if (t->getDims().size() != 4)
+            return nullptr;
+    auto getTensor = [&g, &tensors](const Tensor &inTensor) {
+        auto uid = inTensor->getGuid();
+        if (auto it = tensors.find(uid); it == tensors.end()) {
+            Shape s = inTensor->getDims();
+            s = vector{s[0], s[2], s[3], s[1]};
+            tensors[uid] = g->addTensor(s, inTensor->getDType(),
+                                        inTensor->getTensorType());
+        }
+        return tensors[uid];
+    };
+    for (auto op : inG->getOperators()) {
+        TensorVec inputs, outputs;
+        for (auto &t : op->getInputs())
+            inputs.emplace_back(getTensor(t));
+        for (auto &t : op->getOutputs())
+            outputs.emplace_back(getTensor(t));
+        if (auto cOp = as<ConvObj>(op)) {
+            const auto &[ph, pw, sh, sw, dh, dw] = cOp->getPadStrideDilation();
+            auto bias =
+                cOp->getBias() ? g->cloneTensor(cOp->getBias()) : nullptr;
+            g->addOpWithOutputs<ConvNHWCObj>(inputs[0], inputs[1], outputs[0],
+                                             ph, pw, sh, sw, dh, dw, bias,
+                                             cOp->getAct());
+        } else if (const auto &cOp = as<ConvTransposed2dObj>(op)) {
+            const auto &[ph, pw, sh, sw, dh, dw] = cOp->getPadStrideDilation();
+            const auto &[oph, opw] = cOp->getOutputPadding();
+            auto group = cOp->getNumGroups();
+            auto bias =
+                cOp->getBias() ? g->cloneTensor(cOp->getBias()) : nullptr;
+            g->addOpWithOutputs<ConvTransposed2dNHWCObj>(
+                inputs[0], inputs[1], outputs[0], ph, pw, sh, sw, dh, dw, oph,
+                opw, group, bias, cOp->getAct());
+        } else if (const auto &cOp = as<MaxPoolObj>(op)) {
+            auto t = g->addOp<ReshapeObj>(inputs[0], nullptr,
+                                          cOp->getInputs(0)->getDims())
+                         ->getOutput();
+            auto tt = g->addTensor(cOp->getOutput()->getDims(),
+                                   cOp->getOutput()->getDType());
+            g->cloneOperator(op, {t}, {tt});
+            g->addOpWithOutputs<ReshapeObj>(tt, outputs[0]);
+        } else {
+            dbg(op);
+            g->cloneOperator(op, inputs, outputs);
+        }
+    }
+    return g;
+}
+
+} // namespace infini
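For reference, the shape rewrite inside getTensor above is a plain NCHW-to-NHWC permutation. A self-contained sketch of that permutation, with an assumed example shape:

#include <cassert>
#include <vector>

int main() {
    // Mirrors `s = vector{s[0], s[2], s[3], s[1]}` above: {n, c, h, w} -> {n, h, w, c}.
    std::vector<int> nchw{1, 3, 224, 224}; // assumed example shape
    std::vector<int> nhwc{nchw[0], nchw[2], nchw[3], nchw[1]};
    assert((nhwc == std::vector<int>{1, 224, 224, 3}));
    return 0;
}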

@@ -129,8 +129,9 @@ void GraphObj::dataMalloc() {
     }
 }
-Tensor GraphObj::addTensor(Shape dim, DataType dtype) {
-    return tensors.emplace_back(make_ref<TensorObj>(dim, dtype, runtime));
+Tensor GraphObj::addTensor(Shape dim, DataType dtype, TensorType tensorType) {
+    return tensors.emplace_back(
+        make_ref<TensorObj>(dim, dtype, runtime, tensorType));
 }
 Tensor GraphObj::addTensor(const Tensor &tensor) {

@@ -55,6 +55,39 @@ Tensor GraphHandlerObj::convTransposed2d(Tensor input, Tensor weight,
     }
 }
+Tensor GraphHandlerObj::convNHWC(Tensor input, Tensor weight, Tensor output,
+                                 int ph, int pw, int sh, int sw, int dh,
+                                 int dw) {
+    if (output) {
+        g->addOpWithOutputs<ConvNHWCObj>(std::move(input), std::move(weight),
+                                         output, ph, pw, sh, sw, dh, dw);
+        return output;
+    } else {
+        return g
+            ->addOp<ConvNHWCObj>(std::move(input), std::move(weight), output,
+                                 ph, pw, sh, sw, dh, dw)
+            ->getOutput();
+    }
+}
+
+Tensor GraphHandlerObj::convTransposed2dNHWC(Tensor input, Tensor weight,
+                                             Tensor output, int ph, int pw,
+                                             int sh, int sw, int dh, int dw,
+                                             int oph, int opw) {
+    if (output) {
+        g->addOpWithOutputs<ConvTransposed2dNHWCObj>(
+            std::move(input), std::move(weight), output, ph, pw, sh, sw, dh, dw,
+            oph, opw);
+        return output;
+    } else {
+        return g
+            ->addOp<ConvTransposed2dNHWCObj>(std::move(input),
+                                             std::move(weight), output, ph, pw,
+                                             sh, sw, dh, dw, oph, opw)
+            ->getOutput();
+    }
+}
+
 Tensor GraphHandlerObj::matmul(Tensor a, Tensor b, Tensor y, bool transA,
                                bool transB, Tensor bias, ActType act) {
     if (y) {

@@ -25,7 +25,8 @@ bool OperatorObj::isConcatOp() const { return type == OpType::Concat; }
 bool OperatorObj::isComputeOp() const {
     return type == OpType::Conv || type == OpType::Matmul ||
            type == OpType::ConvTrans || type == OpType::ConvTransNHWC ||
-           type == OpType::G2BMM || type == OpType::GBMM;
+           type == OpType::G2BMM || type == OpType::GBMM ||
+           type == OpType::ConvNHWC;
 }
 bool OperatorObj::isTransposeOp() const { return type == OpType::Transpose; }

@@ -8,12 +8,14 @@
 namespace infini {
-TensorObj::TensorObj(Shape shape_, DataType dtype, Runtime runtime)
+TensorObj::TensorObj(Shape shape_, DataType dtype, Runtime runtime,
+                     TensorType tensorType)
     : TensorBaseObj(shape_.size(), dtype, runtime), shape(std::move(shape_)),
       _size(shape.empty()
                 ? 0
-                : std::accumulate(shape.begin(), shape.end(), 1,
-                                  [](auto acc, auto x) { return acc * x; })) {}
+                : std::accumulate(shape.begin(), shape.end(), 1lu,
+                                  [](auto acc, auto x) { return acc * x; })),
+      tensorType(tensorType) {}
 string TensorObj::toString() const {
     // Convert data pointer to string
@@ -24,8 +26,8 @@ string TensorObj::toString() const {
         ss << "nullptr data";
     string ret = "Tensor " + std::to_string(guid) + ", Fuid " +
                  std::to_string(fuid) + ", shape " + vecToString(shape) +
-                 ", dtype " + dtype.toString() + ", " + runtime->toString() +
-                 ", " + ss.str() + "\n";
+                 ", dtype " + dtype.toString() + ", tensorType " +
+                 std::to_string(enum_to_underlying(tensorType));
     vector<UidBaseType> targetGuids;
     for (const auto &op : targets)
         targetGuids.emplace_back(op.lock()->getGuid());
@@ -34,6 +36,7 @@ string TensorObj::toString() const {
     else
         ret += ", source None";
     ret += ", targets " + vecToString(targetGuids);
+    ret += ", " + runtime->toString() + ", " + ss.str();
     return ret;
 }

@@ -1,3 +1,4 @@
+#include "core/app.h"
 #include "core/graph_handler.h"
 #include "operators/batch_norm.h"
 #include "operators/concat.h"
@@ -63,6 +64,8 @@ void export_values(py::module &m) {
         .VALUE(OpType, Conv)
         .VALUE(OpType, Matmul)
        .VALUE(OpType, ConvTrans)
+        .VALUE(OpType, ConvTransNHWC)
+        .VALUE(OpType, ConvNHWC)
         .VALUE(OpType, G2BMM)
         .VALUE(OpType, GBMM)
         .VALUE(OpType, Pad)
@@ -132,19 +135,34 @@ static Ref<RuntimeObj> intelcpu_runtime() { return make_ref<MklRuntimeObj>(); }
 #endif
 static std::tuple<int, int, int, int, int, int> conv_attrs_of(Operator op) {
-    IT_ASSERT(op->getOpType() == OpType::Conv);
-    auto conv = dynamic_cast<const ConvObj *>(op.get());
-    return std::make_tuple(conv->getPh(), conv->getPw(), conv->getDh(),
-                           conv->getDw(), conv->getSh(), conv->getSw());
+    IT_ASSERT(op->getOpType() == OpType::Conv ||
+              op->getOpType() == OpType::ConvNHWC);
+    auto conv = dynamic_cast<const ConvBaseObj *>(op.get());
+    return std::make_tuple(conv->getPh(), conv->getPw(), conv->getSh(),
+                           conv->getSw(), conv->getDh(), conv->getDw());
 }
 static std::tuple<int, int, int, int, int, int, int, int>
 conv_trans_attrs_of(Operator op) {
-    IT_ASSERT(op->getOpType() == OpType::ConvTrans);
-    auto conv = dynamic_cast<const ConvTransposed2dObj *>(op.get());
-    auto [oph, opw] = conv->getOutputPadding();
-    return std::make_tuple(conv->getPh(), conv->getPw(), conv->getDh(),
-                           conv->getDw(), conv->getSh(), conv->getSw(), oph,
+    IT_ASSERT(op->getOpType() == OpType::ConvTrans ||
+              op->getOpType() == OpType::ConvTransNHWC);
+    auto conv = dynamic_cast<const ConvBaseObj *>(op.get());
+    int oph, opw;
+
+    if (op->getOpType() == OpType::ConvTrans) {
+        auto _conv = dynamic_cast<const ConvTransposed2dObj *>(op.get());
+        auto output_pad = _conv->getOutputPadding();
+        oph = output_pad.first;
+        opw = output_pad.second;
+    } else {
+        auto _conv = dynamic_cast<const ConvTransposed2dNHWCObj *>(op.get());
+        auto output_pad = _conv->getOutputPadding();
+        oph = output_pad.first;
+        opw = output_pad.second;
+    }
+    return std::make_tuple(conv->getPh(), conv->getPw(), conv->getSh(),
+                           conv->getSw(), conv->getDh(), conv->getDw(), oph,
                            opw);
 }
@@ -294,6 +312,9 @@ void init_graph_builder(py::module &m) {
         .def("tensor", &Handler::tensor, policy::move)
         .def("conv", &Handler::conv, policy::move)
         .def("convTransposed2d", &Handler::convTransposed2d, policy::move)
+        .def("convNHWC", &Handler::convNHWC, policy::move)
+        .def("convtransposed2dNHWC", &Handler::convTransposed2dNHWC,
+             policy::move)
         .def("matmul", &Handler::matmul, policy::move)
         .def("batchNorm", &Handler::batchNorm, policy::move)
         .def("maxPool", &Handler::maxPool, policy::move)
@@ -328,6 +349,10 @@ void init_graph_builder(py::module &m) {
         .def("run", &Handler::run, policy::automatic);
 }
+
+void load_apps(py::module &m) {
+    m.def("convertNCHWtoNHWCModel", &convertNCHWtoNHWCModel);
+}
+
 } // namespace infini
 PYBIND11_MODULE(backend, m) {
@@ -335,4 +360,5 @@ PYBIND11_MODULE(backend, m) {
     infini::export_values(m);
     infini::export_functions(m);
     infini::init_graph_builder(m);
+    infini::load_apps(m);
 }

@@ -114,6 +114,75 @@ optional<vector<Shape>> ConvObj::inferShape(const TensorVec &inputs) const {
     return {{{on, oc, oh, ow}}};
 }
+
+void ConvNHWCObj::setAuxilaryAttributes(PaddingMode mode) {
+    const Tensor &input = inputs[0];
+    const Tensor &weight = inputs[1];
+    n = input->getDims()[0], c = input->getDims()[3], h = input->getDims()[1],
+    w = input->getDims()[2], f = weight->getDims()[0], r = weight->getDims()[1],
+    s = weight->getDims()[2];
+    if (mode == PaddingMode::Same) {
+        int oh = h / sh;
+        int ow = w / sw;
+        ph = (h - oh * sh + (r - sh) * dh) / 2;
+        pw = (w - ow * sw + (s - sw) * dw) / 2;
+    } else if (mode == PaddingMode::Valid) {
+        ph = pw = 0;
+    }
+}
+
+ConvNHWCObj::ConvNHWCObj(GraphObj *graph, Tensor input, Tensor weight,
+                         Tensor output, int ph, int pw, int sh, int sw, int dh,
+                         int dw, Tensor bias, ActType act)
+    : ConvBaseObj(OpType::ConvNHWC, {input, weight}, output, ph, pw, sh, sw, dh,
+                  dw, input, weight, act) {
+    if (bias)
+        IT_TODO_HALT();
+    setAuxilaryAttributes(PaddingMode::Other);
+    IT_ASSERT(checkValid(graph));
+}
+
+ConvNHWCObj::ConvNHWCObj(GraphObj *graph, Tensor input, Tensor weight,
+                         Tensor output, PaddingMode mode, int sh, int sw,
+                         int dh, int dw, Tensor bias, ActType act)
+    : ConvBaseObj(OpType::ConvNHWC, {input, weight}, output, mode, sh, sw, dh,
+                  dw, input, weight, act) {
+    if (bias)
+        IT_TODO_HALT();
+    setAuxilaryAttributes(mode);
+    IT_ASSERT(checkValid(graph));
+}
+
+optional<vector<Shape>> ConvNHWCObj::inferShape(const TensorVec &inputs) const {
+    const auto &input = inputs[0], &weight = inputs[1];
+    auto n = input->getDims()[0];
+    auto h = input->getDims()[1];
+    auto w = input->getDims()[2];
+    auto f = weight->getDims()[0];
+    auto r = weight->getDims()[1];
+    auto s = weight->getDims()[2];
+    int on = n, oc = f;
+    int oh = 0, ow = 0;
+    // For the NHWC+FRSC layout, C of the input must be divisible by C of the weight
+    if (input->getDims()[3] % weight->getDims()[3] != 0)
+        return {};
+    // Set padding size
+    if (padding == PaddingMode::Other) {
+        oh = (h - (r - sh) * dh + ph * 2) / sh;
+        ow = (w - (s - sw) * dw + pw * 2) / sw;
+    } else if (padding == PaddingMode::Same) {
+        oh = h / sh;
+        ow = w / sw;
+        // ph = (h - oh * sh + (r - sh) * dh) / 2;
+        // pw = (w - ow * sw + (s - sw) * dw) / 2;
+    } else if (padding == PaddingMode::Valid) {
+        int ph = 0;
+        int pw = 0;
+        oh = (h - (r - sh) * dh + ph * 2) / sh;
+        ow = (w - (s - sw) * dw + pw * 2) / sw;
+    }
+    return {{{on, oh, ow, oc}}};
+}
+
 ConvTransposed2dObj::ConvTransposed2dObj(GraphObj *graph, Tensor input,
                                          Tensor weight, Tensor output, int ph,
                                          int pw, int sh, int sw, int dh, int dw,
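To illustrate the PaddingMode::Other branch of ConvNHWCObj::inferShape above, a worked example with assumed values (input NHWC {1, 32, 32, 16}, weight FRSC {64, 3, 3, 16}, ph = pw = 1, sh = sw = dh = dw = 1):

    oh = (h - (r - sh) * dh + ph * 2) / sh = (32 - (3 - 1) * 1 + 1 * 2) / 1 = 32
    ow = (w - (s - sw) * dw + pw * 2) / sw = (32 - (3 - 1) * 1 + 1 * 2) / 1 = 32

so the inferred output shape {on, oh, ow, oc} is {1, 32, 32, 64}.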