Add Conv operator and naive CPU implementation (#5)

* Add: Conv definition

* Add: tensor copy data from vector

* Add: CPU conv kernel

* Fix: replace Int32 with UInt32 in DataType

Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com>
zhengly123 committed 2022-08-17 14:16:01 +08:00
parent a26890abce
commit 9303ddda8e
15 changed files with 369 additions and 23 deletions


@@ -81,6 +81,7 @@ if(BUILD_TEST)
enable_testing()
if(BUILD_TEST_CORE)
build_test(test/core/*.cc)
build_test(test/operators/*.cc)
endif()
if(BUILD_TEST_PET)
build_test(test/pet/*.cc)


@@ -57,6 +57,18 @@ template <typename T> auto enum_to_underlying(T e) {
return static_cast<std::underlying_type_t<T>>(e);
}
template <typename T> std::string vecToString(const std::vector<T> &vec) {
    std::string ret;
    ret.append("[");
    for (auto d : vec) {
        ret.append(std::to_string(d));
        ret.append(",");
    }
    if (!vec.empty())
        ret.pop_back(); // drop the trailing comma; an empty vector prints "[]"
    ret.append("]");
    return ret;
}
double timeit(const std::function<void()> &func);
} // namespace infini
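With the trailing-comma fix above, vecToString prints a bracketed, comma-separated list. A quick sanity check (hypothetical standalone snippet, not part of the commit):

#include "core/common.h"
#include <cassert>
#include <vector>

int main() {
    assert(infini::vecToString(std::vector<int>{1, 2, 3}) == "[1,2,3]");
    assert(infini::vecToString(std::vector<int>{}) == "[]");
}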


@@ -16,7 +16,7 @@ class GraphObj : public Object {
// Graph(OpVec oplist);
string toString() const override;
- Tensor addTensor(Shape dim, DataType dtype = DataType::Int32);
+ Tensor addTensor(Shape dim, DataType dtype = DataType::UInt32);
/**
* @brief Add an operator and create its outputs. Output tensor arguments


@@ -24,8 +24,13 @@ class TensorObj : public TensorBaseObj {
using TensorBaseObj::getData;
VType getData(const Shape &pos) const;
void copyData(VType *dptr);
void copyData(vector<VType> dataVector);
void printData() const;
bool equalData(const Tensor &rhs) const;
void
setData(std::function<void(void *, size_t, DataType)> generator) const {
generator((void *)(data.get()), size(), dtype);
}
// void setDims(const Dim &dms) { dims = dms; }
// bool dataRand(int seed = 0) {


@@ -22,7 +22,7 @@ using VType = uint32_t;
enum class DataType {
Float32,
- Int32,
+ UInt32,
};
class TensorBaseObj : public Object {

include/operators/conv.h (new file)

@@ -0,0 +1,68 @@
#pragma once
#include "core/operator.h"
namespace infini {
class ConvObj : public OperatorObj {
public:
// When PaddingMode is Other, ConvObj uses the explicitly given padding size
// (ph, pw); otherwise, (ph, pw) is computed from the padding mode.
enum class PaddingMode {
Other,
Same,
Valid,
};
private:
int ph, pw;
int sh, sw;
int dh, dw;
ActType act;
PaddingMode padding;
// auxiliary attributes
int n, c, h, w, f, r, s;
public:
// Constructors for explicitly setting padding size
ConvObj(GraphObj *graph, Tensor input, Tensor weight, Tensor output, int ph,
int pw, int sh = 1, int sw = 1, int dh = 1, int dw = 1,
Tensor bias = nullptr, ActType act = ActType::None);
// Constructors for setting padding mode
ConvObj(GraphObj *graph, Tensor input, Tensor weight, Tensor output,
PaddingMode mode = PaddingMode::Same, int sh = 1, int sw = 1,
int dh = 1, int dw = 1, Tensor bias = nullptr,
ActType act = ActType::None);
optional<vector<Shape>> inferShape(const TensorVec &inputs) const override;
std::string toString() const override;
int numInputs() const override { return 3; }
int numOutputs() const override { return 1; }
Tensor getBias() const { return inputs[2]; }
ActType getAct() const { return act; }
PaddingMode getPaddingMode() const { return padding; }
pair<int, int> inferPaddingSize() const;
int getDh() const { return dh; }
int getDw() const { return dw; }
int getPh() const { return ph; }
int getPw() const { return pw; }
int getSh() const { return sh; }
int getSw() const { return sw; }
auto getNCHWFRS() const { return tuple(n, c, h, w, f, r, s); }
auto getPadStrideDilation() const { return tuple(ph, pw, sh, sw, dh, dw); }
int getChannelPerGroup() const { return inputs[1]->getDims()[1]; }
int getNumGroups() const { return c / getChannelPerGroup(); }
private:
vector<int> getWorkloadVector() const override;
vector<int> getOpAttrVector() const override;
/**
 * @brief Set the auxiliary attributes (n, c, h, w, f, r, s) and the padding
 * size (ph, pw) if a padding mode is set. Must be called in the constructors.
 */
void setAuxilaryAttributes(PaddingMode mode);
};
} // namespace infini
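A minimal usage sketch of the explicit-padding constructor, mirroring the shape-inference tests below (Graph::addOp creates the output tensor when nullptr is passed):

    Graph g = make_ref<GraphObj>();
    Tensor i0 = g->addTensor({1, 3, 4, 4}, DataType::UInt32);
    Tensor w0 = g->addTensor({2, 3, 3, 3}, DataType::UInt32);
    auto conv = g->addOp<ConvObj>(i0, w0, nullptr, 1, 1); // ph = pw = 1
    // conv->getOutput()->getDims() == Shape{1, 2, 4, 4}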


@@ -1,3 +1,35 @@
#pragma once
#include "core/common.h"
#include "core/tensor_base.h"
#include "gtest/gtest.h"
namespace infini {
class DataGenerator {
private:
virtual void fill(uint32_t *data, size_t size) { IT_TODO_HALT(); };
virtual void fill(float *data, size_t size) { IT_TODO_HALT(); };
public:
void operator()(void *data, size_t size, DataType dataType) {
switch (dataType) {
case DataType::UInt32:
fill(reinterpret_cast<uint32_t *>(data), size);
break;
case DataType::Float32:
fill(reinterpret_cast<float *>(data), size);
break;
default:
IT_TODO_HALT();
}
}
};
class IncrementalGenerator : public DataGenerator {
void fill(uint32_t *data, size_t size) override {
for (size_t i = 0; i < size; i++) {
data[i] = i;
}
}
};
} // namespace infini
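DataGenerator dispatches on DataType and forwards to the fill overload for that element type, so subclasses override only the overloads they support. A hypothetical constant filler, for illustration only (not part of this commit):

    class ConstantGenerator : public DataGenerator {
        uint32_t value;
        void fill(uint32_t *data, size_t size) override {
            for (size_t i = 0; i < size; i++)
                data[i] = value; // same value everywhere
        }
      public:
        explicit ConstantGenerator(uint32_t v) : value(v) {}
    };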


@@ -17,7 +17,7 @@ void RunEngine::run(const Graph &graph, bool tune, bool profiling) const {
for (auto &op : graph->getOperators()) {
// HACK: set correct data type
auto kernelAttrs =
- KernelAttrs{device, op->getOpType(), DataType::Int32};
+ KernelAttrs{device, op->getOpType(), DataType::UInt32};
Kernel *kernel = kernelRegistry.getKernel(kernelAttrs);
auto perfKey = PerfEngine::Key{kernelAttrs, op->getOpPerfKey()};
std::optional<PerfRecord> perfData = perfEngine.getPerfData(perfKey);
@@ -64,7 +64,7 @@ double RunEngine::getPerfTime(const Graph &graph, bool profiling) const {
for (auto &op : graph->getOperators()) {
// HACK: set correct data type
auto kernelAttrs =
- KernelAttrs{device, op->getOpType(), DataType::Int32};
+ KernelAttrs{device, op->getOpType(), DataType::UInt32};
Kernel *kernel = kernelRegistry.getKernel(kernelAttrs);
auto perfKey = PerfEngine::Key{kernelAttrs, op->getOpPerfKey()};
std::optional<PerfRecord> perfData = perfEngine.getPerfData(perfKey);


@@ -45,6 +45,10 @@ void TensorObj::copyData(VType *dptr) {
data[i] = dptr[i];
}
}
void TensorObj::copyData(vector<VType> dataVector) {
IT_ASSERT(dataVector.size() >= size());
copyData(dataVector.data());
}
void TensorObj::printData() const {
IT_ASSERT(data != nullptr);
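The new overload lets callers hand over a braced initializer list directly, as the updated tests do:

    i0->copyData({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});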

src/kerels/cpu/conv.cc (new file)

@@ -0,0 +1,60 @@
#include "operators/conv.h"
#include "core/kernel.h"
namespace infini {
template <typename T> class NaiveConv : public Kernel {
void compute(const Operator &_op, const PerfRecord &record) const override {
auto op = as<ConvObj>(_op);
T *iptr = reinterpret_cast<T *>(op->getInputs(0)->getDataPtr().get());
T *wptr = reinterpret_cast<T *>(op->getInputs(1)->getDataPtr().get());
T *optr = reinterpret_cast<T *>(op->getOutput()->getDataPtr().get());
auto [n, c, h, w, f, r, s] = op->getNCHWFRS();
auto [ph, pw, sh, sw, dh, dw] = op->getPadStrideDilation();
int cpg = op->getChannelPerGroup();
int g = op->getNumGroups();
IT_ASSERT(f % g == 0, "Illegal number of channels");
auto outDim = op->getOutput()->getDims();
int oh = outDim[2], ow = outDim[3];
for (int nn = 0; nn < n; nn++) {
#pragma omp parallel for
for (int ff = 0; ff < f; ff++) {
for (int hh = 0; hh < oh; hh++)
for (int ww = 0; ww < ow; ww++) {
int gidx = ff / (f / g);
T val = 0; // accumulate in T, not VType, so the float kernel stays float
for (int cc = 0; cc < cpg; cc++)
for (int rr = 0; rr < r; rr++)
for (int ss = 0; ss < s; ss++) {
// clang-format off
int posH = hh * sh + rr * dh - ph;
int posW = ww * sw + ss * dw - pw;
if (posH < 0 || posH >= h || posW < 0 || posW >= w)
continue;
auto iOffset = posW + w * (posH + h * ((cc + gidx * cpg) + c * nn)),
wOffset = ss + s * (rr + r * (cc + cpg * ff));
auto inputVal = iptr[iOffset], weightVal = wptr[wOffset];
val += weightVal * inputVal;
// clang-format on
}
// TODO: check correctness, oh & ow or h & w?
auto oOffset = ww + ow * (hh + oh * (ff + f * nn));
optr[oOffset] = val;
}
}
}
}
void compute(const Operator &op) const override { compute(op, {}); }
PerfRecord tune(const Operator &op) const override {
return PerfRecord{.time = timeit([this, &op]() { compute(op); })};
}
};
REGISTER_KERNEL(Device::CPU, OpType::Conv, DataType::UInt32,
NaiveConv<uint32_t>, "ConvNaive_CPU_uint32");
REGISTER_KERNEL(Device::CPU, OpType::Conv, DataType::Float32, NaiveConv<float>,
"ConvNaive_CPU_float32");
} // namespace infini
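For reference, the NCHW/FCRS linearization used above, written out: iOffset = posW + w * (posH + h * (channel + c * batch)) and wOffset = ss + s * (rr + r * (cc + cpg * ff)). For the 1x3x4x4 test input, for example, element (n=0, c=2, h=1, w=3) lands at offset 3 + 4 * (1 + 4 * 2) = 39.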


@@ -30,7 +30,7 @@ template <typename T> class NaiveMatmul : public Kernel {
}
};
- REGISTER_KERNEL(Device::CPU, OpType::Matmul, DataType::Int32,
+ REGISTER_KERNEL(Device::CPU, OpType::Matmul, DataType::UInt32,
NaiveMatmul<uint32_t>, "MatmulNaive_CPU_uint32");
REGISTER_KERNEL(Device::CPU, OpType::Matmul, DataType::Float32,
NaiveMatmul<float>, "MatmulNaive_CPU_float32");

src/operators/conv.cc (new file)

@@ -0,0 +1,101 @@
#include "operators/conv.h"
namespace infini {
ConvObj::ConvObj(GraphObj *graph, Tensor input, Tensor weight, Tensor output,
int ph, int pw, int sh, int sw, int dh, int dw, Tensor bias,
ActType act)
: OperatorObj(OpType::Conv, {input, weight, bias}, {output}), ph(ph),
pw(pw), sh(sh), sw(sw), dh(dh), dw(dw), act(act),
padding(PaddingMode::Other) {
setAuxilaryAttributes(PaddingMode::Other);
IT_ASSERT(checkValid(graph));
}
ConvObj::ConvObj(GraphObj *graph, Tensor input, Tensor weight, Tensor output,
PaddingMode mode, int sh, int sw, int dh, int dw, Tensor bias,
ActType act)
: OperatorObj(OpType::Conv, {input, weight, bias}, {output}), ph(-1),
pw(-1), sh(sh), sw(sw), dh(dh), dw(dw), act(act), padding(mode) {
IT_ASSERT(mode != PaddingMode::Other);
setAuxilaryAttributes(mode);
IT_ASSERT(checkValid(graph));
}
string ConvObj::toString() const {
std::ostringstream os;
os << "Conv[" << getGuid() << "]";
os << "(";
if (inputs.size() == 2) {
os << vecToString(inputs[0]->getDims()) << ",";
os << vecToString(inputs[1]->getDims()) << ",";
}
os << "p=[" << ph << "," << pw << "],";
os << "s=[" << sh << "," << sw << "],";
os << "d=[" << dh << "," << dw << "],";
os << "act=" << enum_to_underlying(act) << ",";
os << "input=" << inputs[0]->getGuid() << ",";
os << "weight=" << inputs[1]->getGuid() << ",";
os << "output=" << outputs[0]->getGuid() << ")";
return os.str();
}
optional<vector<Shape>> ConvObj::inferShape(const TensorVec &inputs) const {
const auto &input = inputs[0], &weight = inputs[1];
auto n = input->getDims()[0];
auto h = input->getDims()[2];
auto w = input->getDims()[3];
auto f = weight->getDims()[0];
auto r = weight->getDims()[2];
auto s = weight->getDims()[3];
int on = n, oc = f;
int oh = 0, ow = 0;
// For the NCHW+FCRS layout, the input C must be divisible by the weight C
if (input->getDims()[1] % weight->getDims()[1] != 0)
return {};
// Compute the output size according to the padding mode
if (padding == PaddingMode::Other) {
oh = (h - (r - sh) * dh + ph * 2) / sh;
ow = (w - (s - sw) * dw + pw * 2) / sw;
} else if (padding == PaddingMode::Same) {
oh = h / sh;
ow = w / sw;
// ph = (h - oh * sh + (r - sh) * dh) / 2;
// pw = (w - ow * sw + (s - sw) * dw) / 2;
} else if (padding == PaddingMode::Valid) {
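// The locals below intentionally shadow the ph/pw members:
// inferShape is const, and Valid padding is always zero.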
int ph = 0;
int pw = 0;
oh = (h - (r - sh) * dh + ph * 2) / sh;
ow = (w - (s - sw) * dw + pw * 2) / sw;
}
return {{{on, oc, oh, ow}}};
}
vector<int> ConvObj::getWorkloadVector() const {
return {
enum_to_underlying(type), n, c, h, w, f, r, s, ph, pw, sh, sw, dh, dw,
enum_to_underlying(act)};
}
vector<int> ConvObj::getOpAttrVector() const {
IT_TODO_HALT(); // should padding mode / ph+pw be in attrs?
return {enum_to_underlying(type), c, f, r, s, ph, pw, sh, sw, dh, dw,
enum_to_underlying(act)};
}
void ConvObj::setAuxilaryAttributes(PaddingMode mode) {
n = inputs[0]->getDims()[0], c = inputs[0]->getDims()[1],
h = inputs[0]->getDims()[2], w = inputs[0]->getDims()[3],
f = inputs[1]->getDims()[0], r = inputs[1]->getDims()[2],
s = inputs[1]->getDims()[3];
if (mode == PaddingMode::Same) {
int oh = h / sh;
int ow = w / sw;
ph = (h - oh * sh + (r - sh) * dh) / 2;
pw = (w - ow * sw + (s - sw) * dw) / 2;
} else if (mode == PaddingMode::Valid) {
ph = pw = 0;
}
}
} // namespace infini
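A worked pass through inferShape's explicit-padding branch, matching the tests below: with input {1, 3, 4, 4}, weight {2, 3, 3, 3}, ph = pw = 1, and unit stride/dilation, oh = (h - (r - sh) * dh + 2 * ph) / sh = (4 - (3 - 1) * 1 + 2) / 1 = 4, so the output is {1, 2, 4, 4}. With sh = 2 and dw = 2 instead, the same formulas give oh = (4 - 1 + 2) / 2 = 2 and ow = (4 - 4 + 2) / 1 = 2, i.e. {1, 2, 2, 2}.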


@@ -7,39 +7,39 @@ namespace infini {
TEST(Graph, build_and_run) {
Graph g = make_ref<GraphObj>();
- Tensor i0 = g->addTensor({1, 2, 3}, DataType::Int32);
- Tensor w0 = g->addTensor({1, 3, 4}, DataType::Int32);
- Tensor o0 = g->addTensor({1, 2, 4}, DataType::Int32);
+ Tensor i0 = g->addTensor({1, 2, 3}, DataType::UInt32);
+ Tensor w0 = g->addTensor({1, 3, 4}, DataType::UInt32);
+ Tensor o0 = g->addTensor({1, 2, 4}, DataType::UInt32);
g->dataMalloc();
- i0->copyData(vector<VType>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}.data());
- w0->copyData(vector<VType>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}.data());
+ i0->copyData({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
+ w0->copyData({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
g->addOpWithOutputs<MatmulObj>(i0, w0, o0);
RunEngine(Device::CPU).run(g);
// check answer
- auto ans = make_ref<TensorObj>(Shape{1, 2, 4}, DataType::Int32);
+ auto ans = make_ref<TensorObj>(Shape{1, 2, 4}, DataType::UInt32);
ans->dataMalloc();
- ans->copyData(vector<VType>{38, 44, 50, 56, 83, 98, 113, 128}.data());
+ ans->copyData({38, 44, 50, 56, 83, 98, 113, 128});
EXPECT_TRUE(o0->equalData(ans));
}
TEST(Graph, perf_engine) {
Graph g = make_ref<GraphObj>();
- Tensor i0 = g->addTensor({1, 2, 3}, DataType::Int32);
- Tensor w0 = g->addTensor({1, 3, 4}, DataType::Int32);
+ Tensor i0 = g->addTensor({1, 2, 3}, DataType::UInt32);
+ Tensor w0 = g->addTensor({1, 3, 4}, DataType::UInt32);
auto matmul = g->addOp<MatmulObj>(i0, w0, nullptr);
g->dataMalloc();
- i0->copyData(vector<VType>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}.data());
- w0->copyData(vector<VType>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}.data());
+ i0->copyData({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
+ w0->copyData({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
RunEngine(Device::CPU).run(g, true, true);
double perfTime = RunEngine(Device::CPU).getPerfTime(g);
// The example matmul takes 0.0036ms with one core
EXPECT_GT(perfTime, 0);
EXPECT_LT(perfTime, 0.01);
// check answer
- auto ans = make_ref<TensorObj>(Shape{1, 2, 4}, DataType::Int32);
+ auto ans = make_ref<TensorObj>(Shape{1, 2, 4}, DataType::UInt32);
ans->dataMalloc();
- ans->copyData(vector<VType>{38, 44, 50, 56, 83, 98, 113, 128}.data());
+ ans->copyData({38, 44, 50, 56, 83, 98, 113, 128});
EXPECT_TRUE(matmul->getOutput()->equalData(ans));
}


@@ -9,9 +9,9 @@ TEST(Hash, OperatorHash) {
OpPerfKey key1(0, OpType::Unknown), key2(0, OpType::Unknown);
{ // build with addOpWithOutputs
Graph g = make_ref<GraphObj>();
- Tensor i0 = g->addTensor({1, 2, 3}, DataType::Int32);
- Tensor w0 = g->addTensor({1, 3, 4}, DataType::Int32);
- Tensor o0 = g->addTensor({1, 2, 4}, DataType::Int32);
+ Tensor i0 = g->addTensor({1, 2, 3}, DataType::UInt32);
+ Tensor w0 = g->addTensor({1, 3, 4}, DataType::UInt32);
+ Tensor o0 = g->addTensor({1, 2, 4}, DataType::UInt32);
auto matmul = g->addOpWithOutputs<MatmulObj>(i0, w0, o0);
key1 = matmul->getOpPerfKey();
EXPECT_NE(key1.hash, 0);
@@ -19,8 +19,8 @@ TEST(Hash, OperatorHash) {
}
{ // build with addOp
Graph g = make_ref<GraphObj>();
- Tensor i0 = g->addTensor({2, 2, 3}, DataType::Int32);
- Tensor w0 = g->addTensor({2, 3, 4}, DataType::Int32);
+ Tensor i0 = g->addTensor({2, 2, 3}, DataType::UInt32);
+ Tensor w0 = g->addTensor({2, 3, 4}, DataType::UInt32);
auto matmul = g->addOp<MatmulObj>(i0, w0, nullptr);
key2 = matmul->getOpPerfKey();
EXPECT_NE(key2.hash, 0);


@@ -0,0 +1,63 @@
#include "core/graph.h"
#include "core/run_enigne.h"
#include "operators/conv.h"
#include "test.h"
namespace infini {
TEST(Conv, ShapeInference) {
// Padding modes
{
Graph g = make_ref<GraphObj>();
Tensor i0 = g->addTensor({1, 3, 4, 4}, DataType::UInt32);
Tensor w0 = g->addTensor({2, 3, 3, 3}, DataType::UInt32);
auto conv = g->addOp<ConvObj>(i0, w0, nullptr, 1, 1);
EXPECT_EQ(conv->getOutput()->getDims(), (Shape{1, 2, 4, 4}));
}
{
Graph g = make_ref<GraphObj>();
Tensor i0 = g->addTensor({1, 3, 4, 4}, DataType::UInt32);
Tensor w0 = g->addTensor({2, 3, 3, 3}, DataType::UInt32);
auto conv =
g->addOp<ConvObj>(i0, w0, nullptr, ConvObj::PaddingMode::Same);
EXPECT_EQ(conv->getOutput()->getDims(), (Shape{1, 2, 4, 4}));
}
{
Graph g = make_ref<GraphObj>();
Tensor i0 = g->addTensor({1, 3, 4, 4}, DataType::UInt32);
Tensor w0 = g->addTensor({2, 3, 3, 3}, DataType::UInt32);
auto conv =
g->addOp<ConvObj>(i0, w0, nullptr, ConvObj::PaddingMode::Valid);
EXPECT_EQ(conv->getOutput()->getDims(), (Shape{1, 2, 2, 2}));
}
{ // dilation & stride
Graph g = make_ref<GraphObj>();
Tensor i0 = g->addTensor({1, 3, 4, 4}, DataType::UInt32);
Tensor w0 = g->addTensor({2, 3, 3, 3}, DataType::UInt32);
auto conv = g->addOp<ConvObj>(i0, w0, nullptr, 1, 1, 2, 1, 1, 2);
EXPECT_EQ(conv->getOutput()->getDims(), (Shape{1, 2, 2, 2}));
}
}
TEST(Conv, NaiveCPU) {
Graph g = make_ref<GraphObj>();
Tensor i0 = g->addTensor({1, 3, 4, 4}, DataType::UInt32);
Tensor w0 = g->addTensor({2, 3, 3, 3}, DataType::UInt32);
auto conv = g->addOp<ConvObj>(i0, w0, nullptr, 1, 1, 2, 1, 1, 2);
g->dataMalloc();
i0->setData(IncrementalGenerator());
w0->setData(IncrementalGenerator());
RunEngine(Device::CPU).run(g, true, true);
double perfTime = RunEngine(Device::CPU).getPerfTime(g);
// The example conv on this tiny input should finish within 5 ms on one core
EXPECT_GT(perfTime, 0);
EXPECT_LT(perfTime, 5);
// check answer
auto ans = make_ref<TensorObj>(Shape{1, 2, 2, 2}, DataType::UInt32);
ans->dataMalloc();
ans->copyData({4794, 4386, 8199, 7506, 11274, 10542, 20835, 19656});
EXPECT_TRUE(conv->getOutput()->equalData(ans));
}
} // namespace infini