forked from jiuyuan/InfiniTensor

Merge branch 'master' into xpu

commit c82d5fdc60
@@ -32,9 +32,9 @@ class GraphHandlerObj {
                               float momentum, float eps, bool training);

     Tensor maxPool(Tensor input, Tensor output, int kh, int kw, int dh, int dw,
-                   int ph, int pw, int sh, int sw);
+                   int ph, int pw, int sh, int sw, int ceilMode);
     Tensor avgPool(Tensor input, Tensor output, int kh, int kw, int dh, int dw,
-                   int ph, int pw, int sh, int sw);
+                   int ph, int pw, int sh, int sw, int ceilMode);

     Tensor add(Tensor a, Tensor b, Tensor c);
     Tensor sub(Tensor a, Tensor b, Tensor c);
@@ -45,12 +45,14 @@ class GraphHandlerObj {
     Tensor max(Tensor a, Tensor b, Tensor c);

     Tensor relu(Tensor x, Tensor y);
+    Tensor gelu(Tensor x, Tensor y);
     Tensor sigmoid(Tensor x, Tensor y);
     Tensor tanh(Tensor x, Tensor y);
     Tensor erf(Tensor x, Tensor y);
     Tensor softmax(Tensor x, Tensor y, int axis);
     Tensor abs(Tensor x, Tensor y);
     Tensor sqrt(Tensor x, Tensor y);
+    Tensor neg(Tensor x, Tensor y);
     Tensor shape(Tensor x, Tensor y);
     Tensor identity(Tensor x, Tensor y);
     Tensor flatten(Tensor s, Tensor y, int axis);
@@ -73,6 +73,7 @@ struct OpType {
         GatherElements,
         GatherND,
         Gemm,
+        Gelu,              // Unary
         GlobalAveragePool, // GlobalPool
         GlobalLpPool,      // GlobalPool
         GlobalMaxPool,     // GlobalPool
@@ -3,14 +3,15 @@
 #include "operators/unary.h"

 namespace infini {
-// TODO(constroy): num should be size_t.
-void softmax_kernel(float *input, float *output, int num);
-void relu_kernel(float *input, float *output, int num);
-void sigmoid_kernel(float *input, float *output, int num);
-void tanh_kernel(float *input, float *output, int num);
-void abs_kernel(float *input, float *output, int num);
-void sqrt_kernel(float *input, float *output, int num);
-void erf_kernel(float *input, float *output, int num);
+void softmax_kernel(float *input, float *output, size_t num);
+void relu_kernel(float *input, float *output, size_t num);
+void sigmoid_kernel(float *input, float *output, size_t num);
+void tanh_kernel(float *input, float *output, size_t num);
+void abs_kernel(float *input, float *output, size_t num);
+void sqrt_kernel(float *input, float *output, size_t num);
+void neg_kernel(float *input, float *output, size_t num);
+void gelu_kernel(float *input, float *output, size_t num);
+void erf_kernel(float *input, float *output, size_t num);

 void unary_kernel(const Operator &_op) {
     auto op = as<UnaryObj>(_op);
@@ -30,6 +31,10 @@ void unary_kernel(const Operator &_op) {
         abs_kernel(inputData, outputData, num);
     else if (op->getOpType() == OpType::Sqrt)
         sqrt_kernel(inputData, outputData, num);
+    else if (op->getOpType() == OpType::Gelu)
+        gelu_kernel(inputData, outputData, num);
+    else if (op->getOpType() == OpType::Neg)
+        neg_kernel(inputData, outputData, num);
     else if (op->getOpType() == OpType::Erf)
         erf_kernel(inputData, outputData, num);
     else
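Context for the int → size_t change above: the removed TODO anticipated that an int element count overflows once a tensor passes 2^31 − 1 elements. A minimal standalone sketch of the failure mode (not part of the commit):

#include <cstdint>
#include <cstdio>

int main() {
    // With 32-bit int math, byte counts overflow past ~2^31 elements;
    // promoting the count to size_t keeps the arithmetic 64-bit.
    int32_t n = 2147483647; // INT32_MAX elements
    std::printf("bytes needed: %zu\n", size_t(n) * sizeof(float)); // ~8 GiB
    return 0;
}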
@@ -12,6 +12,7 @@ class PoolingObj : public OperatorObj {
     int dh, dw;
     int ph, pw;
     int sh, sw;
+    int ceilMode;
     int n, c, h, w;

   public:
@@ -32,9 +33,12 @@ class PoolingObj : public OperatorObj {
      * @param pw Padding at the width dimension.
      * @param sh Stride at the height dimension.
      * @param sw Stride at the width dimension.
+     * @param ceilMode Whether to use ceil(1) or floor(0) to compute the output
+     * shape.
      */
     PoolingObj(GraphObj *graph, OpType optype, Tensor input, Tensor output,
-               int kh, int kw, int dh, int dw, int ph, int pw, int sh, int sw);
+               int kh, int kw, int dh, int dw, int ph, int pw, int sh, int sw,
+               int ceilMode);
     OP_CLONE(PoolingObj);

     optional<vector<Shape>> inferShape(const TensorVec &inputs) const override;
@@ -50,6 +54,7 @@ class PoolingObj : public OperatorObj {
     int getPw() const { return pw; }
     int getSh() const { return sh; }
     int getSw() const { return sw; }
+    int getCeilMode() const { return ceilMode; }

     auto getPadStrideDilation() const { return tuple(ph, pw, sh, sw, dh, dw); }
     auto getNCHWRS() const { return tuple(n, c, h, w, kh, kw); }
@@ -62,15 +67,15 @@ class PoolingObj : public OperatorObj {
 class MaxPoolObj : public PoolingObj {
   public:
     MaxPoolObj(GraphObj *graph, Tensor input, Tensor output, int kh, int kw,
-               int dh, int dw, int ph, int pw, int sh, int sw)
+               int dh, int dw, int ph, int pw, int sh, int sw, int ceilMode)
         : PoolingObj(graph, OpType::MaxPool, input, output, kh, kw, dh, dw, ph,
-                     pw, sh, sw) {}
+                     pw, sh, sw, ceilMode) {}
 };
 class AvgPoolObj : public PoolingObj {
   public:
     AvgPoolObj(GraphObj *graph, Tensor input, Tensor output, int kh, int kw,
-               int dh, int dw, int ph, int pw, int sh, int sw)
+               int dh, int dw, int ph, int pw, int sh, int sw, int ceilMode)
         : PoolingObj(graph, OpType::AveragePool, input, output, kh, kw, dh, dw,
-                     ph, pw, sh, sw) {}
+                     ph, pw, sh, sw, ceilMode) {}
 };
 }; // namespace infini
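For reference, the rule the new ceilMode flag selects between is the ONNX/PyTorch pooling output-size formula, implemented later in PoolingObj::inferShape. A standalone sketch of that arithmetic (the helper name pooledDim is illustrative, not from the commit):

#include <cmath>
#include <cstdio>

// out = floor_or_ceil((in + 2*pad - dilation*(kernel - 1) - 1) / stride + 1)
static int pooledDim(int in, int k, int d, int p, int s, int ceilMode) {
    float raw = float(in + 2 * p - d * (k - 1) - 1) / s + 1;
    return ceilMode ? int(std::ceil(raw)) : int(std::floor(raw));
}

int main() {
    // 6-wide input, 3-wide kernel, stride 2, no padding:
    // floor mode drops the last partial window, ceil mode keeps it.
    std::printf("floor: %d\n", pooledDim(6, 3, 1, 0, 2, 0)); // 2
    std::printf("ceil:  %d\n", pooledDim(6, 3, 1, 0, 2, 1)); // 3
    return 0;
}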
@@ -258,6 +258,7 @@ class LogObj : public OperatorObj {
 };

 DEFINE_UNARY_OBJ(Relu, OpType::Relu)
+DEFINE_UNARY_OBJ(Gelu, OpType::Gelu)
 DEFINE_UNARY_OBJ(Sigmoid, OpType::Sigmoid)
 DEFINE_UNARY_OBJ(Tanh, OpType::Tanh)
 // DEFINE_UNARY_OBJ(Softmax, OpType::Softmax)
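The one-line DEFINE_UNARY_OBJ(Gelu, OpType::Gelu) above stamps out a GeluObj class. A self-contained toy model of that macro pattern (stand-in types, not InfiniTensor's real definitions):

#include <cstdio>
#include <string>
#include <utility>

struct UnaryObj { // stand-in for the real base class
    std::string type;
    explicit UnaryObj(std::string t) : type(std::move(t)) {}
};

#define DEFINE_UNARY_OBJ(NAME)                                                 \
    struct NAME##Obj : UnaryObj {                                              \
        NAME##Obj() : UnaryObj(#NAME) {}                                       \
    };

DEFINE_UNARY_OBJ(Relu)
DEFINE_UNARY_OBJ(Gelu) // one extra line per new unary op

int main() {
    GeluObj g;
    std::printf("op type: %s\n", g.type.c_str());
    return 0;
}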
@@ -228,11 +228,12 @@ class OnnxStub:
                     "dilations": [1, 1],
                     "pads": [0, 0, 0, 0],
                     "strides": [1, 1],
+                    "ceil_mode": 0,
                 },
             )
-            (k, d, p, s) = (
+            (k, d, p, s, ceil_mode) = (
                 attributes[name]
-                for name in ["kernel_shape", "dilations", "pads", "strides"]
+                for name in ["kernel_shape", "dilations", "pads", "strides", "ceil_mode"]
             )
             if p[0] != p[2] or p[1] != p[3]:
                 adapt = "{}-adapt".format(node.output[0])
@@ -250,6 +251,7 @@ class OnnxStub:
                     0,
                     s[0],
                     s[1],
+                    ceil_mode,
                 )
             else:
                 tensors[node.output[0]] = self.handler.maxPool(
@@ -263,6 +265,7 @@ class OnnxStub:
                     p[1],
                     s[0],
                     s[1],
+                    ceil_mode,
                 )
         elif node.op_type == "AveragePool":
             attributes = _parse_attribute(
@@ -271,10 +274,11 @@ class OnnxStub:
                     "kernel_shape": None,
                     "pads": [0, 0, 0, 0],
                     "strides": [1, 1],
+                    "ceil_mode": 0,
                 },
             )
-            (k, p, s) = (
-                attributes[name] for name in ["kernel_shape", "pads", "strides"]
+            (k, p, s, ceil_mode) = (
+                attributes[name] for name in ["kernel_shape", "pads", "strides", "ceil_mode"]
             )
             if p[0] != p[2] or p[1] != p[3]:
                 adapt = "{}-adapt".format(node.output[0])
@@ -292,6 +296,7 @@ class OnnxStub:
                     0,
                     s[0],
                     s[1],
+                    ceil_mode,
                 )
             else:
                 tensors[node.output[0]] = self.handler.avgPool(
@@ -305,6 +310,7 @@ class OnnxStub:
                     p[1],
                     s[0],
                     s[1],
+                    ceil_mode,
                 )
         elif node.op_type == "GlobalAveragePool":
             [_, _, h, w] = _search_shape(model, node.input[0])
@@ -319,6 +325,7 @@ class OnnxStub:
                 0,
                 1,
                 1,
+                0,
             )
         elif node.op_type == "Add":
             tensors[node.output[0]] = self.handler.add(
@@ -367,6 +374,11 @@ class OnnxStub:
                 tensors[node.input[0]],
                 tensors.get(node.output[0]),
             )
+        elif node.op_type == "Gelu":
+            tensors[node.output[0]] = self.handler.gelu(
+                tensors[node.input[0]],
+                tensors.get(node.output[0]),
+            )
         elif node.op_type == "Sigmoid":
             tensors[node.output[0]] = self.handler.sigmoid(
                 tensors[node.input[0]],
@@ -396,6 +408,11 @@ class OnnxStub:
                 tensors[node.input[0]],
                 tensors.get(node.output[0]),
             )
+        elif node.op_type == "Neg":
+            tensors[node.output[0]] = self.handler.neg(
+                tensors[node.input[0]],
+                tensors.get(node.output[0]),
+            )
         elif node.op_type == "Shape":
             tensors[node.output[0]] = self.handler.shape(
                 tensors[node.input[0]],
@@ -866,7 +883,7 @@ class OnnxStub:
                     )
                 )
             elif ty == backend.OpTypeId.MaxPool:
-                kh, kw, dh, dw, ph, pw, sh, sw = backend.pool_attrs_of(op)
+                kh, kw, dh, dw, ph, pw, sh, sw, ceil_mode = backend.pool_attrs_of(op)
                 ctx.push_node(
                     make_node(
                         ty.name,
@@ -877,10 +894,11 @@ class OnnxStub:
                         pads=[ph, pw, ph, pw],
                         dilations=[dh, dw],
                         strides=[sh, sw],
+                        ceil_mode=ceil_mode,
                     )
                 )
             elif ty == backend.OpTypeId.AveragePool:
-                kh, kw, dh, dw, ph, pw, sh, sw = backend.pool_attrs_of(op)
+                kh, kw, dh, dw, ph, pw, sh, sw, ceil_mode = backend.pool_attrs_of(op)
                 ctx.push_node(
                     make_node(
                         "AveragePool",
@@ -890,6 +908,7 @@ class OnnxStub:
                         kernel_shape=[kh, kw],
                         pads=[ph, pw, ph, pw],
                         strides=[sh, sw],
+                        ceil_mode=ceil_mode,
                     )
                 )
             elif ty in [
@@ -899,6 +918,7 @@ class OnnxStub:
                 backend.OpTypeId.Div,
                 backend.OpTypeId.Pow,
                 backend.OpTypeId.Relu,
+                backend.OpTypeId.Gelu,
                 backend.OpTypeId.Sigmoid,
                 backend.OpTypeId.Tanh,
                 backend.OpTypeId.Softmax,
@@ -907,6 +927,7 @@ class OnnxStub:
                 backend.OpTypeId.PRelu,
                 backend.OpTypeId.Sqrt,
                 backend.OpTypeId.Erf,
+                backend.OpTypeId.Neg,
             ]:
                 ctx.push_node(make_node(ty.name, inputs, outputs, name))
             elif ty == backend.OpTypeId.Flatten:
@@ -208,6 +208,14 @@ class TestStringMethods(unittest.TestCase):
         relu = make_node("Relu", ["x"], ["y"], name="relu")
         make_and_import_model(make_graph([relu], "relu", [x], [y]))

+    '''Gelu operator is not supported by onnx 14.1 currently.'''
+    def test_gelu(self):
+        pass
+        # x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 3, 5, 7])
+        # y = make_tensor_value_info("y", TensorProto.FLOAT, [1, 3, 5, 7])
+        # gelu = make_node("Gelu", ["x"], ["y"], name="gelu")
+        # make_and_import_model(make_graph([gelu], "gelu", [x], [y]))
+
     def test_erf(self):
         x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 3, 5, 7])
         y = make_tensor_value_info("y", TensorProto.FLOAT, [1, 3, 5, 7])
@@ -243,6 +251,12 @@ class TestStringMethods(unittest.TestCase):
         y = make_tensor_value_info("y", TensorProto.FLOAT, [1, 3, 5, 7])
         abs = make_node("Abs", ["x"], ["y"], name="abs")
         make_and_import_model(make_graph([abs], "abs", [x], [y]))

+    def test_neg(self):
+        x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 3, 5, 7])
+        y = make_tensor_value_info("y", TensorProto.FLOAT, [1, 3, 5, 7])
+        neg = make_node("Neg", ["x"], ["y"], name="neg")
+        make_and_import_model(make_graph([neg], "neg", [x], [y]))
+
     def test_identity(self):
         x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 3, 5, 7])
@@ -210,10 +210,6 @@ void GraphObj::dataMalloc() {
                 tensorToOffset[tensor.get()]));
         }
     }
-
-#ifdef DEBUG_MODE
-    allocator.info();
-#endif
 }

 Tensor GraphObj::addTensor(Shape dim, DataType dtype) {
@@ -95,30 +95,30 @@ Tensor GraphHandlerObj::batchNormalization(Tensor input, Tensor output,
 }

 Tensor GraphHandlerObj::maxPool(Tensor input, Tensor output, int kh, int kw,
-                                int dh, int dw, int ph, int pw, int sh,
-                                int sw) {
+                                int dh, int dw, int ph, int pw, int sh, int sw,
+                                int ceilMode) {
     if (output) {
         g->addOpWithOutputs<MaxPoolObj>(std::move(input), output, kh, kw, dh,
-                                        dw, ph, pw, sh, sw);
+                                        dw, ph, pw, sh, sw, ceilMode);
         return output;
     } else {
         return g
             ->addOp<MaxPoolObj>(std::move(input), output, kh, kw, dh, dw, ph,
-                                pw, sh, sw)
+                                pw, sh, sw, ceilMode)
             ->getOutput();
     }
 }
 Tensor GraphHandlerObj::avgPool(Tensor input, Tensor output, int kh, int kw,
-                                int dh, int dw, int ph, int pw, int sh,
-                                int sw) {
+                                int dh, int dw, int ph, int pw, int sh, int sw,
+                                int ceilMode) {
     if (output) {
         g->addOpWithOutputs<AvgPoolObj>(std::move(input), output, kh, kw, dh,
-                                        dw, ph, pw, sh, sw);
+                                        dw, ph, pw, sh, sw, ceilMode);
         return output;
     } else {
         return g
             ->addOp<AvgPoolObj>(std::move(input), output, kh, kw, dh, dw, ph,
-                                pw, sh, sw)
+                                pw, sh, sw, ceilMode)
             ->getOutput();
     }
 }
@@ -155,10 +155,12 @@ DEFINE_ELEMENT_WISE_METHOD(max, Maximum)
 }

 DEFINE_UNARY_METHOD(relu, Relu)
+DEFINE_UNARY_METHOD(gelu, Gelu)
 DEFINE_UNARY_METHOD(sigmoid, Sigmoid)
 DEFINE_UNARY_METHOD(tanh, Tanh)
 DEFINE_UNARY_METHOD(abs, Abs)
 DEFINE_UNARY_METHOD(sqrt, Sqrt)
+DEFINE_UNARY_METHOD(neg, Neg)
 DEFINE_UNARY_METHOD(shape, Shape)
 DEFINE_UNARY_METHOD(erf, Erf)
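Both handlers above follow the same output-optional convention: a caller-supplied output tensor is wired in via addOpWithOutputs, otherwise addOp lets the graph allocate one and that fresh tensor is returned. A toy model of that control flow (types here are stand-ins, not InfiniTensor's):

#include <cstdio>
#include <memory>

struct Tensor { int id; };
using TensorPtr = std::shared_ptr<Tensor>;

TensorPtr maxPool(TensorPtr input, TensorPtr output) {
    if (output)
        return output; // addOpWithOutputs<MaxPoolObj>(...) path
    // addOp<MaxPoolObj>(...)->getOutput() path: the graph creates the tensor
    return std::make_shared<Tensor>(Tensor{input->id + 1});
}

int main() {
    auto in = std::make_shared<Tensor>(Tensor{1});
    std::printf("fresh output id: %d\n", maxPool(in, nullptr)->id);
    return 0;
}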
@@ -145,10 +145,10 @@ void LazyAllocator::free(size_t addr, size_t size) {
 void *LazyAllocator::getPtr() {
     if (this->ptr == nullptr) {
         this->ptr = runtime->alloc(this->peak);
-#ifdef DEBUG_MODE
-        printf("LazyAllocator really alloc non-weight: %p %lu bytes\n",
-               this->ptr, peak);
-#endif
+        // #ifdef DEBUG_MODE
+        //     printf("LazyAllocator really alloc non-weight: %p %lu
+        //     bytes\n", this->ptr, peak);
+        // #endif
     }
     return this->ptr;
 }
@@ -156,10 +156,10 @@ void *LazyAllocator::getPtr() {
 void *LazyAllocator::getWeightPtr() {
     if (this->weightPtr == nullptr) {
         this->weightPtr = runtime->alloc(this->weightPeak);
-#ifdef DEBUG_MODE
-        printf("LazyAllocator really alloc weight: %p %lu bytes\n",
-               this->weightPtr, weightPeak);
-#endif
+        // #ifdef DEBUG_MODE
+        //     printf("LazyAllocator really alloc weight: %p %lu bytes\n",
+        //     this->weightPtr, weightPeak);
+        // #endif
     }
     return this->weightPtr;
 }
@@ -142,6 +142,7 @@ const char *OpType::toString() const {
         CASE(ReduceSum);
         CASE(ReduceSumSquare);
         CASE(Relu);
+        CASE(Gelu);
         CASE(Reshape);
         CASE(Resize);
         CASE(ReverseSequence);
@@ -232,9 +233,9 @@ const char *OpType::toString() const {

 bool OpType::isUnary() const {
     static const std::unordered_set<decltype(type)> set{
-        Abs,  Acos, Acosh, Asin,    Asinh, Atan, Atanh, Cast, Ceil,
-        Clip, Cos,  Cosh,  Erf,     Exp,   Floor, Log,  Neg,  Not,
-        Relu, Round, Sigmoid, Sin,  Sinh,  Sqrt, Tan,   Tanh,
+        Abs,  Acos, Acosh, Asin,    Asinh, Atan, Atanh, Cast, Ceil,
+        Clip, Cos,  Cosh,  Erf,     Exp,   Floor, Log,  Neg,  Not,
+        Relu, Gelu, Round, Sigmoid, Sin,   Sinh,  Sqrt, Tan,  Tanh,
     };

     return set.find(type) != set.end();
@@ -95,6 +95,7 @@ void export_values(py::module &m) {
         .VALUE(OpType, BatchNormalization)
         .VALUE(OpType, Softmax)
         .VALUE(OpType, Relu)
+        .VALUE(OpType, Gelu)
         .VALUE(OpType, PRelu)
         .VALUE(OpType, Sigmoid)
         .VALUE(OpType, Tanh)
@@ -103,6 +104,7 @@ void export_values(py::module &m) {
         .VALUE(OpType, Dropout)
         .VALUE(OpType, Cast)
         .VALUE(OpType, Sqrt)
+        .VALUE(OpType, Neg)
         .VALUE(OpType, Expand)
         .VALUE(OpType, Erf)
         .VALUE(OpType, Where)
@@ -194,14 +196,14 @@ static std::tuple<float, float, bool> batch_norm_attrs_of(Operator op) {
                            batchnorm->getTrainingMode());
 }

-static std::tuple<int, int, int, int, int, int, int, int>
+static std::tuple<int, int, int, int, int, int, int, int, int>
 pool_attrs_of(Operator op) {
     IT_ASSERT(op->getOpType() == OpType::MaxPool ||
               op->getOpType() == OpType::AveragePool);
     auto pool = dynamic_cast<const PoolingObj *>(op.get());
     return std::make_tuple(pool->getKh(), pool->getKw(), pool->getDh(),
                            pool->getDw(), pool->getPh(), pool->getPw(),
-                           pool->getSh(), pool->getSw());
+                           pool->getSh(), pool->getSw(), pool->getCeilMode());
 }

 static std::tuple<std::optional<float>, std::optional<float>>
@@ -454,11 +456,13 @@ void init_graph_builder(py::module &m) {
         .def("min", &Handler::min, policy::move)
         .def("max", &Handler::max, policy::move)
         .def("relu", &Handler::relu, policy::move)
+        .def("gelu", &Handler::gelu, policy::move)
         .def("sigmoid", &Handler::sigmoid, policy::move)
         .def("tanh", &Handler::tanh, policy::move)
         .def("softmax", &Handler::softmax, policy::move)
         .def("abs", &Handler::abs, policy::move)
         .def("sqrt", &Handler::sqrt, policy::move)
+        .def("neg", &Handler::neg, policy::move)
         .def("shape", &Handler::shape, policy::move)
         .def("identity", &Handler::identity, policy::move)
         .def("flatten", &Handler::flatten, policy::move)
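pool_attrs_of now returns a 9-tuple, and pybind11 converts std::tuple to a plain Python tuple automatically, which is why the Python exporter above simply unpacks one extra ceil_mode value. A minimal sketch of that mechanism (the module name "example" is an assumption):

#include <pybind11/pybind11.h>
#include <tuple>

// A 3-tuple here stands in for the real 9-int pool_attrs_of result.
static std::tuple<int, int, int> attrs() { return {3, 3, 0}; }

PYBIND11_MODULE(example, m) {
    m.def("attrs", &attrs); // Python: kh, kw, ceil_mode = example.attrs()
}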
@@ -30,6 +30,7 @@ class PoolingCnnl : public BangKernelWithoutConfig {
             ph, pw, pw, sh, sw, dh, dw, false));

         // get outputs
+        // TODO: verify ceiling mode
         auto outVec = op->getOutput()->getDims();
         int outArray[4] = {outVec[0], outVec[1], outVec[2], outVec[3]};
         cnnlTensorDescriptor_t outDesc;
@@ -21,6 +21,7 @@ template <typename T> class NativePooling : public CpuKernelWithoutConfig {
                 auto inoffset = i * (c * ih * iw) + j * ih * iw;
                 for (auto h = 0; h < oh; h++) {
                     for (auto w = 0; w < ow; w++) {
+                        // TODO: verify ceil mode
                         T val =
                             getPoolingValue(kh, kw, h * sh - ph, w * sw - pw,
                                             ih, iw, inptr + inoffset);
@@ -78,6 +78,11 @@ template <typename T> class NaiveSinh : public NativeUnary<T> {

 template <typename T> class NaiveCosh : public NativeUnary<T> {
     T doCompute(T val) const override { return std::cosh(val); }
+};
+
+template <typename T> class NaiveGelu : public NativeUnary<T> {
+    T doCompute(T val) const override {
+        return 0.5 * val * (1 + std::erf(val / std::sqrt(2)));
+    }
 };

 template <typename T> class NaiveErf : public NativeUnary<T> {
@@ -102,6 +107,10 @@ template <typename T> class NaiveASinh : public NativeUnary<T> {

 template <typename T> class NaiveATanh : public NativeUnary<T> {
     T doCompute(T val) const override { return std::atanh(val); }
+};
+
+template <typename T> class NaiveNeg : public NativeUnary<T> {
+    T doCompute(T val) const override { return -val; }
 };

 template <typename T> class Clip : public CpuKernelWithoutConfig {
@@ -164,6 +173,10 @@ REGISTER_KERNEL(Device::CPU, OpType::Relu, DataType::UInt32,
                 NaiveRelu<uint32_t>, "reluNaive_CPU_uint32");
 REGISTER_KERNEL(Device::CPU, OpType::Relu, DataType::Float32, NaiveRelu<float>,
                 "reluNaive_CPU_float32");
+REGISTER_KERNEL(Device::CPU, OpType::Gelu, DataType::UInt32, NaiveGelu<float>,
+                "geluNaive_CPU_float32");
+REGISTER_KERNEL(Device::CPU, OpType::Gelu, DataType::Float32, NaiveGelu<float>,
+                "geluNaive_CPU_float32");
 REGISTER_KERNEL(Device::CPU, OpType::Sigmoid, DataType::UInt32,
                 NaiveSigmoid<uint32_t>, "sigmoidNaive_CPU_uint32");
 REGISTER_KERNEL(Device::CPU, OpType::Sigmoid, DataType::Float32,
@@ -180,6 +193,8 @@ REGISTER_KERNEL(Device::CPU, OpType::Sqrt, DataType::Float32, NaiveSqrt<float>,
                 "sqrtNaive_CPU_float32");
 REGISTER_KERNEL(Device::CPU, OpType::Erf, DataType::Float32, NaiveErf<float>,
                 "erfNaive_CPU_float32");
+REGISTER_KERNEL(Device::CPU, OpType::Neg, DataType::Float32, NaiveNeg<float>,
+                "negNaive_CPU_float32");
 REGISTER_KERNEL(Device::CPU, OpType::Softmax, DataType::UInt32,
                 NaiveSoftmax<uint32_t>, "softmaxNaive_CPU_uint32");
 REGISTER_KERNEL(Device::CPU, OpType::Softmax, DataType::Float32,
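NaiveGelu above implements the exact erf-based GELU, gelu(x) = 0.5 · x · (1 + erf(x/√2)). A standalone spot check of that formula:

#include <cmath>
#include <cstdio>

static float gelu(float x) {
    return 0.5f * x * (1.0f + std::erf(x / std::sqrt(2.0f)));
}

int main() {
    // gelu(0) = 0; gelu(x) approaches x for large x and 0 for very negative x.
    for (float x : {-2.0f, 0.0f, 2.0f})
        std::printf("gelu(% .1f) = % .6f\n", x, gelu(x));
    return 0;
}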
@@ -29,17 +29,27 @@ class poolingCudnn : public CudaKernelWithoutConfig {
             pw, sh, sw));

         // get outputs
-        int outn, outc, outh, outw;
-        checkCudnnError(cudnnGetPooling2dForwardOutputDim(
-            poolingDesc, inDesc, &outn, &outc, &outh, &outw));
+        auto outDims = op->getOutput()->getDims();
+        int outn = outDims[0], outc = outDims[1], outh = outDims[2],
+            outw = outDims[3];
+        // NOTICE: cudnn pooling does not support ceil mode, so the shape
+        // inference of cudnn pooling is not consistant with our framework. Ceil
+        // mode is also supported in Pytorch and ONNX. See
+        // https://pytorch.org/docs/stable/generated/torch.nn.MaxPool2d.html#torch.nn.MaxPool2d
+        // and https://github.com/onnx/onnx/blob/main/docs/Operators.md#MaxPool
+        // for reference.
+        // TODO: Make sure the result after considering ceil mode is correct.
+        // int outn, outc, outh, outw;
+        // checkCudnnError(cudnnGetPooling2dForwardOutputDim(poolingDesc,
+        // inDesc, &outn, &outc, &outh, &outw));
         cudnnTensorDescriptor_t outDesc;
         checkCudnnError(cudnnCreateTensorDescriptor(&outDesc));
         checkCudnnError(cudnnSetTensor4dDescriptor(outDesc, CUDNN_TENSOR_NCHW,
                                                    CUDNN_DATA_FLOAT, outn, outc,
                                                    outh, outw));
-        IT_ASSERT((vector{outn, outc, outh, outw}) ==
-                      op->getOutput()->getDims(),
-                  "cuDNN output shape mismatches with OP output shape");
+        // IT_ASSERT((vector{outn, outc, outh, outw}) ==
+        //               op->getOutput()->getDims(),
+        //           "cuDNN output shape mismatches with OP output shape");

         float alpha = 1.f, beta = 0.f;
         checkCudnnError(cudnnPoolingForward(context->cudnnHandle(), poolingDesc,
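The swap above matters because cuDNN's own output-dimension inference always floors; with ceil mode on, the framework's inferred dimension can be one larger, so the kernel now trusts op->getOutput()->getDims() instead. A small demonstration of the discrepancy (numbers are illustrative):

#include <cmath>
#include <cstdio>

int main() {
    int in = 6, k = 3, p = 0, s = 2, d = 1;
    float raw = float(in + 2 * p - d * (k - 1) - 1) / s + 1; // 2.5
    std::printf("cuDNN-style floor: %d, ceil mode: %d\n",
                int(std::floor(raw)), int(std::ceil(raw))); // 2 vs 3
    return 0;
}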
@@ -140,6 +140,10 @@ REGISTER_KERNEL(Device::CUDA, OpType::Abs, DataType::Float32, UnaryCuda,
                 "Abs_CUDA_Float32");
 REGISTER_KERNEL(Device::CUDA, OpType::Sqrt, DataType::Float32, UnaryCuda,
                 "Sqrt_CUDA_Float32");
+REGISTER_KERNEL(Device::CUDA, OpType::Gelu, DataType::Float32, UnaryCuda,
+                "Gelu_CUDA_Float32");
+REGISTER_KERNEL(Device::CUDA, OpType::Neg, DataType::Float32, UnaryCuda,
+                "Neg_CUDA_Float32");
 REGISTER_KERNEL(Device::CUDA, OpType::Erf, DataType::Float32, UnaryCuda,
                 "Erf_CUDA_Float32");
@@ -8,7 +8,7 @@ constexpr unsigned int num_threads() { return 32 * 4; }
 constexpr int thread_work_size() { return 4; }
 constexpr int block_work_size() { return thread_work_size() * num_threads(); }

-__global__ void _softmax_kernel1(float *input, float *output, int n) {
+__global__ void _softmax_kernel1(float *input, float *output, size_t n) {
     float sum = 0.0f;
     for (size_t i = 0; i < n; ++i) {
         sum += pow(E_CONSTANT, input[i]);
@@ -16,106 +16,136 @@ __global__ void _softmax_kernel1(float *input, float *output, int n) {
     *output = sum;
 }

-__global__ void _softmax_kernel2(float *input, float *output, int n) {
+__global__ void _softmax_kernel2(float *input, float *output, size_t n) {
     float sum = *output;
-    int index = threadIdx.x + blockIdx.x * blockDim.x;
-    int stride = blockDim.x * gridDim.x;
-    for (int i = index; i < n; i += stride) {
+    size_t index = threadIdx.x + blockIdx.x * blockDim.x;
+    size_t stride = blockDim.x * gridDim.x;
+    for (size_t i = index; i < n; i += stride) {
        output[i] = pow(E_CONSTANT, input[i]) / sum;
     }
 }

-__global__ void _relu_kernel(float *input, float *output, int n) {
-    int index = threadIdx.x + blockIdx.x * blockDim.x;
-    int stride = blockDim.x * gridDim.x;
-    for (int i = index; i < n; i += stride) {
+__global__ void _relu_kernel(float *input, float *output, size_t n) {
+    size_t index = threadIdx.x + blockIdx.x * blockDim.x;
+    size_t stride = blockDim.x * gridDim.x;
+    for (size_t i = index; i < n; i += stride) {
         output[i] = max(input[i], float(0));
     }
 }

-__global__ void _sigmoid_kernel(float *input, float *output, int n) {
-    int index = threadIdx.x + blockIdx.x * blockDim.x;
-    int stride = blockDim.x * gridDim.x;
-    for (int i = index; i < n; i += stride) {
+__global__ void _sigmoid_kernel(float *input, float *output, size_t n) {
+    size_t index = threadIdx.x + blockIdx.x * blockDim.x;
+    size_t stride = blockDim.x * gridDim.x;
+    for (size_t i = index; i < n; i += stride) {
         output[i] = 1 / (1 + pow(E_CONSTANT, -input[i]));
     }
 }

-__global__ void _tanh_kernel(float *input, float *output, int n) {
-    int index = threadIdx.x + blockIdx.x * blockDim.x;
-    int stride = blockDim.x * gridDim.x;
-    for (int i = index; i < n; i += stride) {
+__global__ void _tanh_kernel(float *input, float *output, size_t n) {
+    size_t index = threadIdx.x + blockIdx.x * blockDim.x;
+    size_t stride = blockDim.x * gridDim.x;
+    for (size_t i = index; i < n; i += stride) {
         output[i] = (pow(E_CONSTANT, input[i]) - pow(E_CONSTANT, -input[i])) /
                     (pow(E_CONSTANT, input[i]) + pow(E_CONSTANT, -input[i]));
     }
 }

-__global__ void _abs_kernel(float *input, float *output, int n) {
-    int index = threadIdx.x + blockIdx.x * blockDim.x;
-    int stride = blockDim.x * gridDim.x;
-    for (int i = index; i < n; i += stride) {
+__global__ void _abs_kernel(float *input, float *output, size_t n) {
+    size_t index = threadIdx.x + blockIdx.x * blockDim.x;
+    size_t stride = blockDim.x * gridDim.x;
+    for (size_t i = index; i < n; i += stride) {
         output[i] = input[i] < 0 ? -input[i] : input[i];
     }
 }

-__global__ void _sqrt_kernel(float *input, float *output, int n) {
-    int index = threadIdx.x + blockIdx.x * blockDim.x;
-    int stride = blockDim.x * gridDim.x;
-    for (int i = index; i < n; i += stride) {
+__global__ void _sqrt_kernel(float *input, float *output, size_t n) {
+    size_t index = threadIdx.x + blockIdx.x * blockDim.x;
+    size_t stride = blockDim.x * gridDim.x;
+    for (size_t i = index; i < n; i += stride) {
         output[i] = sqrt(input[i]);
     }
 }

-__global__ void _erf_kernel(float *input, float *output, int n) {
+__global__ void _gelu_kernel(float *input, float *output, size_t n) {
     int index = threadIdx.x + blockIdx.x * blockDim.x;
     int stride = blockDim.x * gridDim.x;
     for (int i = index; i < n; i += stride) {
+        float x = input[i];
+        output[i] = 0.5 * x * (1 + erf(x / sqrt(2.0f)));
+    }
+}
+
+__global__ void _erf_kernel(float *input, float *output, size_t n) {
+    size_t index = threadIdx.x + blockIdx.x * blockDim.x;
+    size_t stride = blockDim.x * gridDim.x;
+    for (int i = index; i < n; i += stride) {
         output[i] = erf(input[i]);
     }
 }
+
+template <typename T>
+__global__ void _neg_kernel(T *input, T *output, size_t n) {
+    size_t index = threadIdx.x + blockIdx.x * blockDim.x;
+    size_t stride = blockDim.x * gridDim.x;
+    for (size_t i = index; i < n; i += stride) {
+        output[i] = -input[i];
+    }
+}

 namespace infini {
-void softmax_kernel(float *input, float *output, int num) {
+void softmax_kernel(float *input, float *output, size_t num) {

     int blocksize = block_work_size();
     int gridsize = (num + block_work_size() - 1) / block_work_size();
     _softmax_kernel1<<<1, 1>>>(input, output, num);
     _softmax_kernel2<<<gridsize, blocksize>>>(input, output, num);
 }
-void relu_kernel(float *input, float *output, int num) {
+void relu_kernel(float *input, float *output, size_t num) {

     int blocksize = block_work_size();
     int gridsize = (num + block_work_size() - 1) / block_work_size();
     _relu_kernel<<<gridsize, blocksize>>>(input, output, num);
 }
-void sigmoid_kernel(float *input, float *output, int num) {
+void sigmoid_kernel(float *input, float *output, size_t num) {

     int blocksize = block_work_size();
     int gridsize = (num + block_work_size() - 1) / block_work_size();
     _sigmoid_kernel<<<gridsize, blocksize>>>(input, output, num);
 }
-void tanh_kernel(float *input, float *output, int num) {
+void tanh_kernel(float *input, float *output, size_t num) {

     int blocksize = block_work_size();
     int gridsize = (num + block_work_size() - 1) / block_work_size();
     _tanh_kernel<<<gridsize, blocksize>>>(input, output, num);
 }
-void abs_kernel(float *input, float *output, int num) {
+void abs_kernel(float *input, float *output, size_t num) {

     int blocksize = block_work_size();
     int gridsize = (num + block_work_size() - 1) / block_work_size();
     _abs_kernel<<<gridsize, blocksize>>>(input, output, num);
 }
-void sqrt_kernel(float *input, float *output, int num) {
+void sqrt_kernel(float *input, float *output, size_t num) {

     int blocksize = block_work_size();
     int gridsize = (num + block_work_size() - 1) / block_work_size();
     _sqrt_kernel<<<gridsize, blocksize>>>(input, output, num);
 }
-void erf_kernel(float *input, float *output, int num) {
+void gelu_kernel(float *input, float *output, size_t num) {

     int blocksize = block_work_size();
     int gridsize = (num + block_work_size() - 1) / block_work_size();
+    _gelu_kernel<<<gridsize, blocksize>>>(input, output, num);
+}
+void erf_kernel(float *input, float *output, size_t num) {
+
+    int blocksize = block_work_size();
+    int gridsize = (num + block_work_size() - 1) / block_work_size();
     _erf_kernel<<<gridsize, blocksize>>>(input, output, num);
 }
+void neg_kernel(float *input, float *output, size_t num) {
+
+    int blocksize = block_work_size();
+    int gridsize = (num + block_work_size() - 1) / block_work_size();
+    _neg_kernel<<<gridsize, blocksize>>>(input, output, num);
+}
 }; // namespace infini
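All kernels above share the same grid-stride loop: each thread starts at its global index and hops by the total thread count, so any n is covered no matter the grid size. The host-side launch arithmetic is a ceiling division. A CPU-side sketch of both (launch constants copied from the file; the serial emulation is illustrative):

#include <cstddef>
#include <cstdio>
#include <vector>

constexpr unsigned int num_threads() { return 32 * 4; }
constexpr int thread_work_size() { return 4; }
constexpr int block_work_size() { return thread_work_size() * num_threads(); }

int main() {
    size_t num = 1000;
    int blocksize = block_work_size();
    // Ceiling division: enough blocks to cover all `num` elements.
    int gridsize = int((num + block_work_size() - 1) / block_work_size());
    std::printf("launch <<<%d, %d>>>\n", gridsize, blocksize);

    // Grid-stride traversal, emulated serially: thread `index` touches
    // index, index + stride, index + 2*stride, ... exactly once each.
    size_t stride = size_t(gridsize) * blocksize;
    std::vector<int> hits(num, 0);
    for (size_t index = 0; index < stride; ++index)
        for (size_t i = index; i < num; i += stride)
            hits[i]++;
    bool ok = true;
    for (int h : hits)
        ok = ok && (h == 1);
    std::printf("each element visited exactly once: %s\n", ok ? "yes" : "no");
    return 0;
}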
@@ -4,11 +4,9 @@ namespace infini {

 PoolingObj::PoolingObj(GraphObj *graph, OpType optype, Tensor input,
                        Tensor output, int kh, int kw, int dh, int dw, int ph,
-                       int pw, int sh, int sw)
-    : OperatorObj(optype, {input}, {output}),
-      kh(kh), kw(kw), dh(dh), dw(dw), ph(ph), pw(pw), sh(sh), sw(sw),
+                       int pw, int sh, int sw, int ceilMode)
+    : OperatorObj(optype, {input}, {output}), kh(kh), kw(kw), dh(dh), dw(dw),
+      ph(ph), pw(pw), sh(sh), sw(sw), ceilMode(ceilMode),
       n(input->getDims()[0]), c(input->getDims()[1]), h(input->getDims()[2]),
       w(input->getDims()[3]) {
     IT_ASSERT(checkValid(graph));
@@ -18,8 +16,14 @@ optional<vector<Shape>> PoolingObj::inferShape(const TensorVec &inputs) const {
     const auto &input = inputs[0];
     auto h = input->getDims()[input->getRank() - 2],
          w = input->getDims()[input->getRank() - 1];
-    int oh = (h - (kh - sh) + ph * 2) / sh;
-    int ow = (w - (kw - sw) + pw * 2) / sw;
+    int oh, ow;
+    if (ceilMode) {
+        oh = ceil(((float)(h + 2 * ph - dh * (kh - 1) - 1)) / sh + 1);
+        ow = ceil(((float)(w + 2 * pw - dw * (kw - 1) - 1)) / sw + 1);
+    } else {
+        oh = floor(((float)(h + 2 * ph - dh * (kh - 1) - 1)) / sh + 1);
+        ow = floor(((float)(w + 2 * pw - dw * (kw - 1) - 1)) / sw + 1);
+    }
     auto ret = input->getDims();
     ret[input->getRank() - 2] = oh;
     ret[input->getRank() - 1] = ow;
@@ -34,17 +38,19 @@ std::string PoolingObj::toString() const {
     os << "p=[" << ph << "," << pw << "],";
     os << "s=[" << sh << "," << sw << "],";
     os << "d=[" << dh << "," << dw << "],";
+    os << "ceil mode=" << ceilMode << ",";
     os << "input=" << inputs[0]->getGuid() << ",";
     os << "output=" << outputs[0]->getGuid() << ")";
     return os.str();
 }

 vector<int> PoolingObj::getWorkloadVector() const {
-    return {type.underlying(), n, c, h, w, kh, kw, ph, pw, sh, sw, dh, dw};
+    return {type.underlying(), n, c, h, w, kh, kw, ph, pw, sh, sw, dh, dw,
+            ceilMode};
 }

 vector<int> PoolingObj::getOpAttrVector() const {
-    return {type.underlying(), kh, kw, ph, pw, sh, sw, dh, dw};
+    return {type.underlying(), kh, kw, ph, pw, sh, sw, dh, dw, ceilMode};
 }

 }; // namespace infini
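ceilMode is appended to both getWorkloadVector and getOpAttrVector above so that two pools differing only in rounding mode are never conflated when these vectors are compared or used as cache keys. A toy illustration (the values are made up):

#include <cstdio>
#include <vector>

int main() {
    // type, kh, kw, ph, pw, sh, sw, dh, dw, ceilMode
    std::vector<int> floorPool{13, 3, 3, 0, 0, 2, 2, 1, 1, 0};
    std::vector<int> ceilPool{13, 3, 3, 0, 0, 2, 2, 1, 1, 1};
    std::printf("attr vectors distinct: %s\n",
                floorPool != ceilPool ? "yes" : "no");
    return 0;
}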
@@ -208,16 +208,13 @@ TEST(MatchGraph, multi_output) {
     SubGraph subg0 = make_ref<SubGraphObj>(runtime, TensorVec{i});
     {
         auto maxpool =
-            subg0->addOp<MaxPoolObj>(i, nullptr, 3, 3, 0, 0, 0, 0, 2, 2);
+            subg0->addOp<MaxPoolObj>(i, nullptr, 3, 3, 1, 1, 0, 0, 2, 2, 0);
         Tensor w0 = subg0->addTensor(Shape{64, 192, 1, 1}, DataType::UInt32);
         auto conv0 = subg0->addOp<ConvObj>(maxpool->getOutput(0), w0, nullptr);
         auto relu0 = subg0->addOp<ReluObj>(conv0->getOutput(0), nullptr);

-        auto pad = subg0->addOp<PadObj>(maxpool->getOutput(0), nullptr,
-                                        vector<int>{0, 0, 1, 1, 0, 0, 1, 1},
-                                        std::nullopt);
-        auto avgpool = subg0->addOp<AvgPoolObj>(pad->getOutput(0), nullptr, 3,
-                                                3, 0, 0, 0, 0, 1, 1);
+        auto avgpool = subg0->addOp<AvgPoolObj>(maxpool->getOutput(0), nullptr,
+                                                3, 3, 0, 0, 0, 0, 1, 1, 0);
         subg0->setOutputs(
             TensorVec{relu0->getOutput(0), avgpool->getOutput(0)});
     }
@@ -225,8 +222,9 @@ TEST(MatchGraph, multi_output) {
     SubGraph subg1 =
         make_ref<SubGraphObj>(runtime, TensorVec{i->clone(runtime)});
     {
-        auto avgpool = subg1->addOp<AvgPoolObj>(
-            subg1->getInputsFromOutside()[0], nullptr, 3, 3, 0, 0, 0, 0, 2, 2);
+        auto avgpool =
+            subg1->addOp<AvgPoolObj>(subg1->getInputsFromOutside()[0], nullptr,
+                                     3, 3, 1, 1, 0, 0, 2, 2, 0);

         auto relu0 = subg1->addOp<ReluObj>(avgpool->getOutput(0), nullptr);

@@ -295,7 +293,7 @@ TEST(MatchGraph, multi_input_output) {
         Tensor w2 = subg0->addTensor(Shape{128, 256, 1, 1}, DataType::UInt32);
         auto conv2 = subg0->addOp<ConvObj>(relu1->getOutput(0), w2, nullptr);
         auto maxpool = subg0->addOp<MaxPoolObj>(relu1->getOutput(0), nullptr, 3,
-                                                3, 0, 0, 0, 0, 2, 2);
+                                                3, 1, 1, 0, 0, 2, 2, 0);
         subg0->setOutputs(
             TensorVec{conv2->getOutput(0), maxpool->getOutput(0)});
     }
@@ -317,7 +315,7 @@ TEST(MatchGraph, multi_input_output) {
         Tensor w2 = subg1->addTensor(Shape{128, 256, 1, 1}, DataType::UInt32);
         auto conv2 = subg1->addOp<ConvObj>(relu1->getOutput(0), w2, nullptr);
         auto maxpool = subg1->addOp<MaxPoolObj>(relu1->getOutput(0), nullptr, 3,
-                                                3, 0, 0, 0, 0, 2, 2);
+                                                3, 1, 1, 0, 0, 2, 2, 0);
         subg1->setOutputs(
             TensorVec{maxpool->getOutput(0), conv2->getOutput(0)});
     }
@@ -338,7 +336,7 @@ TEST(MatchGraph, multi_input_output) {
         Tensor w2 = subg2->addTensor(Shape{128, 256, 1, 1}, DataType::UInt32);
         auto conv2 = subg2->addOp<ConvObj>(relu1->getOutput(0), w2, nullptr);
         auto avgpool = subg2->addOp<AvgPoolObj>(relu1->getOutput(0), nullptr, 3,
-                                                3, 0, 0, 0, 0, 2, 2);
+                                                3, 1, 1, 0, 0, 2, 2, 0);
         subg2->setOutputs(
             TensorVec{conv2->getOutput(0), avgpool->getOutput(0)});
     }
@@ -349,7 +347,7 @@ TEST(MatchGraph, multi_input_output) {
         auto i = g->addTensor(Shape{1, 64, 112, 112}, DataType::UInt32);
         auto relu = g->addOp<ReluObj>(i, nullptr);
         auto maxPool = g->addOp<MaxPoolObj>(relu->getOutput(0), nullptr, 3, 3,
-                                            0, 0, 1, 1, 2, 2);
+                                            1, 1, 1, 1, 2, 2, 0);
         auto out0 =
             v.addSubGraph(subg0, {relu->getOutput(0), maxPool->getOutput(0)});
         auto out1 =
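Note that several of these tests also change pooling dilation from 0 to 1, presumably because the new inferShape multiplies dilation into the effective kernel extent (d · (k − 1) + 1), so d = 0 would make a 3×3 window act like 1×1. A quick check of the difference:

#include <cmath>
#include <cstdio>

int main() {
    int h = 112, k = 3, p = 0, s = 2;
    for (int d : {0, 1}) {
        int oh = int(std::floor(float(h + 2 * p - d * (k - 1) - 1) / s + 1));
        std::printf("d=%d -> oh=%d\n", d, oh); // d=0 -> 56, d=1 -> 55
    }
    return 0;
}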
@@ -8,7 +8,8 @@

 namespace infini {

-template <class T>
+template <class T, typename std::enable_if<std::is_base_of<PoolingObj, T>{},
+                                           int>::type = 0>
 void testPooling(const std::function<void(void *, size_t, DataType)> &generator,
                  const Shape &shape) {
     // Runtime
@@ -23,7 +24,8 @@ void testPooling(const std::function<void(void *, size_t, DataType)> &generator,
     // GPU
     Graph bangGraph = make_ref<GraphObj>(bangRuntime);
     auto inputGpu = bangGraph->cloneTensor(inputCpu);
-    auto gpuOp = bangGraph->addOp<T>(inputGpu, nullptr, 3, 3, 1, 1, 1, 1, 2, 2);
+    auto gpuOp =
+        bangGraph->addOp<T>(inputGpu, nullptr, 3, 3, 1, 1, 1, 1, 2, 2, 0);
     bangGraph->dataMalloc();
     bangRuntime->run(bangGraph);
     auto outputGpu = gpuOp->getOutput();
@@ -29,7 +29,7 @@ TEST(CUDA_Inception_v3_block, run) {
     TensorVec outputs;
     vector<OpVec> ops;
     auto maxpool =
-        g->addOp<MaxPoolObj>(blockInput, nullptr, 3, 3, 1, 1, 1, 1, 1, 1);
+        g->addOp<MaxPoolObj>(blockInput, nullptr, 3, 3, 1, 1, 1, 1, 1, 1, 0);
     auto chainInput = maxpool->getOutput();
     for (auto &pathConfig : configs) {
         int inputChannels = initialChannels;
@@ -52,7 +52,7 @@ TEST(CUDA_Inception_v3_block, run) {
                 inputChannels = f;
             } else { // Add AveragePool
                 auto pool = g->addOp<AvgPoolObj>(input, nullptr, r, r, 1, 1,
-                                                 r / 2, r / 2, 1, 1);
+                                                 r / 2, r / 2, 1, 1, 0);
                 input = pool->getOutput();
                 ops.back().emplace_back(pool);
             }
@@ -9,7 +9,8 @@ namespace infini {
 using KDPS = vector<int>;
 using ExpectOutput = vector<float>;

-template <class T>
+template <class T, typename std::enable_if<std::is_base_of<PoolingObj, T>{},
+                                           int>::type = 0>
 void testPoolCudnn(
     const std::function<void(void *, size_t, DataType)> &generator,
     const Shape &shape, const KDPS &kdps, const ExpectOutput &ansVec) {
@@ -24,7 +25,7 @@ void testPoolCudnn(
     Graph g = make_ref<GraphObj>(cudaRuntime);
     auto i0 = g->cloneTensor(i0cpu);
     auto pool = g->addOp<T>(i0, nullptr, kdps[0], kdps[1], kdps[2], kdps[3],
-                            kdps[4], kdps[5], kdps[6], kdps[7]);
+                            kdps[4], kdps[5], kdps[6], kdps[7], 0);

     // allocate CUDA memory
     g->dataMalloc();
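The enable_if constraint added to both pooling test templates restricts T to subclasses of PoolingObj, turning a misuse into a substitution failure rather than a confusing instantiation error deep inside the template. A self-contained sketch of the same idiom:

#include <cstdio>
#include <type_traits>

struct PoolingObj {};
struct MaxPoolObj : PoolingObj {};
struct ReluObj {}; // not a pooling op

template <class T, typename std::enable_if<std::is_base_of<PoolingObj, T>{},
                                           int>::type = 0>
void testPooling() { std::printf("pooling test instantiated\n"); }

int main() {
    testPooling<MaxPoolObj>(); // compiles
    // testPooling<ReluObj>(); // would not compile: no matching overload
    return 0;
}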
@@ -46,11 +46,16 @@ TEST(cuDNN_Unary, run) {
     testUnary<SigmoidObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
     testUnary<TanhObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
     testUnary<SqrtObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
+    testUnary<NegObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
     testUnary<ErfObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
     // more shapes
     testUnary<SqrtObj>(IncrementalGenerator(), Shape{13});
     testUnary<SqrtObj>(IncrementalGenerator(), Shape{4, 3});
     testUnary<SqrtObj>(IncrementalGenerator(), Shape{2, 3, 4, 5, 6});
+
+    testUnary<GeluObj>(IncrementalGenerator(), Shape{1});
+    testUnary<GeluObj>(IncrementalGenerator(), Shape{1, 2});
+    testUnary<GeluObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
 }

 } // namespace infini
@@ -12,16 +12,16 @@ TEST(MaxPool, ShapeInference) {
         Graph g = make_ref<GraphObj>(cpuRuntime);
         Tensor i = g->addTensor({1, 64, 162, 162}, DataType::UInt32);
         const int kh = 3, kw = 3, dh = 1, dw = 1, ph = 0, pw = 0, sh = 2,
-                  sw = 2;
-        auto op =
-            g->addOp<MaxPoolObj>(i, nullptr, kh, kw, dh, dw, ph, pw, sh, sw);
+                  sw = 2, ceilMode = 0;
+        auto op = g->addOp<MaxPoolObj>(i, nullptr, kh, kw, dh, dw, ph, pw, sh,
+                                       sw, ceilMode);
         EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 64, 80, 80}));
     }

     { // dilation & stride
         Graph g = make_ref<GraphObj>(cpuRuntime);
         Tensor i = g->addTensor({1, 64, 162, 162}, DataType::UInt32);
-        auto op = g->addOp<MaxPoolObj>(i, nullptr, 4, 3, 1, 1, 2, 1, 1, 2);
+        auto op = g->addOp<MaxPoolObj>(i, nullptr, 4, 3, 1, 1, 2, 1, 1, 2, 0);
         EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 64, 163, 81}));
     }
 }
@@ -30,7 +30,7 @@ TEST(MaxPool, NaiveCPU) {
     Runtime cpuRuntime = NativeCpuRuntimeObj::getInstance();
     Graph g = make_ref<GraphObj>(cpuRuntime);
     Tensor i = g->addTensor({1, 2, 5, 5}, DataType::UInt32);
-    auto op = g->addOp<MaxPoolObj>(i, nullptr, 3, 3, 1, 1, 1, 1, 2, 2);
+    auto op = g->addOp<MaxPoolObj>(i, nullptr, 3, 3, 1, 1, 1, 1, 2, 2, 0);

     g->dataMalloc();
     i->setData(IncrementalGenerator());
@@ -49,7 +49,7 @@ TEST(AvgPool, NaiveCPU) {
     Runtime cpuRuntime = NativeCpuRuntimeObj::getInstance();
     Graph g = make_ref<GraphObj>(cpuRuntime);
     Tensor i = g->addTensor({1, 2, 5, 5}, DataType::Float32);
-    auto op = g->addOp<AvgPoolObj>(i, nullptr, 3, 3, 1, 1, 1, 1, 2, 2);
+    auto op = g->addOp<AvgPoolObj>(i, nullptr, 3, 3, 1, 1, 1, 1, 2, 2, 0);

     g->dataMalloc();
     i->setData(IncrementalGenerator());
@@ -0,0 +1,21 @@
+#include "core/graph.h"
+#include "core/kernel.h"
+#include "core/runtime.h"
+#include "operators/unary.h"
+
+#include "test.h"
+
+namespace infini {
+
+using ExpectOutput = vector<float>;
+TEST(Unary, ShapeInference) {
+    Runtime runtime = NativeCpuRuntimeObj::getInstance();
+    {
+        Graph g = make_ref<GraphObj>(runtime);
+        Tensor i0 = g->addTensor({2}, DataType::Float32);
+        auto op = g->addOp<GeluObj>(i0, nullptr);
+        EXPECT_EQ(op->getOutput()->getDims(), (Shape{2}));
+    }
+}
+
+} // namespace infini