From 7a9fcd93b243e94beceaca93a44eafbf750199a4 Mon Sep 17 00:00:00 2001 From: Haojie Wang Date: Mon, 9 Oct 2023 20:51:39 +0800 Subject: [PATCH] Pooling ceil mode (#155) * add ceil mode for pooling * do not print debug info for allocator by default * fix test bugs after introducing pooling ceil mode * fix onnx import bug --- include/core/graph_handler.h | 4 ++-- include/operators/pooling.h | 15 +++++++++----- pyinfinitensor/src/pyinfinitensor/onnx.py | 21 ++++++++++++++------ src/core/graph.cc | 4 ---- src/core/graph_handler.cc | 16 +++++++-------- src/core/lazy_allocator.cc | 16 +++++++-------- src/ffi/ffi_infinitensor.cc | 4 ++-- src/kernels/bang/pooling.cc | 1 + src/kernels/cpu/pooling.cc | 1 + src/kernels/cuda/pooling.cc | 22 +++++++++++++++------ src/operators/pooling.cc | 24 ++++++++++++++--------- test/core/test_graph_replace.cc | 22 ++++++++++----------- test/kernels/bang/test_bang_pooling.cc | 6 ++++-- test/kernels/cuda/test_cuda_inception.cc | 4 ++-- test/kernels/cuda/test_cuda_pooling.cc | 5 +++-- test/operators/test_pooling.cc | 12 ++++++------ 16 files changed, 103 insertions(+), 74 deletions(-) diff --git a/include/core/graph_handler.h b/include/core/graph_handler.h index 7f514ebd..c93a355f 100644 --- a/include/core/graph_handler.h +++ b/include/core/graph_handler.h @@ -32,9 +32,9 @@ class GraphHandlerObj { float momentum, float eps, bool training); Tensor maxPool(Tensor input, Tensor output, int kh, int kw, int dh, int dw, - int ph, int pw, int sh, int sw); + int ph, int pw, int sh, int sw, int ceilMode); Tensor avgPool(Tensor input, Tensor output, int kh, int kw, int dh, int dw, - int ph, int pw, int sh, int sw); + int ph, int pw, int sh, int sw, int ceilMode); Tensor add(Tensor a, Tensor b, Tensor c); Tensor sub(Tensor a, Tensor b, Tensor c); diff --git a/include/operators/pooling.h b/include/operators/pooling.h index a163ab0f..7f28224d 100644 --- a/include/operators/pooling.h +++ b/include/operators/pooling.h @@ -12,6 +12,7 @@ class PoolingObj : public OperatorObj { int dh, dw; int ph, pw; int sh, sw; + int ceilMode; int n, c, h, w; public: @@ -32,9 +33,12 @@ class PoolingObj : public OperatorObj { * @param pw Padding at the width dimension. * @param sh Stride at the height dimension. * @param sw Stride at the width dimension. + * @param ceilMode Whether to use ceil(1) or floor(0) to compute the output + * shape. */ PoolingObj(GraphObj *graph, OpType optype, Tensor input, Tensor output, - int kh, int kw, int dh, int dw, int ph, int pw, int sh, int sw); + int kh, int kw, int dh, int dw, int ph, int pw, int sh, int sw, + int ceilMode); OP_CLONE(PoolingObj); optional> inferShape(const TensorVec &inputs) const override; @@ -50,6 +54,7 @@ class PoolingObj : public OperatorObj { int getPw() const { return pw; } int getSh() const { return sh; } int getSw() const { return sw; } + int getCeilMode() const { return ceilMode; } auto getPadStrideDilation() const { return tuple(ph, pw, sh, sw, dh, dw); } auto getNCHWRS() const { return tuple(n, c, h, w, kh, kw); } @@ -62,15 +67,15 @@ class PoolingObj : public OperatorObj { class MaxPoolObj : public PoolingObj { public: MaxPoolObj(GraphObj *graph, Tensor input, Tensor output, int kh, int kw, - int dh, int dw, int ph, int pw, int sh, int sw) + int dh, int dw, int ph, int pw, int sh, int sw, int ceilMode) : PoolingObj(graph, OpType::MaxPool, input, output, kh, kw, dh, dw, ph, - pw, sh, sw) {} + pw, sh, sw, ceilMode) {} }; class AvgPoolObj : public PoolingObj { public: AvgPoolObj(GraphObj *graph, Tensor input, Tensor output, int kh, int kw, - int dh, int dw, int ph, int pw, int sh, int sw) + int dh, int dw, int ph, int pw, int sh, int sw, int ceilMode) : PoolingObj(graph, OpType::AveragePool, input, output, kh, kw, dh, dw, - ph, pw, sh, sw) {} + ph, pw, sh, sw, ceilMode) {} }; }; // namespace infini diff --git a/pyinfinitensor/src/pyinfinitensor/onnx.py b/pyinfinitensor/src/pyinfinitensor/onnx.py index f8e53b1c..149c284f 100644 --- a/pyinfinitensor/src/pyinfinitensor/onnx.py +++ b/pyinfinitensor/src/pyinfinitensor/onnx.py @@ -228,11 +228,12 @@ class OnnxStub: "dilations": [1, 1], "pads": [0, 0, 0, 0], "strides": [1, 1], + "ceil_mode": 0, }, ) - (k, d, p, s) = ( + (k, d, p, s, ceil_mode) = ( attributes[name] - for name in ["kernel_shape", "dilations", "pads", "strides"] + for name in ["kernel_shape", "dilations", "pads", "strides", "ceil_mode"] ) if p[0] != p[2] or p[1] != p[3]: adapt = "{}-adapt".format(node.output[0]) @@ -250,6 +251,7 @@ class OnnxStub: 0, s[0], s[1], + ceil_mode, ) else: tensors[node.output[0]] = self.handler.maxPool( @@ -263,6 +265,7 @@ class OnnxStub: p[1], s[0], s[1], + ceil_mode, ) elif node.op_type == "AveragePool": attributes = _parse_attribute( @@ -271,10 +274,11 @@ class OnnxStub: "kernel_shape": None, "pads": [0, 0, 0, 0], "strides": [1, 1], + "ceil_mode": 0, }, ) - (k, p, s) = ( - attributes[name] for name in ["kernel_shape", "pads", "strides"] + (k, p, s, ceil_mode) = ( + attributes[name] for name in ["kernel_shape", "pads", "strides", "ceil_mode"] ) if p[0] != p[2] or p[1] != p[3]: adapt = "{}-adapt".format(node.output[0]) @@ -292,6 +296,7 @@ class OnnxStub: 0, s[0], s[1], + ceil_mode, ) else: tensors[node.output[0]] = self.handler.avgPool( @@ -305,6 +310,7 @@ class OnnxStub: p[1], s[0], s[1], + ceil_mode, ) elif node.op_type == "GlobalAveragePool": [_, _, h, w] = _search_shape(model, node.input[0]) @@ -319,6 +325,7 @@ class OnnxStub: 0, 1, 1, + 0, ) elif node.op_type == "Add": tensors[node.output[0]] = self.handler.add( @@ -866,7 +873,7 @@ class OnnxStub: ) ) elif ty == backend.OpTypeId.MaxPool: - kh, kw, dh, dw, ph, pw, sh, sw = backend.pool_attrs_of(op) + kh, kw, dh, dw, ph, pw, sh, sw, ceil_mode = backend.pool_attrs_of(op) ctx.push_node( make_node( ty.name, @@ -877,10 +884,11 @@ class OnnxStub: pads=[ph, pw, ph, pw], dilations=[dh, dw], strides=[sh, sw], + ceil_mode=ceil_mode, ) ) elif ty == backend.OpTypeId.AveragePool: - kh, kw, dh, dw, ph, pw, sh, sw = backend.pool_attrs_of(op) + kh, kw, dh, dw, ph, pw, sh, sw, ceil_mode = backend.pool_attrs_of(op) ctx.push_node( make_node( "AveragePool", @@ -890,6 +898,7 @@ class OnnxStub: kernel_shape=[kh, kw], pads=[ph, pw, ph, pw], strides=[sh, sw], + ceil_mode=ceil_mode, ) ) elif ty in [ diff --git a/src/core/graph.cc b/src/core/graph.cc index 0f844c34..f662cf32 100644 --- a/src/core/graph.cc +++ b/src/core/graph.cc @@ -210,10 +210,6 @@ void GraphObj::dataMalloc() { tensorToOffset[tensor.get()])); } } - -#ifdef DEBUG_MODE - allocator.info(); -#endif } Tensor GraphObj::addTensor(Shape dim, DataType dtype) { diff --git a/src/core/graph_handler.cc b/src/core/graph_handler.cc index a804a8c7..7267fddf 100644 --- a/src/core/graph_handler.cc +++ b/src/core/graph_handler.cc @@ -95,30 +95,30 @@ Tensor GraphHandlerObj::batchNormalization(Tensor input, Tensor output, } Tensor GraphHandlerObj::maxPool(Tensor input, Tensor output, int kh, int kw, - int dh, int dw, int ph, int pw, int sh, - int sw) { + int dh, int dw, int ph, int pw, int sh, int sw, + int ceilMode) { if (output) { g->addOpWithOutputs(std::move(input), output, kh, kw, dh, - dw, ph, pw, sh, sw); + dw, ph, pw, sh, sw, ceilMode); return output; } else { return g ->addOp(std::move(input), output, kh, kw, dh, dw, ph, - pw, sh, sw) + pw, sh, sw, ceilMode) ->getOutput(); } } Tensor GraphHandlerObj::avgPool(Tensor input, Tensor output, int kh, int kw, - int dh, int dw, int ph, int pw, int sh, - int sw) { + int dh, int dw, int ph, int pw, int sh, int sw, + int ceilMode) { if (output) { g->addOpWithOutputs(std::move(input), output, kh, kw, dh, - dw, ph, pw, sh, sw); + dw, ph, pw, sh, sw, ceilMode); return output; } else { return g ->addOp(std::move(input), output, kh, kw, dh, dw, ph, - pw, sh, sw) + pw, sh, sw, ceilMode) ->getOutput(); } } diff --git a/src/core/lazy_allocator.cc b/src/core/lazy_allocator.cc index a5014e5c..c3407320 100644 --- a/src/core/lazy_allocator.cc +++ b/src/core/lazy_allocator.cc @@ -145,10 +145,10 @@ void LazyAllocator::free(size_t addr, size_t size) { void *LazyAllocator::getPtr() { if (this->ptr == nullptr) { this->ptr = runtime->alloc(this->peak); -#ifdef DEBUG_MODE - printf("LazyAllocator really alloc non-weight: %p %lu bytes\n", - this->ptr, peak); -#endif + // #ifdef DEBUG_MODE + // printf("LazyAllocator really alloc non-weight: %p %lu + // bytes\n", this->ptr, peak); + // #endif } return this->ptr; } @@ -156,10 +156,10 @@ void *LazyAllocator::getPtr() { void *LazyAllocator::getWeightPtr() { if (this->weightPtr == nullptr) { this->weightPtr = runtime->alloc(this->weightPeak); -#ifdef DEBUG_MODE - printf("LazyAllocator really alloc weight: %p %lu bytes\n", - this->weightPtr, weightPeak); -#endif + // #ifdef DEBUG_MODE + // printf("LazyAllocator really alloc weight: %p %lu bytes\n", + // this->weightPtr, weightPeak); + // #endif } return this->weightPtr; } diff --git a/src/ffi/ffi_infinitensor.cc b/src/ffi/ffi_infinitensor.cc index bea3f4bc..92feba2a 100644 --- a/src/ffi/ffi_infinitensor.cc +++ b/src/ffi/ffi_infinitensor.cc @@ -187,14 +187,14 @@ static std::tuple batch_norm_attrs_of(Operator op) { batchnorm->getTrainingMode()); } -static std::tuple +static std::tuple pool_attrs_of(Operator op) { IT_ASSERT(op->getOpType() == OpType::MaxPool || op->getOpType() == OpType::AveragePool); auto pool = dynamic_cast(op.get()); return std::make_tuple(pool->getKh(), pool->getKw(), pool->getDh(), pool->getDw(), pool->getPh(), pool->getPw(), - pool->getSh(), pool->getSw()); + pool->getSh(), pool->getSw(), pool->getCeilMode()); } static std::tuple, std::optional> diff --git a/src/kernels/bang/pooling.cc b/src/kernels/bang/pooling.cc index 6f907705..8a91b466 100644 --- a/src/kernels/bang/pooling.cc +++ b/src/kernels/bang/pooling.cc @@ -30,6 +30,7 @@ class PoolingCnnl : public BangKernelWithoutConfig { ph, pw, pw, sh, sw, dh, dw, false)); // get outputs + // TODO: verify ceiling mode auto outVec = op->getOutput()->getDims(); int outArray[4] = {outVec[0], outVec[1], outVec[2], outVec[3]}; cnnlTensorDescriptor_t outDesc; diff --git a/src/kernels/cpu/pooling.cc b/src/kernels/cpu/pooling.cc index acb88d9c..1242e14f 100644 --- a/src/kernels/cpu/pooling.cc +++ b/src/kernels/cpu/pooling.cc @@ -21,6 +21,7 @@ template class NativePooling : public CpuKernelWithoutConfig { auto inoffset = i * (c * ih * iw) + j * ih * iw; for (auto h = 0; h < oh; h++) { for (auto w = 0; w < ow; w++) { + // TODO: verify ceil mode T val = getPoolingValue(kh, kw, h * sh - ph, w * sw - pw, ih, iw, inptr + inoffset); diff --git a/src/kernels/cuda/pooling.cc b/src/kernels/cuda/pooling.cc index 90aa3944..d8b2e0f8 100644 --- a/src/kernels/cuda/pooling.cc +++ b/src/kernels/cuda/pooling.cc @@ -29,17 +29,27 @@ class poolingCudnn : public CudaKernelWithoutConfig { pw, sh, sw)); // get outputs - int outn, outc, outh, outw; - checkCudnnError(cudnnGetPooling2dForwardOutputDim( - poolingDesc, inDesc, &outn, &outc, &outh, &outw)); + auto outDims = op->getOutput()->getDims(); + int outn = outDims[0], outc = outDims[1], outh = outDims[2], + outw = outDims[3]; + // NOTICE: cudnn pooling does not support ceil mode, so the shape + // inference of cudnn pooling is not consistant with our framework. Ceil + // mode is also supported in Pytorch and ONNX. See + // https://pytorch.org/docs/stable/generated/torch.nn.MaxPool2d.html#torch.nn.MaxPool2d + // and https://github.com/onnx/onnx/blob/main/docs/Operators.md#MaxPool + // for reference. + // TODO: Make sure the result after considering ceil mode is correct. + // int outn, outc, outh, outw; + // checkCudnnError(cudnnGetPooling2dForwardOutputDim(poolingDesc, + // inDesc, &outn, &outc, &outh, &outw)); cudnnTensorDescriptor_t outDesc; checkCudnnError(cudnnCreateTensorDescriptor(&outDesc)); checkCudnnError(cudnnSetTensor4dDescriptor(outDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, outn, outc, outh, outw)); - IT_ASSERT((vector{outn, outc, outh, outw}) == - op->getOutput()->getDims(), - "cuDNN output shape mismatches with OP output shape"); + // IT_ASSERT((vector{outn, outc, outh, outw}) == + // op->getOutput()->getDims(), + // "cuDNN output shape mismatches with OP output shape"); float alpha = 1.f, beta = 0.f; checkCudnnError(cudnnPoolingForward(context->cudnnHandle(), poolingDesc, diff --git a/src/operators/pooling.cc b/src/operators/pooling.cc index d7153699..b1bb2e3d 100644 --- a/src/operators/pooling.cc +++ b/src/operators/pooling.cc @@ -4,11 +4,9 @@ namespace infini { PoolingObj::PoolingObj(GraphObj *graph, OpType optype, Tensor input, Tensor output, int kh, int kw, int dh, int dw, int ph, - int pw, int sh, int sw) - : OperatorObj(optype, {input}, {output}), - - kh(kh), kw(kw), dh(dh), dw(dw), ph(ph), pw(pw), sh(sh), sw(sw), - + int pw, int sh, int sw, int ceilMode) + : OperatorObj(optype, {input}, {output}), kh(kh), kw(kw), dh(dh), dw(dw), + ph(ph), pw(pw), sh(sh), sw(sw), ceilMode(ceilMode), n(input->getDims()[0]), c(input->getDims()[1]), h(input->getDims()[2]), w(input->getDims()[3]) { IT_ASSERT(checkValid(graph)); @@ -18,8 +16,14 @@ optional> PoolingObj::inferShape(const TensorVec &inputs) const { const auto &input = inputs[0]; auto h = input->getDims()[input->getRank() - 2], w = input->getDims()[input->getRank() - 1]; - int oh = (h - (kh - sh) + ph * 2) / sh; - int ow = (w - (kw - sw) + pw * 2) / sw; + int oh, ow; + if (ceilMode) { + oh = ceil(((float)(h + 2 * ph - dh * (kh - 1) - 1)) / sh + 1); + ow = ceil(((float)(w + 2 * pw - dw * (kw - 1) - 1)) / sw + 1); + } else { + oh = floor(((float)(h + 2 * ph - dh * (kh - 1) - 1)) / sh + 1); + ow = floor(((float)(w + 2 * pw - dw * (kw - 1) - 1)) / sw + 1); + } auto ret = input->getDims(); ret[input->getRank() - 2] = oh; ret[input->getRank() - 1] = ow; @@ -34,17 +38,19 @@ std::string PoolingObj::toString() const { os << "p=[" << ph << "," << pw << "],"; os << "s=[" << sh << "," << sw << "],"; os << "d=[" << dh << "," << dw << "],"; + os << "ceil mode=" << ceilMode << ","; os << "input=" << inputs[0]->getGuid() << ","; os << "output=" << outputs[0]->getGuid() << ")"; return os.str(); } vector PoolingObj::getWorkloadVector() const { - return {type.underlying(), n, c, h, w, kh, kw, ph, pw, sh, sw, dh, dw}; + return {type.underlying(), n, c, h, w, kh, kw, ph, pw, sh, sw, dh, dw, + ceilMode}; } vector PoolingObj::getOpAttrVector() const { - return {type.underlying(), kh, kw, ph, pw, sh, sw, dh, dw}; + return {type.underlying(), kh, kw, ph, pw, sh, sw, dh, dw, ceilMode}; } }; // namespace infini diff --git a/test/core/test_graph_replace.cc b/test/core/test_graph_replace.cc index a77624ec..cada8860 100644 --- a/test/core/test_graph_replace.cc +++ b/test/core/test_graph_replace.cc @@ -208,16 +208,13 @@ TEST(MatchGraph, multi_output) { SubGraph subg0 = make_ref(runtime, TensorVec{i}); { auto maxpool = - subg0->addOp(i, nullptr, 3, 3, 0, 0, 0, 0, 2, 2); + subg0->addOp(i, nullptr, 3, 3, 1, 1, 0, 0, 2, 2, 0); Tensor w0 = subg0->addTensor(Shape{64, 192, 1, 1}, DataType::UInt32); auto conv0 = subg0->addOp(maxpool->getOutput(0), w0, nullptr); auto relu0 = subg0->addOp(conv0->getOutput(0), nullptr); - auto pad = subg0->addOp(maxpool->getOutput(0), nullptr, - vector{0, 0, 1, 1, 0, 0, 1, 1}, - std::nullopt); - auto avgpool = subg0->addOp(pad->getOutput(0), nullptr, 3, - 3, 0, 0, 0, 0, 1, 1); + auto avgpool = subg0->addOp(maxpool->getOutput(0), nullptr, + 3, 3, 0, 0, 0, 0, 1, 1, 0); subg0->setOutputs( TensorVec{relu0->getOutput(0), avgpool->getOutput(0)}); } @@ -225,8 +222,9 @@ TEST(MatchGraph, multi_output) { SubGraph subg1 = make_ref(runtime, TensorVec{i->clone(runtime)}); { - auto avgpool = subg1->addOp( - subg1->getInputsFromOutside()[0], nullptr, 3, 3, 0, 0, 0, 0, 2, 2); + auto avgpool = + subg1->addOp(subg1->getInputsFromOutside()[0], nullptr, + 3, 3, 1, 1, 0, 0, 2, 2, 0); auto relu0 = subg1->addOp(avgpool->getOutput(0), nullptr); @@ -295,7 +293,7 @@ TEST(MatchGraph, multi_input_output) { Tensor w2 = subg0->addTensor(Shape{128, 256, 1, 1}, DataType::UInt32); auto conv2 = subg0->addOp(relu1->getOutput(0), w2, nullptr); auto maxpool = subg0->addOp(relu1->getOutput(0), nullptr, 3, - 3, 0, 0, 0, 0, 2, 2); + 3, 1, 1, 0, 0, 2, 2, 0); subg0->setOutputs( TensorVec{conv2->getOutput(0), maxpool->getOutput(0)}); } @@ -317,7 +315,7 @@ TEST(MatchGraph, multi_input_output) { Tensor w2 = subg1->addTensor(Shape{128, 256, 1, 1}, DataType::UInt32); auto conv2 = subg1->addOp(relu1->getOutput(0), w2, nullptr); auto maxpool = subg1->addOp(relu1->getOutput(0), nullptr, 3, - 3, 0, 0, 0, 0, 2, 2); + 3, 1, 1, 0, 0, 2, 2, 0); subg1->setOutputs( TensorVec{maxpool->getOutput(0), conv2->getOutput(0)}); } @@ -338,7 +336,7 @@ TEST(MatchGraph, multi_input_output) { Tensor w2 = subg2->addTensor(Shape{128, 256, 1, 1}, DataType::UInt32); auto conv2 = subg2->addOp(relu1->getOutput(0), w2, nullptr); auto avgpool = subg2->addOp(relu1->getOutput(0), nullptr, 3, - 3, 0, 0, 0, 0, 2, 2); + 3, 1, 1, 0, 0, 2, 2, 0); subg2->setOutputs( TensorVec{conv2->getOutput(0), avgpool->getOutput(0)}); } @@ -349,7 +347,7 @@ TEST(MatchGraph, multi_input_output) { auto i = g->addTensor(Shape{1, 64, 112, 112}, DataType::UInt32); auto relu = g->addOp(i, nullptr); auto maxPool = g->addOp(relu->getOutput(0), nullptr, 3, 3, - 0, 0, 1, 1, 2, 2); + 1, 1, 1, 1, 2, 2, 0); auto out0 = v.addSubGraph(subg0, {relu->getOutput(0), maxPool->getOutput(0)}); auto out1 = diff --git a/test/kernels/bang/test_bang_pooling.cc b/test/kernels/bang/test_bang_pooling.cc index 20347ae9..4bbc8091 100644 --- a/test/kernels/bang/test_bang_pooling.cc +++ b/test/kernels/bang/test_bang_pooling.cc @@ -8,7 +8,8 @@ namespace infini { -template +template {}, + int>::type = 0> void testPooling(const std::function &generator, const Shape &shape) { // Runtime @@ -23,7 +24,8 @@ void testPooling(const std::function &generator, // GPU Graph bangGraph = make_ref(bangRuntime); auto inputGpu = bangGraph->cloneTensor(inputCpu); - auto gpuOp = bangGraph->addOp(inputGpu, nullptr, 3, 3, 1, 1, 1, 1, 2, 2); + auto gpuOp = + bangGraph->addOp(inputGpu, nullptr, 3, 3, 1, 1, 1, 1, 2, 2, 0); bangGraph->dataMalloc(); bangRuntime->run(bangGraph); auto outputGpu = gpuOp->getOutput(); diff --git a/test/kernels/cuda/test_cuda_inception.cc b/test/kernels/cuda/test_cuda_inception.cc index 31a7b888..b6765648 100644 --- a/test/kernels/cuda/test_cuda_inception.cc +++ b/test/kernels/cuda/test_cuda_inception.cc @@ -29,7 +29,7 @@ TEST(CUDA_Inception_v3_block, run) { TensorVec outputs; vector ops; auto maxpool = - g->addOp(blockInput, nullptr, 3, 3, 1, 1, 1, 1, 1, 1); + g->addOp(blockInput, nullptr, 3, 3, 1, 1, 1, 1, 1, 1, 0); auto chainInput = maxpool->getOutput(); for (auto &pathConfig : configs) { int inputChannels = initialChannels; @@ -52,7 +52,7 @@ TEST(CUDA_Inception_v3_block, run) { inputChannels = f; } else { // Add AveragePool auto pool = g->addOp(input, nullptr, r, r, 1, 1, - r / 2, r / 2, 1, 1); + r / 2, r / 2, 1, 1, 0); input = pool->getOutput(); ops.back().emplace_back(pool); } diff --git a/test/kernels/cuda/test_cuda_pooling.cc b/test/kernels/cuda/test_cuda_pooling.cc index 7347b951..7e9a6deb 100644 --- a/test/kernels/cuda/test_cuda_pooling.cc +++ b/test/kernels/cuda/test_cuda_pooling.cc @@ -9,7 +9,8 @@ namespace infini { using KDPS = vector; using ExpectOutput = vector; -template +template {}, + int>::type = 0> void testPoolCudnn( const std::function &generator, const Shape &shape, const KDPS &kdps, const ExpectOutput &ansVec) { @@ -24,7 +25,7 @@ void testPoolCudnn( Graph g = make_ref(cudaRuntime); auto i0 = g->cloneTensor(i0cpu); auto pool = g->addOp(i0, nullptr, kdps[0], kdps[1], kdps[2], kdps[3], - kdps[4], kdps[5], kdps[6], kdps[7]); + kdps[4], kdps[5], kdps[6], kdps[7], 0); // allocate CUDA memory g->dataMalloc(); diff --git a/test/operators/test_pooling.cc b/test/operators/test_pooling.cc index 8b4c52ed..1ddefa13 100644 --- a/test/operators/test_pooling.cc +++ b/test/operators/test_pooling.cc @@ -12,16 +12,16 @@ TEST(MaxPool, ShapeInference) { Graph g = make_ref(cpuRuntime); Tensor i = g->addTensor({1, 64, 162, 162}, DataType::UInt32); const int kh = 3, kw = 3, dh = 1, dw = 1, ph = 0, pw = 0, sh = 2, - sw = 2; - auto op = - g->addOp(i, nullptr, kh, kw, dh, dw, ph, pw, sh, sw); + sw = 2, ceilMode = 0; + auto op = g->addOp(i, nullptr, kh, kw, dh, dw, ph, pw, sh, + sw, ceilMode); EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 64, 80, 80})); } { // dilation & stride Graph g = make_ref(cpuRuntime); Tensor i = g->addTensor({1, 64, 162, 162}, DataType::UInt32); - auto op = g->addOp(i, nullptr, 4, 3, 1, 1, 2, 1, 1, 2); + auto op = g->addOp(i, nullptr, 4, 3, 1, 1, 2, 1, 1, 2, 0); EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 64, 163, 81})); } } @@ -30,7 +30,7 @@ TEST(MaxPool, NaiveCPU) { Runtime cpuRuntime = NativeCpuRuntimeObj::getInstance(); Graph g = make_ref(cpuRuntime); Tensor i = g->addTensor({1, 2, 5, 5}, DataType::UInt32); - auto op = g->addOp(i, nullptr, 3, 3, 1, 1, 1, 1, 2, 2); + auto op = g->addOp(i, nullptr, 3, 3, 1, 1, 1, 1, 2, 2, 0); g->dataMalloc(); i->setData(IncrementalGenerator()); @@ -49,7 +49,7 @@ TEST(AvgPool, NaiveCPU) { Runtime cpuRuntime = NativeCpuRuntimeObj::getInstance(); Graph g = make_ref(cpuRuntime); Tensor i = g->addTensor({1, 2, 5, 5}, DataType::Float32); - auto op = g->addOp(i, nullptr, 3, 3, 1, 1, 1, 1, 2, 2); + auto op = g->addOp(i, nullptr, 3, 3, 1, 1, 1, 1, 2, 2, 0); g->dataMalloc(); i->setData(IncrementalGenerator());