forked from jiuyuan/InfiniTensor
add conv_transpose&&native maxpooling
This commit is contained in:
parent
fc4b62a88c
commit
a5ccf06551
|
@ -20,6 +20,7 @@ endif()
|
||||||
include(CMakeDependentOption)
|
include(CMakeDependentOption)
|
||||||
project(InfiniTensor C CXX)
|
project(InfiniTensor C CXX)
|
||||||
|
|
||||||
|
|
||||||
cmake_dependent_option(BUILD_TEST_CORE "Build tests for core components" ON BUILD_TEST OFF)
|
cmake_dependent_option(BUILD_TEST_CORE "Build tests for core components" ON BUILD_TEST OFF)
|
||||||
cmake_dependent_option(BUILD_TEST_PET "Build tests for PET" OFF BUILD_TEST OFF)
|
cmake_dependent_option(BUILD_TEST_PET "Build tests for PET" OFF BUILD_TEST OFF)
|
||||||
|
|
||||||
|
|
2
Makefile
2
Makefile
|
@ -39,7 +39,7 @@ endif
|
||||||
|
|
||||||
build:
|
build:
|
||||||
mkdir -p build/$(TYPE)
|
mkdir -p build/$(TYPE)
|
||||||
cd build/$(TYPE) && cmake $(CMAKE_OPT) ../.. && make -j8
|
cd build/$(TYPE) && cmake $(CMAKE_OPT) ../.. && make -j
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf build
|
rm -rf build
|
||||||
|
|
|
@ -20,21 +20,16 @@ class ASCENDRuntimeObj : public RuntimeObj {
|
||||||
private:
|
private:
|
||||||
aclrtContext context;
|
aclrtContext context;
|
||||||
aclrtStream stream;
|
aclrtStream stream;
|
||||||
|
std::unique_ptr<CommunicatorObj> comm;
|
||||||
ASCENDPtr workspace = nullptr;
|
ASCENDPtr workspace = nullptr;
|
||||||
size_t workspaceSize;
|
size_t workspaceSize;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
ASCENDRuntimeObj(int deviceId = 0) : RuntimeObj(Device::ASCEND, deviceId) {
|
ASCENDRuntimeObj(int deviceId = 0) : RuntimeObj(Device::ASCEND, deviceId) {
|
||||||
// #ifndef _ACL_INIT
|
auto ret = aclInit(nullptr);
|
||||||
// #define _ACL_INIT
|
CHECK_RET(ret == ACL_SUCCESS,
|
||||||
// aclInit(nullptr);
|
LOG_PRINT("aclInit failed. ERROR: %d\n", ret));
|
||||||
// // auto ret_init =
|
ret = aclrtSetDevice(deviceId);
|
||||||
// // CHECK_RET(ret == ACL_SUCCESS,
|
|
||||||
// // LOG_PRINT("aclInit failed. ERROR: %d\n",
|
|
||||||
// ret));
|
|
||||||
// #endif
|
|
||||||
aclInit(nullptr);
|
|
||||||
auto ret = aclrtSetDevice(deviceId);
|
|
||||||
CHECK_RET(ret == ACL_SUCCESS,
|
CHECK_RET(ret == ACL_SUCCESS,
|
||||||
LOG_PRINT("aclrtSetDevice failed. ERROR: %d\n", ret));
|
LOG_PRINT("aclrtSetDevice failed. ERROR: %d\n", ret));
|
||||||
ret = aclrtCreateContext(&context, deviceId);
|
ret = aclrtCreateContext(&context, deviceId);
|
||||||
|
@ -49,7 +44,7 @@ class ASCENDRuntimeObj : public RuntimeObj {
|
||||||
|
|
||||||
// 10GB for Longformer
|
// 10GB for Longformer
|
||||||
// size_t longformerNum = 3lu * (1 << 30);
|
// size_t longformerNum = 3lu * (1 << 30);
|
||||||
workspaceSize = 3ll << 30; // 3 GB
|
workspaceSize = 3ll << 33; // 3 GB
|
||||||
// std::cout<<workspaceSize/1024/1024/1024<< std::endl;
|
// std::cout<<workspaceSize/1024/1024/1024<< std::endl;
|
||||||
// std::cout<<std::bitset<64>(workspaceSize)<< std::endl;
|
// std::cout<<std::bitset<64>(workspaceSize)<< std::endl;
|
||||||
workspace = alloc(workspaceSize);
|
workspace = alloc(workspaceSize);
|
||||||
|
@ -99,9 +94,9 @@ class ASCENDRuntimeObj : public RuntimeObj {
|
||||||
ACL_MEMCPY_DEVICE_TO_DEVICE);
|
ACL_MEMCPY_DEVICE_TO_DEVICE);
|
||||||
}
|
}
|
||||||
|
|
||||||
void initComm(const string &, int, int) override { IT_TODO_HALT(); }
|
void initComm(const string &name, int worldSize, int rank) final;
|
||||||
|
|
||||||
CommunicatorObj &getCommunicator() const override { IT_TODO_HALT(); }
|
CommunicatorObj &getCommunicator() const override { return *comm; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void runWithoutSync(const Graph &graph, bool tune, bool profiling) const;
|
void runWithoutSync(const Graph &graph, bool tune, bool profiling) const;
|
||||||
|
|
|
@ -184,7 +184,7 @@ class OnnxStub:
|
||||||
node,
|
node,
|
||||||
{
|
{
|
||||||
"dilations": [1, 1],
|
"dilations": [1, 1],
|
||||||
"pads": [0, 0],
|
"pads": [0, 0, 0, 0],
|
||||||
"strides": [1, 1],
|
"strides": [1, 1],
|
||||||
"output_padding": [0, 0],
|
"output_padding": [0, 0],
|
||||||
},
|
},
|
||||||
|
@ -193,8 +193,52 @@ class OnnxStub:
|
||||||
attributes[name]
|
attributes[name]
|
||||||
for name in ["dilations", "pads", "strides", "output_padding"]
|
for name in ["dilations", "pads", "strides", "output_padding"]
|
||||||
)
|
)
|
||||||
|
if p[0] != p[2] or p[1] != p[3]:
|
||||||
|
adapt = "{}-adapt".format(node.output[0])
|
||||||
|
tensors[adapt] = self.handler.pad(
|
||||||
|
tensors[node.input[0]], None, p, [-2, -1]
|
||||||
|
)
|
||||||
|
p = [0, 0, 0, 0]
|
||||||
|
else:
|
||||||
|
adapt = node.input[0]
|
||||||
|
|
||||||
|
if len(node.input) > 2:
|
||||||
|
bias = "{}-bias".format(node.output[0])
|
||||||
|
reshape = "{}-reshape".format(node.output[0])
|
||||||
|
tensors[bias] = self.handler.convTransposed2d(
|
||||||
|
tensors[adapt],
|
||||||
|
tensors[node.input[1]],
|
||||||
|
None,
|
||||||
|
p[0],
|
||||||
|
p[1],
|
||||||
|
s[0],
|
||||||
|
s[1],
|
||||||
|
d[0],
|
||||||
|
d[1],
|
||||||
|
op[0],
|
||||||
|
op[1],
|
||||||
|
)
|
||||||
|
tensors[reshape] = self.handler.reshape(
|
||||||
|
tensors[node.input[2]],
|
||||||
|
None,
|
||||||
|
[
|
||||||
|
1,
|
||||||
|
reduce(
|
||||||
|
lambda acc, x: acc * x,
|
||||||
|
tensors[node.input[2]].shape(),
|
||||||
|
),
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
],
|
||||||
|
)
|
||||||
|
tensors[node.output[0]] = self.handler.add(
|
||||||
|
tensors[bias],
|
||||||
|
tensors[reshape],
|
||||||
|
tensors.get(node.output[0]),
|
||||||
|
)
|
||||||
|
else:
|
||||||
tensors[node.output[0]] = self.handler.convTransposed2d(
|
tensors[node.output[0]] = self.handler.convTransposed2d(
|
||||||
tensors[node.input[0]],
|
tensors[adapt],
|
||||||
tensors[node.input[1]],
|
tensors[node.input[1]],
|
||||||
tensors.get(node.output[0]),
|
tensors.get(node.output[0]),
|
||||||
p[0],
|
p[0],
|
||||||
|
|
|
@ -56,4 +56,15 @@ void ASCENDRuntimeObj::sync() const { ; }
|
||||||
|
|
||||||
string ASCENDRuntimeObj::toString() const { return "ASCEND Runtime"; }
|
string ASCENDRuntimeObj::toString() const { return "ASCEND Runtime"; }
|
||||||
|
|
||||||
|
void ASCENDRuntimeObj::initComm(const string &name, int worldSize, int rank) {
|
||||||
|
IT_ASSERT(worldSize > 0);
|
||||||
|
IT_ASSERT(rank >= 0);
|
||||||
|
IT_ASSERT(rank < worldSize);
|
||||||
|
IT_ASSERT(!comm) << "communicator is already initialized.";
|
||||||
|
#ifdef INFINI_USE_HCCL
|
||||||
|
comm = std::make_unique<HcclCommunicatorObj>(name, worldSize, rank);
|
||||||
|
#else
|
||||||
|
IT_TODO_HALT_MSG("Not compiled with CNCL.");
|
||||||
|
#endif
|
||||||
|
}
|
||||||
} // namespace infini
|
} // namespace infini
|
||||||
|
|
|
@ -13,9 +13,9 @@ class ConvAclnn : public ASCENDKernelWithoutConfig {
|
||||||
auto context = dynamic_cast<const ASCENDRuntimeObj *>(_context);
|
auto context = dynamic_cast<const ASCENDRuntimeObj *>(_context);
|
||||||
|
|
||||||
const auto [ph, pw, sh, sw, dh, dw] = op->getPadStrideDilation();
|
const auto [ph, pw, sh, sw, dh, dw] = op->getPadStrideDilation();
|
||||||
// const auto [n, c, h, w, f, r, s] = op->getNCHWFRS();
|
const auto [n, c, h, w, f, r, s] = op->getNCHWFRS();
|
||||||
// const int cpg = op->getChannelPerGroup();
|
const int cpg = op->getChannelPerGroup();
|
||||||
// const int g = c / cpg;
|
const int g = c / cpg;
|
||||||
|
|
||||||
std::vector<int64_t> pads = {ph, pw};
|
std::vector<int64_t> pads = {ph, pw};
|
||||||
// std::vector<int64_t> ksize = {r, s};
|
// std::vector<int64_t> ksize = {r, s};
|
||||||
|
@ -67,8 +67,8 @@ class ConvAclnn : public ASCENDKernelWithoutConfig {
|
||||||
|
|
||||||
auto ret = aclnnConvolutionGetWorkspaceSize(
|
auto ret = aclnnConvolutionGetWorkspaceSize(
|
||||||
inputTensor, weightTensor, nullptr, convstride, convpads,
|
inputTensor, weightTensor, nullptr, convstride, convpads,
|
||||||
convdilation, false, convOutputpadding, 1, outputTensor, 1,
|
convdilation, false, convOutputpadding, int64_t(g), outputTensor,
|
||||||
&workspaceSize, &executor);
|
int8_t(1), &workspaceSize, &executor);
|
||||||
void *workspaceAddr = nullptr;
|
void *workspaceAddr = nullptr;
|
||||||
if (workspaceSize > 0) {
|
if (workspaceSize > 0) {
|
||||||
workspaceAddr = context->getWorkspace(workspaceSize);
|
workspaceAddr = context->getWorkspace(workspaceSize);
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
#include "operators/pooling.h"
|
#include "operators/pooling.h"
|
||||||
#include "aclnnop/level2/aclnn_adaptive_max_pool2d.h"
|
|
||||||
#include "aclnnop/level2/aclnn_avgpool2d.h"
|
#include "aclnnop/level2/aclnn_avgpool2d.h"
|
||||||
|
#include "aclnnop/level2/aclnn_max_pool.h"
|
||||||
#include "ascend/ascend_kernel_without_config.h"
|
#include "ascend/ascend_kernel_without_config.h"
|
||||||
#include "ascend/ascend_runtime.h"
|
#include "ascend/ascend_runtime.h"
|
||||||
|
|
||||||
|
@ -75,40 +75,6 @@ class AvgPooling : public ASCENDKernelWithoutConfig {
|
||||||
};
|
};
|
||||||
|
|
||||||
class MaxPooling : public ASCENDKernelWithoutConfig {
|
class MaxPooling : public ASCENDKernelWithoutConfig {
|
||||||
// Only adaptiveMaxPool2d was found in the ACLNN doc.
|
|
||||||
int64_t GetShapeSize(const std::vector<int64_t> &shape) {
|
|
||||||
int64_t shapeSize = 1;
|
|
||||||
for (auto i : shape) {
|
|
||||||
shapeSize *= i;
|
|
||||||
}
|
|
||||||
return shapeSize;
|
|
||||||
}
|
|
||||||
template <typename T>
|
|
||||||
int CreateAclTensor(const std::vector<T> &hostData,
|
|
||||||
const std::vector<int64_t> &shape, void **deviceAddr,
|
|
||||||
aclDataType dataType, aclTensor **tensor) {
|
|
||||||
auto size = GetShapeSize(shape) * sizeof(T);
|
|
||||||
// 调用aclrtMalloc申请device侧内存
|
|
||||||
auto ret = aclrtMalloc(deviceAddr, size, ACL_MEM_MALLOC_HUGE_FIRST);
|
|
||||||
assert(ret == ACL_SUCCESS);
|
|
||||||
// 调用aclrtMemcpy将host侧数据拷贝到device侧内存上
|
|
||||||
ret = aclrtMemcpy(*deviceAddr, size, hostData.data(), size,
|
|
||||||
ACL_MEMCPY_HOST_TO_DEVICE);
|
|
||||||
assert(ret == ACL_SUCCESS);
|
|
||||||
|
|
||||||
// 计算连续tensor的strides
|
|
||||||
std::vector<int64_t> strides(shape.size(), 1);
|
|
||||||
for (int64_t i = shape.size() - 2; i >= 0; i--) {
|
|
||||||
strides[i] = shape[i + 1] * strides[i + 1];
|
|
||||||
}
|
|
||||||
|
|
||||||
// 调用aclCreateTensor接口创建aclTensor
|
|
||||||
*tensor = aclCreateTensor(shape.data(), shape.size(), dataType,
|
|
||||||
strides.data(), 0, aclFormat::ACL_FORMAT_NCHW,
|
|
||||||
shape.data(), shape.size(), *deviceAddr);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
void compute(const Operator &_op,
|
void compute(const Operator &_op,
|
||||||
const RuntimeObj *_context) const override {
|
const RuntimeObj *_context) const override {
|
||||||
auto op = as<PoolingObj>(_op);
|
auto op = as<PoolingObj>(_op);
|
||||||
|
@ -117,6 +83,15 @@ class MaxPooling : public ASCENDKernelWithoutConfig {
|
||||||
void *const aData = (op->getInputs(0)->getRawDataPtr<void *>());
|
void *const aData = (op->getInputs(0)->getRawDataPtr<void *>());
|
||||||
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
||||||
|
|
||||||
|
auto [n, c, h, w, kh, kw] = op->getNCHWRS();
|
||||||
|
auto [ph, pw, sh, sw, dh, dw] = op->getPadStrideDilation();
|
||||||
|
int64_t ceilMode = int64_t(op->getCeilMode());
|
||||||
|
|
||||||
|
std::vector<int64_t> ksize = {kh, kw};
|
||||||
|
std::vector<int64_t> stride = {sh, sw};
|
||||||
|
std::vector<int64_t> pad = {ph, pw};
|
||||||
|
std::vector<int64_t> dilation = {dh, dw};
|
||||||
|
|
||||||
auto selfD = op->getInputs(0)->getDims();
|
auto selfD = op->getInputs(0)->getDims();
|
||||||
auto selfS = op->getInputs(0)->getStride();
|
auto selfS = op->getInputs(0)->getStride();
|
||||||
auto outD = op->getOutput()->getDims();
|
auto outD = op->getOutput()->getDims();
|
||||||
|
@ -127,20 +102,12 @@ class MaxPooling : public ASCENDKernelWithoutConfig {
|
||||||
std::vector<int64_t> outputDim = castTo64(outD);
|
std::vector<int64_t> outputDim = castTo64(outD);
|
||||||
std::vector<int64_t> outputStride = castTo64(outS);
|
std::vector<int64_t> outputStride = castTo64(outS);
|
||||||
|
|
||||||
std::vector<int64_t> outputHW(2, 1);
|
aclIntArray *kernelSize = aclCreateIntArray(ksize.data(), ksize.size());
|
||||||
outputHW[0] = outputDim[outputDim.size() - 2];
|
aclIntArray *strides = aclCreateIntArray(stride.data(), stride.size());
|
||||||
outputHW[1] = outputDim[outputDim.size() - 1];
|
aclIntArray *paddings = aclCreateIntArray(pad.data(), pad.size());
|
||||||
|
aclIntArray *dilations =
|
||||||
|
aclCreateIntArray(dilation.data(), dilation.size());
|
||||||
|
|
||||||
int64_t indicesOutSize = 1;
|
|
||||||
for (auto i : outputDim) {
|
|
||||||
indicesOutSize *= i;
|
|
||||||
}
|
|
||||||
void *indicesOutDeviceAddr = nullptr;
|
|
||||||
aclrtMalloc(&indicesOutDeviceAddr, indicesOutSize,
|
|
||||||
ACL_MEM_MALLOC_HUGE_FIRST);
|
|
||||||
|
|
||||||
aclIntArray *outputsize =
|
|
||||||
aclCreateIntArray(outputHW.data(), outputHW.size());
|
|
||||||
auto selfTensor = aclCreateTensor(
|
auto selfTensor = aclCreateTensor(
|
||||||
selfDim.data(), selfDim.size(), ACL_FLOAT, selfStride.data(), 0,
|
selfDim.data(), selfDim.size(), ACL_FLOAT, selfStride.data(), 0,
|
||||||
aclFormat::ACL_FORMAT_NCHW, selfDim.data(), selfDim.size(), aData);
|
aclFormat::ACL_FORMAT_NCHW, selfDim.data(), selfDim.size(), aData);
|
||||||
|
@ -148,16 +115,12 @@ class MaxPooling : public ASCENDKernelWithoutConfig {
|
||||||
aclCreateTensor(outputDim.data(), outputDim.size(), ACL_FLOAT,
|
aclCreateTensor(outputDim.data(), outputDim.size(), ACL_FLOAT,
|
||||||
outputStride.data(), 0, aclFormat::ACL_FORMAT_NCHW,
|
outputStride.data(), 0, aclFormat::ACL_FORMAT_NCHW,
|
||||||
outputDim.data(), outputDim.size(), cData);
|
outputDim.data(), outputDim.size(), cData);
|
||||||
auto indicesOutTensor = aclCreateTensor(
|
|
||||||
outputDim.data(), outputDim.size(), ACL_INT64, outputStride.data(),
|
|
||||||
0, aclFormat::ACL_FORMAT_NCHW, outputDim.data(), outputDim.size(),
|
|
||||||
indicesOutDeviceAddr);
|
|
||||||
|
|
||||||
uint64_t workspaceSize = 0;
|
uint64_t workspaceSize = 0;
|
||||||
aclOpExecutor *executor;
|
aclOpExecutor *executor;
|
||||||
auto ret = aclnnAdaptiveMaxPool2dGetWorkspaceSize(
|
auto ret = aclnnMaxPoolGetWorkspaceSize(
|
||||||
selfTensor, outputsize, outputTensor, indicesOutTensor,
|
selfTensor, kernelSize, strides, 0, paddings, dilations, ceilMode,
|
||||||
&workspaceSize, &executor);
|
outputTensor, &workspaceSize, &executor);
|
||||||
assert(ret == ACL_SUCCESS);
|
assert(ret == ACL_SUCCESS);
|
||||||
|
|
||||||
void *workspaceAddr = nullptr;
|
void *workspaceAddr = nullptr;
|
||||||
|
@ -165,15 +128,13 @@ class MaxPooling : public ASCENDKernelWithoutConfig {
|
||||||
workspaceAddr = context->getWorkspace(workspaceSize);
|
workspaceAddr = context->getWorkspace(workspaceSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = aclnnAdaptiveMaxPool2d(workspaceAddr, workspaceSize, executor,
|
ret = aclnnMaxPool(workspaceAddr, workspaceSize, executor,
|
||||||
context->ASCENDHandle());
|
context->ASCENDHandle());
|
||||||
assert(ret == ACL_SUCCESS);
|
assert(ret == ACL_SUCCESS);
|
||||||
|
|
||||||
ret = aclrtSynchronizeStream(context->ASCENDHandle());
|
ret = aclrtSynchronizeStream(context->ASCENDHandle());
|
||||||
assert(ret == ACL_SUCCESS);
|
assert(ret == ACL_SUCCESS);
|
||||||
|
|
||||||
aclDestroyTensor(indicesOutTensor);
|
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -45,14 +45,16 @@ void testConv(const std::function<void(void *, size_t, DataType)> &generatorA,
|
||||||
cpuRuntime->run(cpuGraph);
|
cpuRuntime->run(cpuGraph);
|
||||||
auto outputCpu = cpuOp->getOutput();
|
auto outputCpu = cpuOp->getOutput();
|
||||||
// Check
|
// Check
|
||||||
EXPECT_TRUE(outputCpu->equalData(outputNpu2Cpu));
|
// outputCpu->printData();
|
||||||
|
// outputNpu2Cpu->printData();
|
||||||
|
EXPECT_TRUE(outputCpu->equalData(outputNpu2Cpu, 1e-3));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(ascend_Conv, run) {
|
TEST(ascend_Conv, run) {
|
||||||
aclInit(nullptr);
|
// aclInit(nullptr);
|
||||||
testConv<ConvObj>(IncrementalGenerator(), IncrementalGenerator(),
|
testConv<ConvObj>(IncrementalGenerator(), IncrementalGenerator(),
|
||||||
Shape{1, 3, 32, 32}, Shape{2, 3, 3, 3});
|
Shape{1, 3, 128, 128}, Shape{2, 3, 3, 3});
|
||||||
aclFinalize();
|
// aclFinalize();
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace infini
|
} // namespace infini
|
||||||
|
|
|
@ -53,9 +53,10 @@ TEST(ascend_ElementWise, run) {
|
||||||
// aclInit(nullptr);
|
// aclInit(nullptr);
|
||||||
// testElementWise<PowObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
|
// testElementWise<PowObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
|
||||||
// testElementWise<AddObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
|
// testElementWise<AddObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
|
||||||
// testElementWise<SubObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
|
testElementWise<SubObj>(IncrementalGenerator(), Shape{1, 1, 48, 48},
|
||||||
testElementWise<DivObj>(IncrementalGenerator(), Shape{1},
|
Shape{1, 1, 1, 1});
|
||||||
Shape{1, 2, 2, 3});
|
// testElementWise<DivObj>(IncrementalGenerator(), Shape{1}, Shape{1, 2, 2,
|
||||||
|
// 3});
|
||||||
// testElementWise<MulObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
|
// testElementWise<MulObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
|
||||||
// aclFinalize();
|
// aclFinalize();
|
||||||
}
|
}
|
||||||
|
|
|
@ -50,10 +50,10 @@ void testMatmul(const std::function<void(void *, size_t, DataType)> &generatorA,
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(ascend_Matmul, run) {
|
TEST(ascend_Matmul, run) {
|
||||||
aclInit(nullptr);
|
// aclInit(nullptr);
|
||||||
testMatmul<MatmulObj>(IncrementalGenerator(), IncrementalGenerator(), false,
|
testMatmul<MatmulObj>(IncrementalGenerator(), IncrementalGenerator(), false,
|
||||||
false, Shape{1, 2, 3}, Shape{1, 3, 4});
|
false, Shape{1, 2, 3}, Shape{1, 3, 4});
|
||||||
aclFinalize();
|
// aclFinalize();
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace infini
|
} // namespace infini
|
||||||
|
|
|
@ -26,6 +26,7 @@ void testPooling(const std::function<void(void *, size_t, DataType)> &generator,
|
||||||
auto inputNpu = npuGraph->cloneTensor(inputCpu);
|
auto inputNpu = npuGraph->cloneTensor(inputCpu);
|
||||||
auto npuOp =
|
auto npuOp =
|
||||||
npuGraph->addOp<T>(inputNpu, nullptr, 3, 3, 1, 1, 1, 1, 2, 2, 0);
|
npuGraph->addOp<T>(inputNpu, nullptr, 3, 3, 1, 1, 1, 1, 2, 2, 0);
|
||||||
|
// npuGraph->addOp<T>(inputNpu, nullptr, 2, 2, 1, 1, 0, 0, 1, 1, 0);
|
||||||
npuGraph->dataMalloc();
|
npuGraph->dataMalloc();
|
||||||
inputNpu->setData(generator);
|
inputNpu->setData(generator);
|
||||||
npuRuntime->run(npuGraph);
|
npuRuntime->run(npuGraph);
|
||||||
|
@ -38,10 +39,10 @@ void testPooling(const std::function<void(void *, size_t, DataType)> &generator,
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(cnnl_Pooling, run) {
|
TEST(cnnl_Pooling, run) {
|
||||||
aclInit(nullptr);
|
// aclInit(nullptr);
|
||||||
testPooling<MaxPoolObj>(IncrementalGenerator(), Shape{1, 2, 5, 5});
|
// testPooling<MaxPoolObj>(IncrementalGenerator(), Shape{1, 2, 5, 5});
|
||||||
testPooling<AvgPoolObj>(IncrementalGenerator(), Shape{1, 2, 5, 5});
|
testPooling<AvgPoolObj>(IncrementalGenerator(), Shape{1, 2, 5, 5});
|
||||||
aclFinalize();
|
// aclFinalize();
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace infini
|
} // namespace infini
|
||||||
|
|
Loading…
Reference in New Issue