// InfiniTensor/test/kernels/intelcpu/test_mkl_conv.cc

#include "core/graph.h"
#include "core/kernel.h"
#include "core/perf_engine.h"
#include "core/runtime.h"
#include "intelcpu/mkl_runtime.h"
#include "operators/conv.h"
#include "test.h"
namespace infini {
/// Builds a single-convolution graph on the MKL runtime, fills the input and
/// weight tensors with `generator`, runs the graph, and checks the result.
///
/// @param generator Callback used by Tensor::setData to fill both the
///                  {1, 3, 4, 4} input and the {2, 3, 3, 3} weight tensor.
/// @param ansVec    Expected contents of the convolution output, compared
///                  with Tensor::equalData.
void testConvDnnl(
    const std::function<void(void *, size_t, DataType)> &generator,
    vector<float> ansVec) {
    auto runtime = MklRuntimeObj::getInstance();
    Graph graph = make_ref<GraphObj>(runtime);

    // Graph inputs: one activation tensor and one filter tensor.
    Tensor input = graph->addTensor({1, 3, 4, 4}, DataType::Float32);
    Tensor weight = graph->addTensor({2, 3, 3, 3}, DataType::Float32);

    // nullptr lets the graph create the output tensor.
    // NOTE(review): the six integers are presumably
    // (padH, padW, strideH, strideW, dilationH, dilationW) -- confirm against
    // the ConvObj constructor.
    auto convOp =
        graph->addOp<ConvObj>(input, weight, nullptr, 1, 1, 2, 1, 1, 2);

    // Storage must exist for every tensor before any data can be written.
    graph->dataMalloc();
    input->setData(generator);
    weight->setData(generator);

    runtime->run(graph);

    auto output = convOp->getOutput(0);
    EXPECT_TRUE(output->equalData(ansVec));
}
// Checks the convolution result for two data patterns against precomputed
// reference outputs.
TEST(dnnl_Conv, run) {
    // Reference output when input and weight are filled by OneGenerator.
    const vector<float> expectedOnes{12, 12, 18, 18, 12, 12, 18, 18};
    testConvDnnl(OneGenerator(), expectedOnes);

    // Reference output when input and weight are filled by
    // IncrementalGenerator.
    const vector<float> expectedIncremental{4794,  4386,  8199,  7506,
                                            11274, 10542, 20835, 19656};
    testConvDnnl(IncrementalGenerator(), expectedIncremental);
}
TEST(mkl_Conv, tune) {
auto mklRuntime = MklRuntimeObj::getInstance();
Graph gMkl = make_ref<GraphObj>(mklRuntime);
Tensor i0 = gMkl->addTensor({1, 3, 224, 224}, DataType::Float32);
Tensor w0 = gMkl->addTensor({2, 3, 3, 3}, DataType::Float32);
auto conv = gMkl->addOp<ConvObj>(i0, w0, nullptr, 1, 1, 1, 1, 1, 1);
gMkl->dataMalloc();
i0->setData(IncrementalGenerator());
w0->setData(IncrementalGenerator());
// Execute on CUDA
bool tune = true;
mklRuntime->run(gMkl, tune);
// check record
Modify kernel registration & support fp16 (#205) * - Remove dataType from the kernel registration. * - support fp16 for conv * - cpu kernel: adapt the new registration mechanism * modified all register kernel * add where fp16 * add layernorm fp16 * add split_concat fp16 * - element_wise support fp16 * feat: support transpose fp16 * feat: support sliceOp fp16 * - unary support fp16 * - feat: support reduceOp fp16 * feat: support matmulOp/expandOp fp16 * feat: support powOp int8 * add cuda cast & support half-precision for gather * style: fix style * feat:support int8 for gather * style:fix style * modified test_cuda_conv_transposed * fix: fix dist code to support fp16 * fix(graph.cc): fix topo_sort * fix: fix recv and send kernel registration * feat: add field tensors for stub * refactor(frontend): 先排序后构图 Signed-off-by: YdrMaster <ydrml@hotmail.com> * fix: 为中间结果提供tensor到node的mapping * fix (slice): add guard for area out of range * fix: fix matmul fp16 * fix: fix re-dataMalloc for weight tensor and use of naive allocator * feat: add dataType filter for cuda kernel * feat: bang kernel adapt the new registration mechanism * fix: fix some error on mlu * feat: intelcpu kernel adapt the new registration mechanism * feat: modify kernel registration on kunlun * fix intelcpu compiler bug * feat: bang reshape support all dataType * fix: fix bang reduce * fix(all_reduce.cc): fix as reviewer suggessted * fix: fix style and restore unary test codes --------- Signed-off-by: YdrMaster <ydrml@hotmail.com> Co-authored-by: xgqdut2016 <kenan_gewei@163.com> Co-authored-by: xgqdut2016 <140036308+xgqdut2016@users.noreply.github.com> Co-authored-by: zhangyunze <z13785159769@163.com> Co-authored-by: OdinaryWord <sx-hz@163.com> Co-authored-by: YdrMaster <ydrml@hotmail.com> Co-authored-by: panzezhong <panzezhong@qiyuanlab.com>
2024-01-15 11:02:13 +08:00
auto kernelAttrs =
KernelAttrs{Device::INTELCPU, conv->getOpType().underlying()};
auto perfKey = PerfEngine::Key{kernelAttrs, conv->getOpPerfKey()};
std::optional<PerfRecord> perfData =
PerfEngine::getInstance().getPerfData(perfKey);
ASSERT_TRUE(perfData.has_value());
}
} // namespace infini