#include "core/graph.h"
#include "core/runtime.h"
#include "cuda/cuda_runtime.h"
#include "nnet/Visitor/MatchReshapeVisitor.h"
#include "nnet/expr.h"
#include "nnet/nmutator.h"
#include "nnet/routine.h"
#include "nnet/test.h"
#include "operators/matmul.h"
// NOTE(review): the received text had a bare `#include` with no header name
// and no template arguments anywhere (every `<...>` span looks stripped).
// The standard headers below cover the std names this file uses directly, and
// the template arguments further down (GraphObj, TensorObj, MatmulObj,
// CudaRuntimeObj, element types) were reconstructed from the included headers
// and the DataType of each tensor -- TODO confirm against the upstream file.
#include <cstdint>
#include <vector>
using namespace infini;
using namespace std;

// Builds a single 1x2x3 * 1x3x4 UInt32 matmul graph on the native CPU
// runtime, rewrites it with NMutator in ToNaiveMembound mode, runs the second
// mutation and checks that its single MemBound operator yields the expected
// product.
TEST(nnet, MemboundOpInterpretation) {
    Runtime runtime = NativeCpuRuntimeObj::getInstance();
    Graph g = make_ref<GraphObj>(runtime);
    Tensor i0 = g->addTensor({1, 2, 3}, DataType::UInt32);
    Tensor w0 = g->addTensor({1, 3, 4}, DataType::UInt32);
    Tensor o0 = g->addTensor({1, 2, 4}, DataType::UInt32);
    g->dataMalloc();
    i0->copyData(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
    w0->copyData(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
    g->addOpWithOutputs<MatmulObj>(i0, w0, o0);

    NMutator nmutator(NMutator::Mode::ToNaiveMembound);
    auto mutations = nmutator.run(g);
    // Fatal assertion: mutations[1] is indexed right below.
    ASSERT_EQ(mutations.size(), 2u);
    Graph gNew = mutations[1];
    gNew->print();

    gNew->dataMalloc();
    runtime->run(gNew);

    // Check answer.
    auto ops = gNew->getOperators();
    // Fatal assertion: ops[0] is indexed right below, so a wrong operator
    // count must stop the test instead of reading out of bounds.
    ASSERT_EQ(ops.size(), 1u);
    auto membound = ops[0];
    EXPECT_EQ(membound->getOpType(), OpType::MemBound);
    auto ans = make_ref<TensorObj>(Shape{1, 2, 4}, DataType::UInt32, runtime);
    ans->dataMalloc();
    ans->copyData(vector<uint32_t>{38, 44, 50, 56, 83, 98, 113, 128});
    EXPECT_TRUE(membound->getOutput()->equalData(ans));
}

// Same matmul-to-MemBound scenario with Float32 tensors on the CUDA runtime.
// The mutated graph is run with the second argument set to true (kernel
// tuning, per the original comment), then the output is cloned into a
// CPU-runtime graph and compared with a CPU-side reference tensor.
TEST(nnet, MemboundOp_Ansor_Codegen) {
    auto runtime = make_ref<CudaRuntimeObj>();
    Runtime cpu = NativeCpuRuntimeObj::getInstance();
    Graph gCpu = make_ref<GraphObj>(cpu);
    Graph g = make_ref<GraphObj>(runtime);
    Tensor i0 = g->addTensor({1, 2, 3}, DataType::Float32);
    Tensor w0 = g->addTensor({1, 3, 4}, DataType::Float32);
    Tensor o0 = g->addTensor({1, 2, 4}, DataType::Float32);
    g->dataMalloc();
    i0->copyData(vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
    w0->copyData(vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
    g->addOpWithOutputs<MatmulObj>(i0, w0, o0);

    NMutator nmutator(NMutator::Mode::ToNaiveMembound);
    auto mutations = nmutator.run(g);
    // Fatal assertion: mutations[1] is indexed right below.
    ASSERT_EQ(mutations.size(), 2u);
    Graph gNew = mutations[1];
    gNew->print();

    gNew->dataMalloc();
    runtime->run(gNew, true); // tune kernels

    // Check answer.
    auto ops = gNew->getOperators();
    // Fatal assertion: ops[0] is indexed right below.
    ASSERT_EQ(ops.size(), 1u);
    auto membound = ops[0];
    EXPECT_EQ(membound->getOpType(), OpType::MemBound);
    auto ans = make_ref<TensorObj>(Shape{1, 2, 4}, DataType::Float32, cpu);
    ans->dataMalloc();
    ans->copyData(vector<float>{38, 44, 50, 56, 83, 98, 113, 128});

    // Clone the output into the CPU-runtime graph so it can be printed and
    // compared against the reference tensor created on the CPU runtime.
    auto oCpu = gCpu->cloneTensor(membound->getOutput());
    oCpu->printData();
    EXPECT_TRUE(oCpu->equalData(ans));

    // Timing (disabled):
    // double time = timeit([&]() { runtime->run(gNew, false); }); // tune kernels
    // std::cout << "Time (ms):" << time << std::endl;
}