diff --git a/CMakeLists.txt b/CMakeLists.txt
index 52f4ff4e..1e167948 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,7 +3,7 @@
 include(CMakeDependentOption)
 project(InfiniTensor C CXX)
 # Do not change these options in this file. Use cmake.config, cmake -DOPTION=VALUE, or ccmake to specify them.
-option(USE_CUDA "Support CUDA GPU" OFF)
+option(USE_CUDA "Support CUDA GPU" ON)
 option(USE_BANG "Support BANG MLU" OFF)
 option(USE_BACKTRACE "Print backtrace on exception and segmentation fault" ON)
 option(USE_PROTOBUF "Serialize and deserialize tensors" ON)
diff --git a/test/kernels/cuda/test_resnet.cc b/test/kernels/cuda/test_resnet.cc
new file mode 100644
index 00000000..f12b27a9
--- /dev/null
+++ b/test/kernels/cuda/test_resnet.cc
@@ -0,0 +1,74 @@
+#include "core/graph.h"
+#include "core/kernel.h"
+#include "core/runtime.h"
+#include "cuda/cuda_runtime.h"
+#include "cuda/cuda_utility.h"
+#include "operators/element_wise.h"
+
+#include "test.h"
+
+namespace infini {
+
+using ExpectOutput = vector<float>;
+template <class T>
+void testResnet(
+    const std::function<void(void *, size_t, DataType)> &generator,
+    const Shape &shape) {
+    Runtime cpuRuntime = CpuRuntimeObj::getInstance();
+    auto cudaRuntime = make_ref<CudaRuntimeObj>();
+
+    // Build input data on CPU
+    Tensor acpu = make_ref<TensorObj>(shape, DataType::Float32, cpuRuntime);
+    acpu->dataMalloc();
+    acpu->setData(generator);
+
+    Tensor bcpu = make_ref<TensorObj>(shape, DataType::Float32, cpuRuntime);
+    bcpu->dataMalloc();
+    bcpu->setData(generator);
+
+    Tensor ccpu = make_ref<TensorObj>(shape, DataType::Float32, cpuRuntime);
+    ccpu->dataMalloc();
+    ccpu->setData(generator);
+
+    Graph g = make_ref<GraphObj>(cudaRuntime);
+    Graph cg = make_ref<GraphObj>(cpuRuntime);
+
+    auto a = g->cloneTensor(acpu);
+    auto b = g->cloneTensor(bcpu);
+    auto c = g->cloneTensor(ccpu);
+
+    auto op = g->addOpWithOutputs<T>(a, b, c);
+    op = g->addOpWithOutputs<T>(c, b, c);
+    auto cop = cg->addOpWithOutputs<T>(acpu, bcpu, ccpu);
+    cop = cg->addOpWithOutputs<T>(ccpu, bcpu, ccpu);
+
+    // allocate CUDA memory
+    g->dataMalloc();
+    cg->dataMalloc();
+
+    // Execute on CUDA
+    cudaRuntime->run(g);
+    cpuRuntime->run(cg);
+
+    // clone CUDA output to CPU
+    auto gpu2cpu = c->clone(cpuRuntime);
+    // cudaPrintTensor(c);
+    // check results on CPU
+    ccpu->printData();
+    EXPECT_TRUE(gpu2cpu->equalData(ccpu));
+}
+
+TEST(cuDNN_ElementWise, run) {
+    testResnet<AddObj>(
+        IncrementalGenerator(), Shape{1, 2, 2, 3});
+    testResnet<SubObj>(
+        IncrementalGenerator(), Shape{1, 2, 2, 3});
+    testResnet<MulObj>(
+        IncrementalGenerator(), Shape{1, 2, 2, 3});
+    // testResnet<DivObj>(
+    //     IncrementalGenerator(), Shape{1, 2, 2, 3});
+    // testResnet<PowObj>(
+    //     IncrementalGenerator(), Shape{1, 2, 2, 3});
+}
+
+} // namespace infini