InfiniTensor/test/kernels/cuda/test_perfengine.cc

#include "core/graph.h"
#include "core/kernel.h"
#include "core/perf_engine.h"
#include "core/runtime.h"
#include "cuda/cuda_runtime.h"
#include "cuda/cuda_utility.h"
#include "operators/conv.h"
#include "operators/matmul.h"
#include "test.h"

namespace infini {

// Round-trip test for PerfEngine persistence.
//
// Two small CUDA graphs (one Conv, one Matmul) are executed first. The
// PerfEngine singleton is then converted to JSON, written to "test.json",
// read back from the same file, and converted to JSON again; the two JSON
// values must compare equal.
TEST(PerfEngine, save_and_load) {
    // NOTE(review): nothing below uses `cpu`. It is kept because this file
    // does not show whether later steps rely on the singleton having been
    // instantiated here -- confirm before removing.
    Runtime cpu = NativeCpuRuntimeObj::getInstance(); // CPUruntime is singleton
    Runtime cuda = make_ref<CudaRuntimeObj>();

    { // Conv
        Graph gCuda = make_ref<GraphObj>(cuda);

        // Declare the input and weight tensors directly on the CUDA graph.
        Tensor i0Cuda = gCuda->addTensor({1, 3, 224, 224}, DataType::Float32);
        Tensor w0Cuda = gCuda->addTensor({2, 3, 3, 3}, DataType::Float32);

        // Build the CUDA graph; the returned operator handle is not needed.
        gCuda->addOp<ConvObj>(i0Cuda, w0Cuda, nullptr, 1, 1, 1, 1, 1, 1);
        gCuda->dataMalloc();
        // NOTE(review): the `true` argument presumably enables tuning so that
        // perf records are produced -- confirm against the run() declaration.
        cuda->run(gCuda, true);
    }

    { // Matmul
        Graph gCuda = make_ref<GraphObj>(cuda);
        auto ACuda = gCuda->addTensor(Shape{1, 3, 5}, DataType::Float32);
        auto BCuda = gCuda->addTensor(Shape{1, 5, 2}, DataType::Float32);

        // As above, only the side effect of adding the operator matters.
        gCuda->addOp<MatmulObj>(ACuda, BCuda, nullptr);
        gCuda->dataMalloc();
        cuda->run(gCuda, true);
    }

    auto &perfEngine = PerfEngine::getInstance();

    // JSON form of the engine before it is written to disk.
    json j0 = perfEngine;
    std::cout << "PerfEngine saveed:" << std::endl;
    std::cout << j0 << std::endl;

    // Write the engine to a file, then read that same file back.
    perfEngine.savePerfEngineData("test.json");
    perfEngine.loadPerfEngineData("test.json");

    // JSON form of the engine after the reload.
    json j1 = perfEngine;
    std::cout << "PerfEngine loaded:" << std::endl;
    std::cout << j1 << std::endl;

    // Both JSON values are printed above, so a plain boolean check is enough
    // to locate a mismatch in the test log.
    EXPECT_TRUE(j0 == j1);
}
} // namespace infini
Json perfrecord (#32) Added perfengine serialization&deserialization and corresponding test case. * Add: perfrecord json representation. * Add: perfrecord virtual func. to_json&from_json. * Add: perfengine serilization and deserilization. * Modify: tune func type to supp derived struct serilization. * Fix: structure after rebase * Chore: Remove empty line in conv.h Co-authored-by: wcz112 <wcz19@mails.tsinghua.edu.cn> Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> Co-authored-by: zhengly123 <zhengly123@outlook.com> 2022-09-22 15:34:34 +08:00			`#include "core/graph.h"`
			`#include "core/kernel.h"`
			`#include "core/perf_engine.h"`
			`#include "core/runtime.h"`
			`#include "cuda/cuda_runtime.h"`
			`#include "cuda/cuda_utility.h"`
			`#include "operators/conv.h"`
			`#include "operators/matmul.h"`
			`#include "test.h"`

			`namespace infini {`

			`TEST(PerfEngine, save_and_load) {`
ADD: add mkl runtime for intel cpu , and add mkl kernel for matmul/conv/convtransposed. (#61) * move memory format transformation to TensorObj clang format add MemoryFormat for tensorObj. use post_ops for fused conv/deconv Distinguish mkl op_timer from cuda op timer. add act optype to conv and deconv add operator timer add mkl kernel for convTransposed minor fix for group conv do not use cblas_sgemm_batch CpuRuntimeObj->NativeCpuRuntimeObj add matmul op for mkl * fix: fix bugs when rebasing from master fix: fix bugs when rebasing from master * fix: update api after rebasing * fix: fix format; fix onnx import * fix: fix clang-format * [fix] fix conv_transpose test * [fix] use stronger test case for transposed conv * [fix] remove tensor memory format; fix mkl transpose conv * [fix] add FIXME tag for op_timer python api --------- Co-authored-by: whjthu <haojie0429@gmail.com> 2023-03-27 21:28:49 +08:00			`Runtime cpu = NativeCpuRuntimeObj::getInstance(); // CPUruntime is singleton`
Json perfrecord (#32) Added perfengine serialization&deserialization and corresponding test case. * Add: perfrecord json representation. * Add: perfrecord virtual func. to_json&from_json. * Add: perfengine serilization and deserilization. * Modify: tune func type to supp derived struct serilization. * Fix: structure after rebase * Chore: Remove empty line in conv.h Co-authored-by: wcz112 <wcz19@mails.tsinghua.edu.cn> Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> Co-authored-by: zhengly123 <zhengly123@outlook.com> 2022-09-22 15:34:34 +08:00			`Graph gCpu = make_ref<GraphObj>(cpu);`
			`Runtime cuda = make_ref<CudaRuntimeObj>();`
Add python interface for CUDA operator evaluation (#42) * Refactor: seperate data generator * Add: python bindings for opTimer * Fix: test_perfengine Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-09-27 10:41:12 +08:00			`{ // Conv`
			`Graph gCuda = make_ref<GraphObj>(cuda);`

			`// Copy input tensors from CPU to CUDA`
			`Tensor i0Cuda = gCuda->addTensor({1, 3, 224, 224}, DataType::Float32);`
			`Tensor w0Cuda = gCuda->addTensor({2, 3, 3, 3}, DataType::Float32);`
			`// Build CUDA graph`
			`auto conv =`
			`gCuda->addOp<ConvObj>(i0Cuda, w0Cuda, nullptr, 1, 1, 1, 1, 1, 1);`
			`gCuda->dataMalloc();`
			`cuda->run(gCuda, true);`
			`}`

			`{ // Matmul`
			`Graph gCuda = make_ref<GraphObj>(cuda);`
			`auto ACuda = gCuda->addTensor(Shape{1, 3, 5}, DataType::Float32);`
			`auto BCuda = gCuda->addTensor(Shape{1, 5, 2}, DataType::Float32);`
			`auto matmul = gCuda->addOp<MatmulObj>(ACuda, BCuda, nullptr);`
			`gCuda->dataMalloc();`
			`cuda->run(gCuda, true);`
			`}`
Json perfrecord (#32) Added perfengine serialization&deserialization and corresponding test case. * Add: perfrecord json representation. * Add: perfrecord virtual func. to_json&from_json. * Add: perfengine serilization and deserilization. * Modify: tune func type to supp derived struct serilization. * Fix: structure after rebase * Chore: Remove empty line in conv.h Co-authored-by: wcz112 <wcz19@mails.tsinghua.edu.cn> Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> Co-authored-by: zhengly123 <zhengly123@outlook.com> 2022-09-22 15:34:34 +08:00			`auto &perfEngine = PerfEngine::getInstance();`

			`json j0 = perfEngine;`
			`std::cout << "PerfEngine saveed:" << std::endl;`
			`std::cout << j0 << std::endl;`
			`perfEngine.savePerfEngineData("test.json");`
			`perfEngine.loadPerfEngineData("test.json");`
			`json j1 = perfEngine;`
			`std::cout << "PerfEngine loaded:" << std::endl;`
			`std::cout << j1 << std::endl;`
			`EXPECT_TRUE(j0 == j1);`
			`}`
ADD: batch norm operator and cuda kernel. (#44) fix numInputs of batchNorm, add new line in file ending. ADD: batch norm operator and cuda kernel. add training remove comments. fix compile error. add batch norm operator and cuda kernel. 2022-10-15 16:29:28 +08:00			`} // namespace infini`