update test

fix
format
2023-10-12 14:07:23 +08:00 · 2023-10-12 09:59:08 +08:00 · 2023-10-11 15:01:01 +08:00 · 2023-10-11 14:56:01 +08:00 · 2023-10-11 14:53:53 +08:00
2 changed files with 155 additions and 0 deletions
--- a/test/kernels/bang/test_bang_codegen.cc
+++ b/test/kernels/bang/test_bang_codegen.cc
@ -0,0 +1,77 @@
+#include "bang/bang_runtime.h"
+#include "core/graph.h"
+#include "core/kernel.h"
+#include "core/runtime.h"
+#include "operators/element_wise.h"
+#include "operators/softmax.h"
+#include "operators/unary.h"
+
+#include "test.h"
+
+namespace infini {
+
+/// Timing run of a chained element-wise graph on the BANG runtime.
+///
+/// Four equally shaped Float32 tensors are created and filled on the native
+/// CPU runtime and cloned into a BANG graph that applies, in order,
+/// Add(a, b) -> Sub(., c) -> Sqrt -> Mul(d, .) -> Sigmoid. The graph is then
+/// executed repeatedly through `timeit` and a short report is printed.
+/// No output values are checked by this test.
+TEST(BANG_Codegen, run) {
+    // Benchmark settings
+    const int warmupRounds = 100;
+    const int timingRounds = 200;
+    const auto INPUT_SHAPE = Shape{224, 768};
+    const auto dtype = DataType::Float32;
+
+    // Element count is the product of all dimensions.
+    size_t size = 1;
+    for (const auto dim : INPUT_SHAPE) {
+        size *= static_cast<size_t>(dim);
+    }
+    // Use the per-element width reported by the data type. sizeof(dtype)
+    // would measure the DataType descriptor object itself, which only matches
+    // the element width by coincidence.
+    const size_t sizeInBytes = size * dtype.getSize();
+
+    // Create runtimes
+    Runtime cpuRuntime = NativeCpuRuntimeObj::getInstance();
+    auto bangRuntime = make_ref<BangRuntimeObj>();
+
+    // Build the CPU graph that owns the input tensors
+    Graph gCpu = make_ref<GraphObj>(cpuRuntime);
+    auto aCpu = gCpu->addTensor(INPUT_SHAPE, dtype);
+    auto bCpu = gCpu->addTensor(INPUT_SHAPE, dtype);
+    auto cCpu = gCpu->addTensor(INPUT_SHAPE, dtype);
+    auto dCpu = gCpu->addTensor(INPUT_SHAPE, dtype);
+
+    // Allocate and fill the input data on the CPU
+    gCpu->dataMalloc();
+    aCpu->setData(IncrementalGenerator());
+    bCpu->setData(IncrementalGenerator());
+    cCpu->setData(OneGenerator());
+    dCpu->setData(IncrementalGenerator());
+
+    // Build the BANG graph: Add -> Sub -> Sqrt -> Mul -> Sigmoid
+    Graph g = make_ref<GraphObj>(bangRuntime);
+    auto a = g->cloneTensor(aCpu);
+    auto b = g->cloneTensor(bCpu);
+    auto c = g->cloneTensor(cCpu);
+    auto d = g->cloneTensor(dCpu);
+    auto add = g->addOp<AddObj>(a, b, nullptr);
+    auto sub = g->addOp<SubObj>(add->getOutput(), c, nullptr);
+    // Named sqrtOp so the local does not shadow the math function sqrt.
+    auto sqrtOp = g->addOp<SqrtObj>(sub->getOutput(), nullptr);
+    auto mul = g->addOp<MulObj>(d, sqrtOp->getOutput(), nullptr);
+    g->addOp<SigmoidObj>(mul->getOutput(), nullptr);
+
+    // Allocate BANG memory
+    g->dataMalloc();
+
+    // Execute on BANG and time; the second callback synchronizes the runtime.
+    // NOTE(review): confirm whether timeit returns a total or a per-round
+    // figure; the report below labels it "in total".
+    const double time_op =
+        timeit([&]() { bangRuntime->run(g); }, [&]() { bangRuntime->sync(); },
+               warmupRounds, timingRounds);
+
+    // The timed graph ends in Sigmoid (there is no softmax operator in it),
+    // so the report header names the actual operator chain.
+    printf("Operator - Add/Sub/Sqrt/Mul/Sigmoid:\n");
+    printf("Input shape: (%d, %d)\n", INPUT_SHAPE[0], INPUT_SHAPE[1]);
+    // %zu is the portable printf conversion for size_t.
+    printf("Input size: %zu, dtype: %s, size in bytes: %zu\n", size,
+           dtype.toString().c_str(), sizeInBytes);
+    printf("Time in total: %.6lf ms\n", time_op);
+}
+} // namespace infini
--- a/test/kernels/cuda/test_cuda_codegen.cc
+++ b/test/kernels/cuda/test_cuda_codegen.cc
@ -0,0 +1,78 @@
+#include "core/graph.h"
+#include "core/kernel.h"
+#include "core/runtime.h"
+#include "cuda/cuda_runtime.h"
+#include "cuda/cuda_utility.h"
+#include "operators/element_wise.h"
+#include "operators/softmax.h"
+#include "operators/unary.h"
+
+#include "test.h"
+
+namespace infini {
+
+/// Timing run of a chained element-wise graph on the CUDA runtime.
+///
+/// Four equally shaped Float32 tensors are created and filled on the native
+/// CPU runtime and cloned into a CUDA graph that applies, in order,
+/// Add(a, b) -> Sub(., c) -> Sqrt -> Mul(d, .) -> Sigmoid. The graph is then
+/// executed repeatedly through `timeit` and a short report is printed.
+/// No output values are checked by this test.
+TEST(CUDA_Codegen, run) {
+    // Benchmark settings
+    const int warmupRounds = 100;
+    const int timingRounds = 200;
+    const auto INPUT_SHAPE = Shape{224, 768};
+    const auto dtype = DataType::Float32;
+
+    // Element count is the product of all dimensions.
+    size_t size = 1;
+    for (const auto dim : INPUT_SHAPE) {
+        size *= static_cast<size_t>(dim);
+    }
+    // Use the per-element width reported by the data type. sizeof(dtype)
+    // would measure the DataType descriptor object itself, which only matches
+    // the element width by coincidence.
+    const size_t sizeInBytes = size * dtype.getSize();
+
+    // Create runtimes
+    Runtime cpuRuntime = NativeCpuRuntimeObj::getInstance();
+    auto cudaRuntime = make_ref<CudaRuntimeObj>();
+
+    // Build the CPU graph that owns the input tensors
+    Graph gCpu = make_ref<GraphObj>(cpuRuntime);
+    auto aCpu = gCpu->addTensor(INPUT_SHAPE, dtype);
+    auto bCpu = gCpu->addTensor(INPUT_SHAPE, dtype);
+    auto cCpu = gCpu->addTensor(INPUT_SHAPE, dtype);
+    auto dCpu = gCpu->addTensor(INPUT_SHAPE, dtype);
+
+    // Allocate and fill the input data on the CPU
+    gCpu->dataMalloc();
+    aCpu->setData(IncrementalGenerator());
+    bCpu->setData(IncrementalGenerator());
+    cCpu->setData(OneGenerator());
+    dCpu->setData(IncrementalGenerator());
+
+    // Build the CUDA graph: Add -> Sub -> Sqrt -> Mul -> Sigmoid
+    Graph g = make_ref<GraphObj>(cudaRuntime);
+    auto a = g->cloneTensor(aCpu);
+    auto b = g->cloneTensor(bCpu);
+    auto c = g->cloneTensor(cCpu);
+    auto d = g->cloneTensor(dCpu);
+    auto add = g->addOp<AddObj>(a, b, nullptr);
+    auto sub = g->addOp<SubObj>(add->getOutput(), c, nullptr);
+    // Named sqrtOp so the local does not shadow the math function sqrt.
+    auto sqrtOp = g->addOp<SqrtObj>(sub->getOutput(), nullptr);
+    auto mul = g->addOp<MulObj>(d, sqrtOp->getOutput(), nullptr);
+    g->addOp<SigmoidObj>(mul->getOutput(), nullptr);
+
+    // Allocate CUDA memory
+    g->dataMalloc();
+
+    // Execute on CUDA and time; the second callback synchronizes the runtime.
+    // NOTE(review): confirm whether timeit returns a total or a per-round
+    // figure; the report below labels it "in total".
+    const double time_op =
+        timeit([&]() { cudaRuntime->run(g); }, [&]() { cudaRuntime->sync(); },
+               warmupRounds, timingRounds);
+
+    // The timed graph ends in Sigmoid (there is no softmax operator in it),
+    // so the report header names the actual operator chain.
+    printf("Operator - Add/Sub/Sqrt/Mul/Sigmoid:\n");
+    printf("Input shape: (%d, %d)\n", INPUT_SHAPE[0], INPUT_SHAPE[1]);
+    // %zu is the portable printf conversion for size_t.
+    printf("Input size: %zu, dtype: %s, size in bytes: %zu\n", size,
+           dtype.toString().c_str(), sizeInBytes);
+    printf("Time in total: %.6lf ms\n", time_op);
+}
+} // namespace infini
Author	SHA1	Message	Date
Bolun	ee6dd3deac	update test	2023-10-12 14:07:23 +08:00
Bolun	c774c9182d	fix	2023-10-12 09:59:08 +08:00
Bolun	764702beb2	format	2023-10-11 15:01:01 +08:00
Bolun	3366cfa943	add cuda and bang test for codegen	2023-10-11 14:56:01 +08:00
Bolun	5a74f8fa4b	add test for codegen	2023-10-11 14:53:53 +08:00