InfiniTensor/test/operators/test_conv.cc

#include "core/graph.h"
#include "core/runtime.h"
#include "cuda/cuda_runtime.h"
#include "cuda/cuda_utility.h"
#include "operators/conv.h"
#include "test.h"

namespace infini {

// Checks the output dimensions that ConvObj reports for a {1, 3, 4, 4} input
// and a {2, 3, 3, 3} weight under several padding/stride/dilation setups.
// Only shapes are inspected; no tensor data is allocated or computed.
TEST(Conv, ShapeInference) {
    Runtime cpu = CpuRuntimeObj::getInstance();
    const Shape inputDims{1, 3, 4, 4};
    const Shape weightDims{2, 3, 3, 3};

    // Padding given as explicit numbers (1, 1): spatial size is expected to
    // stay 4x4.
    {
        Graph graph = make_ref<GraphObj>(cpu);
        Tensor input = graph->addTensor(inputDims, DataType::UInt32);
        Tensor weight = graph->addTensor(weightDims, DataType::UInt32);
        auto op = graph->addOp<ConvObj>(input, weight, nullptr, 1, 1);
        const Shape expected{1, 2, 4, 4};
        EXPECT_EQ(op->getOutput()->getDims(), expected);
    }

    // PaddingMode::Same: spatial size is expected to match the input (4x4).
    {
        Graph graph = make_ref<GraphObj>(cpu);
        Tensor input = graph->addTensor(inputDims, DataType::UInt32);
        Tensor weight = graph->addTensor(weightDims, DataType::UInt32);
        auto op = graph->addOp<ConvObj>(input, weight, nullptr,
                                        ConvObj::PaddingMode::Same);
        const Shape expected{1, 2, 4, 4};
        EXPECT_EQ(op->getOutput()->getDims(), expected);
    }

    // PaddingMode::Valid: spatial size is expected to shrink to 2x2.
    {
        Graph graph = make_ref<GraphObj>(cpu);
        Tensor input = graph->addTensor(inputDims, DataType::UInt32);
        Tensor weight = graph->addTensor(weightDims, DataType::UInt32);
        auto op = graph->addOp<ConvObj>(input, weight, nullptr,
                                        ConvObj::PaddingMode::Valid);
        const Shape expected{1, 2, 2, 2};
        EXPECT_EQ(op->getOutput()->getDims(), expected);
    }

    // Dilation & stride variant (arguments 1, 1, 2, 1, 1, 2): expected 2x2.
    // NOTE(review): the exact meaning/order of the six integers is defined by
    // ConvObj's constructor, which is not visible here -- confirm there.
    {
        Graph graph = make_ref<GraphObj>(cpu);
        Tensor input = graph->addTensor(inputDims, DataType::UInt32);
        Tensor weight = graph->addTensor(weightDims, DataType::UInt32);
        auto op =
            graph->addOp<ConvObj>(input, weight, nullptr, 1, 1, 2, 1, 1, 2);
        const Shape expected{1, 2, 2, 2};
        EXPECT_EQ(op->getOutput()->getDims(), expected);
    }
}

// Runs the Conv operator on the CPU runtime with incrementally generated
// UInt32 inputs, then checks both the reported perf time and the computed
// output values against a hard-coded reference.
TEST(Conv, NaiveCPU) {
    Runtime cpu = CpuRuntimeObj::getInstance();
    Graph graph = make_ref<GraphObj>(cpu);
    Tensor input = graph->addTensor({1, 3, 4, 4}, DataType::UInt32);
    Tensor weight = graph->addTensor({2, 3, 3, 3}, DataType::UInt32);
    auto op = graph->addOp<ConvObj>(input, weight, nullptr, 1, 1, 2, 1, 1, 2);

    // Allocate every tensor of the graph before filling the inputs.
    graph->dataMalloc();
    input->setData(IncrementalGenerator());
    weight->setData(IncrementalGenerator());

    // NOTE(review): the two `true` flags presumably enable tuning/profiling so
    // that getPerfTime() has data -- confirm against the Runtime::run signature.
    cpu->run(graph, true, true);
    const double elapsed = cpu->getPerfTime(graph);
    // The example Conv takes 0.015ms with one core
    // NOTE(review): the upper bound below is machine dependent and may be
    // flaky on slow or heavily loaded hosts.
    EXPECT_GT(elapsed, 0);
    EXPECT_LT(elapsed, 0.1);

    // Compare the operator output with the reference values.
    auto expected =
        make_ref<TensorObj>(Shape{1, 2, 2, 2}, DataType::UInt32, cpu);
    expected->dataMalloc(cpu);
    expected->copyData(
        vector<uint32_t>{4794, 4386, 8199, 7506, 11274, 10542, 20835, 19656});
    EXPECT_TRUE(op->getOutput()->equalData(expected));
}

// Helper for the cuDNN Conv test.
//
// Builds a one-operator Conv graph on a CUDA runtime, fills input and weight
// on the CPU using `generator`, copies them into the graph's tensors, runs the
// graph, copies the result back to a CPU tensor and compares it with `ansVec`.
//
// generator: callback used to fill the CPU-side input and weight buffers; it
//            is applied to the input first, then to the weight.
// ansVec:    expected output values for the {1, 2, 2, 2} result tensor.
void testConvCudnn(
    const std::function<void(void *, size_t, DataType)> &generator,
    vector<float> ansVec) {
    Runtime hostRuntime = CpuRuntimeObj::getInstance();
    auto deviceRuntime = make_ref<CudaRuntimeObj>();

    // Creates a Float32 tensor owned by the CPU runtime and allocates its data.
    auto makeHostTensor = [&hostRuntime](const Shape &dims) {
        auto tensor =
            make_ref<TensorObj>(dims, DataType::Float32, hostRuntime);
        tensor->dataMalloc(hostRuntime);
        return tensor;
    };

    // Graph bound to the CUDA runtime, holding a single Conv operator.
    Graph graph = make_ref<GraphObj>(deviceRuntime);
    Tensor devInput = graph->addTensor({1, 3, 4, 4}, DataType::Float32);
    Tensor devWeight = graph->addTensor({2, 3, 3, 3}, DataType::Float32);
    auto op =
        graph->addOp<ConvObj>(devInput, devWeight, nullptr, 1, 1, 2, 1, 1, 2);

    // Allocate storage for all tensors of the CUDA graph.
    graph->dataMalloc();

    // Prepare inputs and the reference answer on the CPU side.
    auto hostInput = makeHostTensor(Shape{1, 3, 4, 4});
    hostInput->setData(generator);

    auto hostWeight = makeHostTensor(Shape{2, 3, 3, 3});
    hostWeight->setData(generator);

    auto expected = makeHostTensor(Shape{1, 2, 2, 2});
    expected->copyData(ansVec);

    // Transfer the CPU-side inputs into the graph tensors, then execute.
    devInput->copyData(hostInput);
    devWeight->copyData(hostWeight);
    deviceRuntime->run(graph);
    // Perf-time checks (getPerfTime(graph) > 0 and < 0.1) are intentionally
    // left disabled for the CUDA path.

    // Bring the result back into a CPU tensor for comparison.
    auto devOutput = op->getOutput();
    auto hostOutput = makeHostTensor(Shape{1, 2, 2, 2});
    hostOutput->copyData(devOutput);

    // Validate on the CPU.
    EXPECT_TRUE(hostOutput->equalData(expected));
}

// Exercises the CUDA Conv path via testConvCudnn with two input patterns:
// all-ones data and incrementally increasing data, each paired with its
// hard-coded expected output.
TEST(Conv, cuDNN) {
    // Inputs produced by OneGenerator.
    const vector<float> onesExpected{12, 12, 18, 18, 12, 12, 18, 18};
    testConvCudnn(OneGenerator(), onesExpected);

    // Inputs produced by IncrementalGenerator; same reference values as the
    // NaiveCPU test uses for UInt32.
    const vector<float> incrementalExpected{4794,  4386,  8199,  7506,
                                            11274, 10542, 20835, 19656};
    testConvCudnn(IncrementalGenerator(), incrementalExpected);
}
} // namespace infini
Add Conv operator and naive CPU implemenation (#5) * Add: Conv definition * Add: tensor copy data from vector * Add: CPU conv kernel * Fix: replace Int32 with UInt32 in DataType Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-17 14:16:01 +08:00			`#include "core/graph.h"`
Add CUDA runtime (#6) * Fix: add warm-up and repetition in timing * Add: CUDA runtime and float support * Refactor: Cuda and Cpu runtimes inherit Runtime * Add: environment script for Lotus * Add: Lotus build instructions * Update README.md Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-22 15:01:03 +08:00			`#include "core/runtime.h"`
			`#include "cuda/cuda_runtime.h"`
			`#include "cuda/cuda_utility.h"`
Add Conv operator and naive CPU implemenation (#5) * Add: Conv definition * Add: tensor copy data from vector * Add: CPU conv kernel * Fix: replace Int32 with UInt32 in DataType Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-17 14:16:01 +08:00			`#include "operators/conv.h"`
			`#include "test.h"`

			`namespace infini {`

			`TEST(Conv, ShapeInference) {`
Extended DataType class and Runtime interaction (#9) * Add: DataType class * Add: data-type-oblivious tensor interface * Rename: copyBlobToCPU Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-23 16:55:59 +08:00			`Runtime runtime = CpuRuntimeObj::getInstance();`
Add Conv operator and naive CPU implemenation (#5) * Add: Conv definition * Add: tensor copy data from vector * Add: CPU conv kernel * Fix: replace Int32 with UInt32 in DataType Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-17 14:16:01 +08:00			`// Padding modes`
			`{`
Add CUDA runtime (#6) * Fix: add warm-up and repetition in timing * Add: CUDA runtime and float support * Refactor: Cuda and Cpu runtimes inherit Runtime * Add: environment script for Lotus * Add: Lotus build instructions * Update README.md Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-22 15:01:03 +08:00			`Graph g = make_ref<GraphObj>(runtime);`
Add Conv operator and naive CPU implemenation (#5) * Add: Conv definition * Add: tensor copy data from vector * Add: CPU conv kernel * Fix: replace Int32 with UInt32 in DataType Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-17 14:16:01 +08:00			`Tensor i0 = g->addTensor({1, 3, 4, 4}, DataType::UInt32);`
			`Tensor w0 = g->addTensor({2, 3, 3, 3}, DataType::UInt32);`
			`auto conv = g->addOp<ConvObj>(i0, w0, nullptr, 1, 1);`
			`EXPECT_EQ(conv->getOutput()->getDims(), (Shape{1, 2, 4, 4}));`
			`}`
			`{`
Add CUDA runtime (#6) * Fix: add warm-up and repetition in timing * Add: CUDA runtime and float support * Refactor: Cuda and Cpu runtimes inherit Runtime * Add: environment script for Lotus * Add: Lotus build instructions * Update README.md Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-22 15:01:03 +08:00			`Graph g = make_ref<GraphObj>(runtime);`
Add Conv operator and naive CPU implemenation (#5) * Add: Conv definition * Add: tensor copy data from vector * Add: CPU conv kernel * Fix: replace Int32 with UInt32 in DataType Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-17 14:16:01 +08:00			`Tensor i0 = g->addTensor({1, 3, 4, 4}, DataType::UInt32);`
			`Tensor w0 = g->addTensor({2, 3, 3, 3}, DataType::UInt32);`
			`auto conv =`
			`g->addOp<ConvObj>(i0, w0, nullptr, ConvObj::PaddingMode::Same);`
			`EXPECT_EQ(conv->getOutput()->getDims(), (Shape{1, 2, 4, 4}));`
			`}`
			`{`
Add CUDA runtime (#6) * Fix: add warm-up and repetition in timing * Add: CUDA runtime and float support * Refactor: Cuda and Cpu runtimes inherit Runtime * Add: environment script for Lotus * Add: Lotus build instructions * Update README.md Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-22 15:01:03 +08:00			`Graph g = make_ref<GraphObj>(runtime);`
Add Conv operator and naive CPU implemenation (#5) * Add: Conv definition * Add: tensor copy data from vector * Add: CPU conv kernel * Fix: replace Int32 with UInt32 in DataType Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-17 14:16:01 +08:00			`Tensor i0 = g->addTensor({1, 3, 4, 4}, DataType::UInt32);`
			`Tensor w0 = g->addTensor({2, 3, 3, 3}, DataType::UInt32);`
			`auto conv =`
			`g->addOp<ConvObj>(i0, w0, nullptr, ConvObj::PaddingMode::Valid);`
			`EXPECT_EQ(conv->getOutput()->getDims(), (Shape{1, 2, 2, 2}));`
			`}`
			`{ // dilation & stride`
Add CUDA runtime (#6) * Fix: add warm-up and repetition in timing * Add: CUDA runtime and float support * Refactor: Cuda and Cpu runtimes inherit Runtime * Add: environment script for Lotus * Add: Lotus build instructions * Update README.md Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-22 15:01:03 +08:00			`Graph g = make_ref<GraphObj>(runtime);`
Add Conv operator and naive CPU implemenation (#5) * Add: Conv definition * Add: tensor copy data from vector * Add: CPU conv kernel * Fix: replace Int32 with UInt32 in DataType Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-17 14:16:01 +08:00			`Tensor i0 = g->addTensor({1, 3, 4, 4}, DataType::UInt32);`
			`Tensor w0 = g->addTensor({2, 3, 3, 3}, DataType::UInt32);`
			`auto conv = g->addOp<ConvObj>(i0, w0, nullptr, 1, 1, 2, 1, 1, 2);`
			`EXPECT_EQ(conv->getOutput()->getDims(), (Shape{1, 2, 2, 2}));`
			`}`
			`}`

			`TEST(Conv, NaiveCPU) {`
Extended DataType class and Runtime interaction (#9) * Add: DataType class * Add: data-type-oblivious tensor interface * Rename: copyBlobToCPU Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-23 16:55:59 +08:00			`Runtime runtime = CpuRuntimeObj::getInstance();`
Add CUDA runtime (#6) * Fix: add warm-up and repetition in timing * Add: CUDA runtime and float support * Refactor: Cuda and Cpu runtimes inherit Runtime * Add: environment script for Lotus * Add: Lotus build instructions * Update README.md Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-22 15:01:03 +08:00			`Graph g = make_ref<GraphObj>(runtime);`
Add Conv operator and naive CPU implemenation (#5) * Add: Conv definition * Add: tensor copy data from vector * Add: CPU conv kernel * Fix: replace Int32 with UInt32 in DataType Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-17 14:16:01 +08:00			`Tensor i0 = g->addTensor({1, 3, 4, 4}, DataType::UInt32);`
			`Tensor w0 = g->addTensor({2, 3, 3, 3}, DataType::UInt32);`
			`auto conv = g->addOp<ConvObj>(i0, w0, nullptr, 1, 1, 2, 1, 1, 2);`

			`g->dataMalloc();`
			`i0->setData(IncrementalGenerator());`
			`w0->setData(IncrementalGenerator());`
Add CUDA runtime (#6) * Fix: add warm-up and repetition in timing * Add: CUDA runtime and float support * Refactor: Cuda and Cpu runtimes inherit Runtime * Add: environment script for Lotus * Add: Lotus build instructions * Update README.md Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-22 15:01:03 +08:00			`runtime->run(g, true, true);`
			`double perfTime = runtime->getPerfTime(g);`
			`// The example Conv takes 0.015ms with one core`
Add Conv operator and naive CPU implemenation (#5) * Add: Conv definition * Add: tensor copy data from vector * Add: CPU conv kernel * Fix: replace Int32 with UInt32 in DataType Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-17 14:16:01 +08:00			`EXPECT_GT(perfTime, 0);`
Add CUDA runtime (#6) * Fix: add warm-up and repetition in timing * Add: CUDA runtime and float support * Refactor: Cuda and Cpu runtimes inherit Runtime * Add: environment script for Lotus * Add: Lotus build instructions * Update README.md Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-22 15:01:03 +08:00			`EXPECT_LT(perfTime, 0.1);`
Add Conv operator and naive CPU implemenation (#5) * Add: Conv definition * Add: tensor copy data from vector * Add: CPU conv kernel * Fix: replace Int32 with UInt32 in DataType Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-17 14:16:01 +08:00			`// check answer`
Extended DataType class and Runtime interaction (#9) * Add: DataType class * Add: data-type-oblivious tensor interface * Rename: copyBlobToCPU Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-23 16:55:59 +08:00			`auto ans =`
			`make_ref<TensorObj>(Shape{1, 2, 2, 2}, DataType::UInt32, runtime);`
Add CUDA runtime (#6) * Fix: add warm-up and repetition in timing * Add: CUDA runtime and float support * Refactor: Cuda and Cpu runtimes inherit Runtime * Add: environment script for Lotus * Add: Lotus build instructions * Update README.md Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-22 15:01:03 +08:00			`ans->dataMalloc(runtime);`
			`ans->copyData(`
			`vector<uint32_t>{4794, 4386, 8199, 7506, 11274, 10542, 20835, 19656});`
Add Conv operator and naive CPU implemenation (#5) * Add: Conv definition * Add: tensor copy data from vector * Add: CPU conv kernel * Fix: replace Int32 with UInt32 in DataType Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-17 14:16:01 +08:00			`EXPECT_TRUE(conv->getOutput()->equalData(ans));`
			`}`

Add CUDA runtime (#6) * Fix: add warm-up and repetition in timing * Add: CUDA runtime and float support * Refactor: Cuda and Cpu runtimes inherit Runtime * Add: environment script for Lotus * Add: Lotus build instructions * Update README.md Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-22 15:01:03 +08:00			`void testConvCudnn(`
			`const std::function<void(void *, size_t, DataType)> &generator,`
			`vector<float> ansVec) {`
Extended DataType class and Runtime interaction (#9) * Add: DataType class * Add: data-type-oblivious tensor interface * Rename: copyBlobToCPU Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-23 16:55:59 +08:00			`Runtime cpuRuntime = CpuRuntimeObj::getInstance();`
Add CUDA runtime (#6) * Fix: add warm-up and repetition in timing * Add: CUDA runtime and float support * Refactor: Cuda and Cpu runtimes inherit Runtime * Add: environment script for Lotus * Add: Lotus build instructions * Update README.md Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-22 15:01:03 +08:00			`auto cudaRuntime = make_ref<CudaRuntimeObj>();`
			`// Build CUDA graph`
			`Graph g = make_ref<GraphObj>(cudaRuntime);`
			`Tensor i0 = g->addTensor({1, 3, 4, 4}, DataType::Float32);`
			`Tensor w0 = g->addTensor({2, 3, 3, 3}, DataType::Float32);`
			`auto conv = g->addOp<ConvObj>(i0, w0, nullptr, 1, 1, 2, 1, 1, 2);`

			`// allocate CUDA memory`
			`g->dataMalloc();`

			`// Build input and output data on CPU`
Extended DataType class and Runtime interaction (#9) * Add: DataType class * Add: data-type-oblivious tensor interface * Rename: copyBlobToCPU Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-23 16:55:59 +08:00			`auto cpui0 =`
			`make_ref<TensorObj>(Shape{1, 3, 4, 4}, DataType::Float32, cpuRuntime);`
Add CUDA runtime (#6) * Fix: add warm-up and repetition in timing * Add: CUDA runtime and float support * Refactor: Cuda and Cpu runtimes inherit Runtime * Add: environment script for Lotus * Add: Lotus build instructions * Update README.md Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-22 15:01:03 +08:00			`cpui0->dataMalloc(cpuRuntime);`
			`cpui0->setData(generator);`

Extended DataType class and Runtime interaction (#9) * Add: DataType class * Add: data-type-oblivious tensor interface * Rename: copyBlobToCPU Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-23 16:55:59 +08:00			`auto cpuw0 =`
			`make_ref<TensorObj>(Shape{2, 3, 3, 3}, DataType::Float32, cpuRuntime);`
Add CUDA runtime (#6) * Fix: add warm-up and repetition in timing * Add: CUDA runtime and float support * Refactor: Cuda and Cpu runtimes inherit Runtime * Add: environment script for Lotus * Add: Lotus build instructions * Update README.md Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-22 15:01:03 +08:00			`cpuw0->dataMalloc(cpuRuntime);`
			`cpuw0->setData(generator);`

Extended DataType class and Runtime interaction (#9) * Add: DataType class * Add: data-type-oblivious tensor interface * Rename: copyBlobToCPU Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-23 16:55:59 +08:00			`auto ans =`
			`make_ref<TensorObj>(Shape{1, 2, 2, 2}, DataType::Float32, cpuRuntime);`
Add CUDA runtime (#6) * Fix: add warm-up and repetition in timing * Add: CUDA runtime and float support * Refactor: Cuda and Cpu runtimes inherit Runtime * Add: environment script for Lotus * Add: Lotus build instructions * Update README.md Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-22 15:01:03 +08:00			`ans->dataMalloc(cpuRuntime);`
			`ans->copyData(ansVec);`

			`// Copy inputs from CPU to CUDA`
Extended DataType class and Runtime interaction (#9) * Add: DataType class * Add: data-type-oblivious tensor interface * Rename: copyBlobToCPU Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-23 16:55:59 +08:00			`i0->copyData(cpui0);`
			`w0->copyData(cpuw0);`
Add CUDA runtime (#6) * Fix: add warm-up and repetition in timing * Add: CUDA runtime and float support * Refactor: Cuda and Cpu runtimes inherit Runtime * Add: environment script for Lotus * Add: Lotus build instructions * Update README.md Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-22 15:01:03 +08:00			`// Execute on CUDA`
			`cudaRuntime->run(g);`
			`// double perfTime = cudaRuntime->getPerfTime(g);`
			`// // The example Conv takes 0.015ms with one core`
			`// EXPECT_GT(perfTime, 0);`
			`// EXPECT_LT(perfTime, 0.1);`

			`// copy CUDA output to CPU`
			`auto o0 = conv->getOutput();`
Extended DataType class and Runtime interaction (#9) * Add: DataType class * Add: data-type-oblivious tensor interface * Rename: copyBlobToCPU Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-23 16:55:59 +08:00			`auto cpuo0 =`
			`make_ref<TensorObj>(Shape{1, 2, 2, 2}, DataType::Float32, cpuRuntime);`
Add CUDA runtime (#6) * Fix: add warm-up and repetition in timing * Add: CUDA runtime and float support * Refactor: Cuda and Cpu runtimes inherit Runtime * Add: environment script for Lotus * Add: Lotus build instructions * Update README.md Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-22 15:01:03 +08:00			`cpuo0->dataMalloc(cpuRuntime);`
Extended DataType class and Runtime interaction (#9) * Add: DataType class * Add: data-type-oblivious tensor interface * Rename: copyBlobToCPU Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-23 16:55:59 +08:00			`cpuo0->copyData(o0);`
Add CUDA runtime (#6) * Fix: add warm-up and repetition in timing * Add: CUDA runtime and float support * Refactor: Cuda and Cpu runtimes inherit Runtime * Add: environment script for Lotus * Add: Lotus build instructions * Update README.md Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-22 15:01:03 +08:00
			`// check results on CPU`
Extended DataType class and Runtime interaction (#9) * Add: DataType class * Add: data-type-oblivious tensor interface * Rename: copyBlobToCPU Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-23 16:55:59 +08:00			`EXPECT_TRUE(cpuo0->equalData(ans));`
Add CUDA runtime (#6) * Fix: add warm-up and repetition in timing * Add: CUDA runtime and float support * Refactor: Cuda and Cpu runtimes inherit Runtime * Add: environment script for Lotus * Add: Lotus build instructions * Update README.md Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-22 15:01:03 +08:00			`}`

			`TEST(Conv, cuDNN) {`
			`testConvCudnn(OneGenerator(),`
			`vector<float>{12, 12, 18, 18, 12, 12, 18, 18});`
			`testConvCudnn(`
			`IncrementalGenerator(),`
			`vector<float>{4794, 4386, 8199, 7506, 11274, 10542, 20835, 19656});`
			`}`
Add Conv operator and naive CPU implemenation (#5) * Add: Conv definition * Add: tensor copy data from vector * Add: CPU conv kernel * Fix: replace Int32 with UInt32 in DataType Co-authored-by: Liyan Zheng <liyan-zheng@outlook.com> 2022-08-17 14:16:01 +08:00			`} // namespace infini`