#include "core/graph.h"
#include "core/runtime.h"
#include "cuda/cuda_runtime.h"
#include "cuda/cuda_utility.h"
#include "operators/conv.h"
#include "test.h"

namespace infini {
TEST(Conv, ShapeInference) {
    Runtime runtime = CpuRuntimeObj::getInstance();
    // Each scope below builds a fresh graph and checks that output-shape
    // inference for ConvObj matches the expected dims for one padding setup.
    { // explicit integer padding arguments
        Graph graph = make_ref<GraphObj>(runtime);
        Tensor input = graph->addTensor({1, 3, 4, 4}, DataType::UInt32);
        Tensor weight = graph->addTensor({2, 3, 3, 3}, DataType::UInt32);
        auto op = graph->addOp<ConvObj>(input, weight, nullptr, 1, 1);
        EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 2, 4, 4}));
    }
    { // "Same" padding: spatial dims are preserved (4x4 -> 4x4)
        Graph graph = make_ref<GraphObj>(runtime);
        Tensor input = graph->addTensor({1, 3, 4, 4}, DataType::UInt32);
        Tensor weight = graph->addTensor({2, 3, 3, 3}, DataType::UInt32);
        auto op = graph->addOp<ConvObj>(input, weight, nullptr,
                                        ConvObj::PaddingMode::Same);
        EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 2, 4, 4}));
    }
    { // "Valid" padding: spatial dims shrink (4x4 -> 2x2 with a 3x3 kernel)
        Graph graph = make_ref<GraphObj>(runtime);
        Tensor input = graph->addTensor({1, 3, 4, 4}, DataType::UInt32);
        Tensor weight = graph->addTensor({2, 3, 3, 3}, DataType::UInt32);
        auto op = graph->addOp<ConvObj>(input, weight, nullptr,
                                        ConvObj::PaddingMode::Valid);
        EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 2, 2, 2}));
    }
    { // non-unit dilation & stride (argument meaning per the ConvObj ctor)
        Graph graph = make_ref<GraphObj>(runtime);
        Tensor input = graph->addTensor({1, 3, 4, 4}, DataType::UInt32);
        Tensor weight = graph->addTensor({2, 3, 3, 3}, DataType::UInt32);
        auto op =
            graph->addOp<ConvObj>(input, weight, nullptr, 1, 1, 2, 1, 1, 2);
        EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 2, 2, 2}));
    }
}
TEST(Conv, NaiveCPU) {
    Runtime runtime = CpuRuntimeObj::getInstance();
    Graph graph = make_ref<GraphObj>(runtime);
    Tensor input = graph->addTensor({1, 3, 4, 4}, DataType::UInt32);
    Tensor weight = graph->addTensor({2, 3, 3, 3}, DataType::UInt32);
    auto op = graph->addOp<ConvObj>(input, weight, nullptr, 1, 1, 2, 1, 1, 2);

    // Allocate storage for every tensor, then fill both inputs with
    // incrementing values so the expected output is deterministic.
    graph->dataMalloc();
    input->setData(IncrementalGenerator());
    weight->setData(IncrementalGenerator());

    runtime->run(graph, true, true);
    double perfTime = runtime->getPerfTime(graph);
    // The example Conv takes 0.015ms with one core
    EXPECT_GT(perfTime, 0);
    EXPECT_LT(perfTime, 0.1);

    // check answer
    auto expected =
        make_ref<TensorObj>(Shape{1, 2, 2, 2}, DataType::UInt32, runtime);
    expected->dataMalloc(runtime);
    expected->copyData(
        vector<uint32_t>{4794, 4386, 8199, 7506, 11274, 10542, 20835, 19656});
    EXPECT_TRUE(op->getOutput()->equalData(expected));
}
void testConvCudnn(
|
|
|
|
const std::function<void(void *, size_t, DataType)> &generator,
|
|
|
|
vector<float> ansVec) {
|
2022-08-23 16:55:59 +08:00
|
|
|
Runtime cpuRuntime = CpuRuntimeObj::getInstance();
|
2022-08-22 15:01:03 +08:00
|
|
|
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
|
|
|
// Build CUDA graph
|
|
|
|
Graph g = make_ref<GraphObj>(cudaRuntime);
|
|
|
|
Tensor i0 = g->addTensor({1, 3, 4, 4}, DataType::Float32);
|
|
|
|
Tensor w0 = g->addTensor({2, 3, 3, 3}, DataType::Float32);
|
|
|
|
auto conv = g->addOp<ConvObj>(i0, w0, nullptr, 1, 1, 2, 1, 1, 2);
|
|
|
|
|
|
|
|
// allocate CUDA memory
|
|
|
|
g->dataMalloc();
|
|
|
|
|
|
|
|
// Build input and output data on CPU
|
2022-08-23 16:55:59 +08:00
|
|
|
auto cpui0 =
|
|
|
|
make_ref<TensorObj>(Shape{1, 3, 4, 4}, DataType::Float32, cpuRuntime);
|
2022-08-22 15:01:03 +08:00
|
|
|
cpui0->dataMalloc(cpuRuntime);
|
|
|
|
cpui0->setData(generator);
|
|
|
|
|
2022-08-23 16:55:59 +08:00
|
|
|
auto cpuw0 =
|
|
|
|
make_ref<TensorObj>(Shape{2, 3, 3, 3}, DataType::Float32, cpuRuntime);
|
2022-08-22 15:01:03 +08:00
|
|
|
cpuw0->dataMalloc(cpuRuntime);
|
|
|
|
cpuw0->setData(generator);
|
|
|
|
|
2022-08-23 16:55:59 +08:00
|
|
|
auto ans =
|
|
|
|
make_ref<TensorObj>(Shape{1, 2, 2, 2}, DataType::Float32, cpuRuntime);
|
2022-08-22 15:01:03 +08:00
|
|
|
ans->dataMalloc(cpuRuntime);
|
|
|
|
ans->copyData(ansVec);
|
|
|
|
|
|
|
|
// Copy inputs from CPU to CUDA
|
2022-08-23 16:55:59 +08:00
|
|
|
i0->copyData(cpui0);
|
|
|
|
w0->copyData(cpuw0);
|
2022-08-22 15:01:03 +08:00
|
|
|
// Execute on CUDA
|
|
|
|
cudaRuntime->run(g);
|
|
|
|
// double perfTime = cudaRuntime->getPerfTime(g);
|
|
|
|
// // The example Conv takes 0.015ms with one core
|
|
|
|
// EXPECT_GT(perfTime, 0);
|
|
|
|
// EXPECT_LT(perfTime, 0.1);
|
|
|
|
|
|
|
|
// copy CUDA output to CPU
|
|
|
|
auto o0 = conv->getOutput();
|
2022-08-23 16:55:59 +08:00
|
|
|
auto cpuo0 =
|
|
|
|
make_ref<TensorObj>(Shape{1, 2, 2, 2}, DataType::Float32, cpuRuntime);
|
2022-08-22 15:01:03 +08:00
|
|
|
cpuo0->dataMalloc(cpuRuntime);
|
2022-08-23 16:55:59 +08:00
|
|
|
cpuo0->copyData(o0);
|
2022-08-22 15:01:03 +08:00
|
|
|
|
|
|
|
// check results on CPU
|
2022-08-23 16:55:59 +08:00
|
|
|
EXPECT_TRUE(cpuo0->equalData(ans));
|
2022-08-22 15:01:03 +08:00
|
|
|
}
TEST(Conv, cuDNN) {
    // All-ones inputs: every output element is the count of taps summed.
    testConvCudnn(OneGenerator(),
                  vector<float>{12, 12, 18, 18, 12, 12, 18, 18});
    // Incrementing inputs: same expected values as the NaiveCPU test.
    testConvCudnn(
        IncrementalGenerator(),
        vector<float>{4794, 4386, 8199, 7506, 11274, 10542, 20835, 19656});
}
} // namespace infini