InfiniTensor/test/kernels/cuda/test_cuda_inception.cc

85 lines
2.9 KiB
C++

#include "core/graph.h"
#include "core/kernel.h"
#include "core/runtime.h"
#include "cuda/cuda_runtime.h"
#include "cuda/cuda_utility.h"
#include "operators/concat.h"
#include "operators/conv.h"
#include "operators/pooling.h"
#include "operators/unary.h"
#include "test.h"
namespace infini {

// Assembles a four-branch block on the CUDA runtime: one MaxPool feeds every
// branch, each branch is a chain of Conv+Relu pairs and/or an AvgPool, and the
// branch outputs are merged by a Concat. The test asserts the operator
// connectivity of that graph and then allocates data and executes it.
TEST(CUDA_Inception_v3_block, run) {
    const int bs = 1, initialChannels = 192, h = 32;

    auto cudaRuntime = make_ref<CudaRuntimeObj>();
    auto g = make_ref<GraphObj>(cudaRuntime);
    auto blockInput = g->addTensor({bs, initialChannels, h, h});

    // One inner list per branch; every entry is <isConv, f, r>:
    //   isConv - true adds a Conv followed by a Relu, false adds an AvgPool
    //   f      - first dimension of the Conv weight (not read for AvgPool)
    //   r      - square extent used for the Conv weight and the AvgPool call
    vector<vector<tuple<bool, int, int>>> configs = {
        {{true, 64, 1}},
        {{true, 48, 1}, {true, 64, 5}},
        {{true, 64, 1}, {true, 96, 3}, {true, 96, 3}},
        {{false, 192, 3}, {true, 32, 3}},
    };

    TensorVec outputs; // final tensor of every branch, in branch order
    vector<OpVec> ops; // operators of every branch, in insertion order

    // Shared entry operator; its output is the input of all branches.
    // NOTE(review): the integer arguments presumably are kernel, dilation,
    // padding, stride and ceil mode - confirm against MaxPoolObj.
    auto maxpool =
        g->addOp<MaxPoolObj>(blockInput, nullptr, 3, 3, 1, 1, 1, 1, 1, 1, 0);
    auto chainInput = maxpool->getOutput();

    for (auto &branch : configs) {
        int channels = initialChannels; // second dim of the next Conv weight
        auto cursor = chainInput;       // tensor the next operator consumes
        ops.emplace_back();
        auto &branchOps = ops.back();

        for (auto &[isConv, f, r] : branch) {
            const int halfR = r / 2; // presumably "same" padding - confirm

            if (!isConv) {
                // AvgPool entry: `channels` is left untouched.
                auto pool = g->addOp<AvgPoolObj>(cursor, nullptr, r, r, 1, 1,
                                                 halfR, halfR, 1, 1, 0);
                cursor = pool->getOutput();
                branchOps.emplace_back(pool);
                continue;
            }

            // Conv entry: create the weight first, then the Conv itself.
            auto weight = g->addTensor({f, channels, r, r});
            auto conv =
                g->addOp<ConvObj>(cursor, weight, nullptr, halfR, halfR);
            cursor = conv->getOutput();
            branchOps.emplace_back(conv);

            // Every Conv is immediately followed by a Relu.
            auto relu = g->addOp<ReluObj>(cursor, nullptr);
            cursor = relu->getOutput();
            branchOps.emplace_back(relu);

            channels = f;
        }

        outputs.emplace_back(cursor);
    }

    // Merge all branch outputs (argument 1 is presumably the concat axis).
    auto concat = g->addOp<ConcatObj>(outputs, nullptr, 1);
    g->print();

    // Connectivity: the shared MaxPool must fan out to all four branches.
    EXPECT_EQ(maxpool->getSuccessors().size(), 4u);
    EXPECT_EQ(chainInput->getTargets().size(), 4u);

    // Connectivity: inside a branch, adjacent operators are linked
    // one-to-one in both directions.
    for (const auto &chainOps : ops) {
        for (size_t next = 1; next < chainOps.size(); ++next) {
            const auto &producer = chainOps[next - 1];
            const auto &consumer = chainOps[next];
            EXPECT_EQ(producer->getSuccessors().size(), 1u);
            EXPECT_EQ(consumer->getPredecessors().size(), 1u);
            EXPECT_EQ(producer->getSuccessors()[0], consumer);
            EXPECT_EQ(producer, consumer->getPredecessors()[0]);
        }
    }

    // Connectivity: the Concat gathers one predecessor per branch.
    EXPECT_EQ(concat->getPredecessors().size(), 4u);

    // TODO: check outputs
    g->dataMalloc();
    cudaRuntime->run(g);
}

} // namespace infini