forked from jiuyuan/InfiniTensor
support 8D tensor, add test example (#170)
Co-authored-by: Haojie Wang <haojie0429@gmail.com>
This commit is contained in:
parent
23b825efc4
commit
ec3adf6fa7
|
@ -3,7 +3,7 @@
|
|||
#include <cstdio>
|
||||
|
||||
const int BATCH_SIZE = 32; // parallel tensor number.
|
||||
const int DIM_MAX_SIZE = 4;
|
||||
const int DIM_MAX_SIZE = 8;
|
||||
|
||||
// Concat operator acts like element tensors composing to one big tensor, and
|
||||
// split operator acts like one big tensor being composed by element
|
||||
|
|
|
@ -122,4 +122,40 @@ TEST(Concat, Cuda_dim0) {
|
|||
EXPECT_TRUE(oCpu->equalData(vector<float>{0, 1, 2, 1, 1, 1, 0, 1, 2}));
|
||||
}
|
||||
|
||||
// Concat three 5-D (rank > 4) tensors along axis 2 on the GPU and check the
// result against a hand-computed expectation. Exercises the higher-rank
// (DIM_MAX_SIZE = 8) support of the CUDA concat kernel.
TEST(Concat, CudaHigh) {
    // Host-side graph: used to build the input tensors and later to hold a
    // host copy of the GPU output for comparison.
    Runtime cpuRuntime = NativeCpuRuntimeObj::getInstance();
    Graph hostGraph = make_ref<GraphObj>(cpuRuntime);

    // Inputs differ only in the concat dimension (axis 2): sizes 3, 1, 2.
    auto inA = hostGraph->addTensor({2, 2, 3, 1, 2}, DataType::Float32);
    auto inB = hostGraph->addTensor({2, 2, 1, 1, 2}, DataType::Float32);
    auto inC = hostGraph->addTensor({2, 2, 2, 1, 2}, DataType::Float32);
    hostGraph->dataMalloc();
    inA->setData(IncrementalGenerator()); // 0, 1, 2, ...
    inB->setData(OneGenerator());         // all ones
    inC->setData(OneGenerator());

    // Device-side graph mirroring the host tensors.
    auto cudaRuntime = make_ref<CudaRuntimeObj>();
    Graph deviceGraph = make_ref<GraphObj>(cudaRuntime);

    auto inAGpu = deviceGraph->cloneTensor(inA);
    auto inBGpu = deviceGraph->cloneTensor(inB);
    auto inCGpu = deviceGraph->cloneTensor(inC);

    auto op =
        deviceGraph->addOp<ConcatObj>(TensorVec{inAGpu, inBGpu, inCGpu},
                                      nullptr, 2);
    deviceGraph->dataMalloc();
    // Re-fill after dataMalloc: allocation happens after op construction, so
    // the device buffers must be (re)initialized before running.
    inAGpu->setData(IncrementalGenerator());
    inBGpu->setData(OneGenerator());
    inCGpu->setData(OneGenerator());
    cudaRuntime->run(deviceGraph);

    // cudaPrintTensor(op->getOutput());
    // Copy output from CUDA back to the CPU and compare element-wise.
    auto hostOut = hostGraph->cloneTensor(op->getOutput());
    EXPECT_TRUE(hostOut->equalData(
        vector<float>{0., 1., 2., 3., 4., 5., 1., 1., 1., 1., 1., 1.,
                      6., 7., 8., 9., 10., 11., 1., 1., 1., 1., 1., 1.,
                      12., 13., 14., 15., 16., 17., 1., 1., 1., 1., 1., 1.,
                      18., 19., 20., 21., 22., 23., 1., 1., 1., 1., 1., 1.}));
}
|
||||
} // namespace infini
|
||||
|
|
|
@ -39,6 +39,40 @@ TEST(Split, Cuda) {
|
|||
12, 13, 14, 15, 16, 17, 18, 19, 32, 33, 34, 35, 36, 37, 38, 39}));
|
||||
}
|
||||
|
||||
// Split a 5-D (rank > 4) tensor into 3 equal pieces along axis 1 on the GPU
// and verify each piece on the host. Companion to Concat::CudaHigh for the
// higher-rank (DIM_MAX_SIZE = 8) kernel support.
TEST(Split, CudaHigh) {
    // Host-side graph: builds the input and receives host copies of outputs.
    Runtime cpuRuntime = NativeCpuRuntimeObj::getInstance();
    Graph hostGraph = make_ref<GraphObj>(cpuRuntime);

    auto src = hostGraph->addTensor({2, 6, 2, 1, 2}, DataType::Float32);
    hostGraph->dataMalloc();
    src->setData(IncrementalGenerator()); // 0, 1, 2, ...

    // Device-side graph with the split op: axis 1, 3 even parts (2 each).
    auto cudaRuntime = make_ref<CudaRuntimeObj>();
    Graph deviceGraph = make_ref<GraphObj>(cudaRuntime);

    auto srcGpu = deviceGraph->cloneTensor(src);
    auto op = deviceGraph->addOp<SplitObj>(srcGpu, std::nullopt, 1, 3);
    deviceGraph->dataMalloc();
    // Re-fill after dataMalloc so the device buffer holds the test pattern.
    srcGpu->setData(IncrementalGenerator());

    cudaRuntime->run(deviceGraph);

    // Copy each output from CUDA back to the CPU and compare element-wise.
    EXPECT_EQ(op->getOutputs().size(), (size_t)3);
    auto part0 = hostGraph->cloneTensor(op->getOutput(0));
    auto part1 = hostGraph->cloneTensor(op->getOutput(1));
    auto part2 = hostGraph->cloneTensor(op->getOutput(2));
    EXPECT_TRUE(
        part0->equalData(vector<float>{0., 1., 2., 3., 4., 5., 6., 7., 24.,
                                       25., 26., 27., 28., 29., 30., 31.}));
    EXPECT_TRUE(part1->equalData(vector<float>{8., 9., 10., 11., 12., 13.,
                                               14., 15., 32., 33., 34., 35.,
                                               36., 37., 38., 39.}));
    EXPECT_TRUE(part2->equalData(vector<float>{16., 17., 18., 19., 20., 21.,
                                               22., 23., 40., 41., 42., 43.,
                                               44., 45., 46., 47.}));
}
|
||||
|
||||
TEST(Split, Cuda_dim0) {
|
||||
Runtime runtime = NativeCpuRuntimeObj::getInstance();
|
||||
Graph gCpu = make_ref<GraphObj>(runtime);
|
||||
|
|
Loading…
Reference in New Issue