// Forked from jiuyuan/InfiniTensor
#include "core/graph.h"
#include "core/kernel.h"
#include "core/runtime.h"
#include "cuda/cuda_runtime.h"
#include "cuda/cuda_utility.h"
#include "operators/G2BMM.h"
#include "test.h"
namespace infini {
|
|
using ExpectOutput = vector<float>;
|
|
|
|
TEST(CUDA_G2BMM, ShapeInference) {
|
|
const int bs = 1, seqlen = 10000, w = 1000, featlen = 512, heads = 8, d = 4;
|
|
const int hidden = featlen, hiddenPerHead = hidden / heads;
|
|
auto cpuRuntime = NativeCpuRuntimeObj::getInstance();
|
|
Graph gCpu = make_ref<GraphObj>(cpuRuntime);
|
|
auto ACpu = gCpu->addTensor(Shape{bs * heads, seqlen, hiddenPerHead},
|
|
DataType::Float32);
|
|
auto BCpu = gCpu->addTensor(Shape{bs * heads, seqlen, hiddenPerHead},
|
|
DataType::Float32);
|
|
gCpu->dataMalloc();
|
|
ACpu->setData(IncrementalGenerator());
|
|
BCpu->setData(IncrementalGenerator());
|
|
|
|
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
|
auto gCuda = make_ref<GraphObj>(cudaRuntime);
|
|
auto ACuda = gCuda->cloneTensor(ACpu);
|
|
auto BCuda = gCuda->cloneTensor(BCpu);
|
|
auto G2BMM = gCuda->addOp<G2BMMObj>(ACuda, BCuda, nullptr, w, d);
|
|
EXPECT_EQ(G2BMM->getOutput()->getDims(),
|
|
(Shape{bs * heads, seqlen, 2 * w + 1}));
|
|
gCuda->dataMalloc();
|
|
cudaRuntime->run(gCuda);
|
|
}
|
|
|
|
}; // namespace infini