InfiniTensor/test/kernels/cuda/test_cuda_layernorm.cc

147 lines
6.7 KiB
C++

#include "core/graph.h"
#include "core/runtime.h"
#include "cuda/cuda_runtime.h"
#include "cuda/cuda_utility.h"
#include "operators/layer_norm.h"
#include "test.h"
namespace infini {
void test_layernorm(
const Shape &inputShape, const vector<float> &inputData,
const Shape &scaleShape, const vector<float> &scaleData, float eps,
int axis, int stash_type, const vector<float> &ExpectData,
const std::optional<Shape> &bShape = std::nullopt,
const std::optional<std::vector<float>> &biasData = std::nullopt) {
Runtime runtime = NativeCpuRuntimeObj::getInstance();
Graph gCpu = make_ref<GraphObj>(runtime);
if (bShape.has_value() && biasData.has_value()) {
Shape biasShape = *bShape;
auto bias = gCpu->addTensor(biasShape, DataType::Float32);
auto input = gCpu->addTensor(inputShape, DataType::Float32);
auto scale = gCpu->addTensor(scaleShape, DataType::Float32);
gCpu->dataMalloc();
bias->copyin(*biasData); //
// bias->printData();
input->copyin(inputData);
scale->copyin(scaleData); //
auto cudaRuntime = make_ref<CudaRuntimeObj>();
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
auto biasGpu = gCuda->cloneTensor(bias);
auto inputGpu = gCuda->cloneTensor(input);
auto scaleGpu = gCuda->cloneTensor(scale);
// gCpu->cloneTensor(biasGpu)->printData();
auto op =
gCuda->addOp<LayerNormObj>(inputGpu, scaleGpu, nullptr, biasGpu,
eps, axis, stash_type); // LayernormObj
gCuda->dataMalloc();
biasGpu->copyin(*biasData);
// gCpu->cloneTensor(biasGpu)->printData();
inputGpu->copyin(inputData);
scaleGpu->copyin(scaleData);
cudaRuntime->run(gCuda);
auto oCpu =
gCpu->cloneTensor(op->getOutput()); // move Data from gpu to cpu
oCpu->printData(); //->printData
EXPECT_TRUE(oCpu->equalData(ExpectData));
} else {
auto input = gCpu->addTensor(inputShape, DataType::Float32);
auto scale = gCpu->addTensor(scaleShape, DataType::Float32);
gCpu->dataMalloc();
input->copyin(inputData);
scale->copyin(scaleData); //
auto cudaRuntime = make_ref<CudaRuntimeObj>();
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
auto inputGpu = gCuda->cloneTensor(input);
auto scaleGpu = gCuda->cloneTensor(scale);
auto op =
gCuda->addOp<LayerNormObj>(inputGpu, scaleGpu, nullptr, nullptr,
eps, axis, stash_type); // LayernormObj
gCuda->dataMalloc();
inputGpu->copyin(inputData);
scaleGpu->copyin(scaleData);
cudaRuntime->run(gCuda);
auto oCpu =
gCpu->cloneTensor(op->getOutput()); // move Data from gpu to cpu
oCpu->printData(); //->printData
EXPECT_TRUE(oCpu->equalData(ExpectData));
}
}
TEST(CUDA_Layernorm, run) {
test_layernorm(
Shape{2, 3, 2, 3},
vector<float>{0., 1., 2., 3., 4., 5., 6., 7., 8.,
9., 10., 11., 12., 13., 14., 15., 16., 17.,
18., 19., 20., 21., 22., 23., 24., 25., 26.,
27., 28., 29., 30., 31., 32., 33., 34., 35.},
Shape{3}, vector<float>{0.3, 0.2, 0.5}, 1e-5, 3, 1,
vector<float>{
-0.3674207, 0.0000000, 0.6123678, -0.3674207, 0.0000000, 0.6123678,
-0.3674207, 0.0000000, 0.6123678, -0.3674207, 0.0000000, 0.6123678,
-0.3674207, 0.0000000, 0.6123678, -0.3674207, 0.0000000, 0.6123678,
-0.3674207, 0.0000000, 0.6123678, -0.3674207, 0.0000000, 0.6123678,
-0.3674207, 0.0000000, 0.6123678, -0.3674207, 0.0000000, 0.6123678,
-0.3674207, 0.0000000, 0.6123678, -0.3674207, 0.0000000, 0.6123678},
Shape{3}, vector<float>{0, 0, 0});
test_layernorm(
Shape{2, 3, 2, 3},
vector<float>{0., 1., 2., 3., 4., 5., 6., 7., 8.,
9., 10., 11., 12., 13., 14., 15., 16., 17.,
18., 19., 20., 21., 22., 23., 24., 25., 26.,
27., 28., 29., 30., 31., 32., 33., 34., 35.},
Shape{3}, vector<float>{0.3, 0.2, 0.5}, 1e-5, 3, 1,
vector<float>{
-0.0674207, 0.2000000, 1.1123679, -0.0674207, 0.2000000, 1.1123679,
-0.0674207, 0.2000000, 1.1123679, -0.0674207, 0.2000000, 1.1123679,
-0.0674207, 0.2000000, 1.1123679, -0.0674207, 0.2000000, 1.1123679,
-0.0674207, 0.2000000, 1.1123679, -0.0674207, 0.2000000, 1.1123679,
-0.0674207, 0.2000000, 1.1123679, -0.0674207, 0.2000000, 1.1123679,
-0.0674207, 0.2000000, 1.1123679, -0.0674207, 0.2000000, 1.1123679},
Shape{3}, vector<float>{0.3, 0.2, 0.5});
test_layernorm(
Shape{2, 3, 2, 3},
vector<float>{0., 1., 2., 3., 4., 5., 6., 7., 8.,
9., 10., 11., 12., 13., 14., 15., 16., 17.,
18., 19., 20., 21., 22., 23., 24., 25., 26.,
27., 28., 29., 30., 31., 32., 33., 34., 35.},
Shape{1}, vector<float>{0.3}, 1e-5, 3, 1,
vector<float>{
-0.0674207, 0.2000000, 0.8674207, -0.0674207, 0.2000000, 0.8674207,
-0.0674207, 0.2000000, 0.8674207, -0.0674207, 0.2000000, 0.8674207,
-0.0674207, 0.2000000, 0.8674207, -0.0674207, 0.2000000, 0.8674207,
-0.0674207, 0.2000000, 0.8674207, -0.0674207, 0.2000000, 0.8674207,
-0.0674207, 0.2000000, 0.8674207, -0.0674207, 0.2000000, 0.8674207,
-0.0674207, 0.2000000, 0.8674207, -0.0674207, 0.2000000, 0.8674207},
Shape{3}, vector<float>{0.3, 0.2, 0.5});
test_layernorm(
Shape{2, 3, 2, 3},
vector<float>{0., 1., 2., 3., 4., 5., 6., 7., 8.,
9., 10., 11., 12., 13., 14., 15., 16., 17.,
18., 19., 20., 21., 22., 23., 24., 25., 26.,
27., 28., 29., 30., 31., 32., 33., 34., 35.},
Shape{3}, vector<float>{0.3, 0.2, 0.5}, 1e-5, 3, 1,
vector<float>{-0.3674207, 0.0000000, 0.6123678, -0.3674207,
0.0000000, 0.6123678, -0.3674207, 0.0000000,
0.6123678, -0.3674207, 0.0000000, 0.6123678,
-0.3674207, 0.0000000, 0.6123678, -0.3674207,
0.0000000, 0.6123678, -0.3674207, 0.0000000,
0.6123678, -0.3674207, 0.0000000, 0.6123678,
-0.3674207, 0.0000000, 0.6123678, -0.3674207,
0.0000000, 0.6123678, -0.3674207, 0.0000000,
0.6123678, -0.3674207, 0.0000000, 0.6123678});
} // python output
} // namespace infini