#include "core/graph.h"
#include "core/runtime.h"
#include "cuda/cuda_runtime.h"
#include "cuda/cuda_utility.h"
#include "operators/layer_norm.h"

#include "test.h"

namespace infini {

/**
 * Runs one LayerNorm case on the CUDA runtime and checks the result.
 *
 * The tensors are first created and filled on a CPU graph, cloned into a CUDA
 * graph, the LayerNorm operator is executed there, and its output is cloned
 * back to the CPU graph and compared against `ExpectData` with
 * `equalData()`.
 *
 * @param inputShape  Shape of the input tensor.
 * @param inputData   Values copied into the input tensor.
 * @param scaleShape  Shape of the scale tensor.
 * @param scaleData   Values copied into the scale tensor.
 * @param eps         Forwarded unchanged to the operator.
 * @param axis        Forwarded unchanged to the operator.
 * @param stash_type  Forwarded unchanged to the operator.
 * @param ExpectData  Expected output values.
 * @param bShape      Optional shape of the bias tensor.
 * @param biasData    Optional values of the bias tensor.
 *
 * A bias tensor is only created when BOTH `bShape` and `biasData` hold a
 * value; otherwise the operator receives a null bias.
 */
void test_layernorm(
    const Shape &inputShape, const vector<float> &inputData,
    const Shape &scaleShape, const vector<float> &scaleData, float eps,
    int axis, int stash_type, const vector<float> &ExpectData,
    const std::optional<Shape> &bShape = std::nullopt,
    const std::optional<std::vector<float>> &biasData = std::nullopt) {
    const bool hasBias = bShape.has_value() && biasData.has_value();

    // ---- Host side: create the tensors and fill them with the test data.
    Runtime runtime = NativeCpuRuntimeObj::getInstance();
    Graph gCpu = make_ref<GraphObj>(runtime);

    // A null handle stands for "no bias". When present, the bias tensor is
    // added before input/scale, as in the original bias code path.
    Tensor bias = nullptr;
    if (hasBias)
        bias = gCpu->addTensor(*bShape, DataType::Float32);
    auto input = gCpu->addTensor(inputShape, DataType::Float32);
    auto scale = gCpu->addTensor(scaleShape, DataType::Float32);
    gCpu->dataMalloc();
    if (hasBias)
        bias->copyin(*biasData);
    input->copyin(inputData);
    scale->copyin(scaleData);

    // ---- Device side: mirror the tensors into a CUDA graph and add the op.
    auto cudaRuntime = make_ref<CudaRuntimeObj>();
    Graph gCuda = make_ref<GraphObj>(cudaRuntime);

    Tensor biasGpu = nullptr;
    if (hasBias)
        biasGpu = gCuda->cloneTensor(bias);
    auto inputGpu = gCuda->cloneTensor(input);
    auto scaleGpu = gCuda->cloneTensor(scale);

    // The third argument (nullptr) is the output tensor slot; the result is
    // fetched afterwards through op->getOutput().
    auto op = gCuda->addOp<LayerNormObj>(inputGpu, scaleGpu, nullptr, biasGpu,
                                         eps, axis, stash_type);
    gCuda->dataMalloc();

    // The data is uploaded again after dataMalloc(), exactly as the original
    // test did. NOTE(review): presumably the clones made before allocation
    // do not keep their contents - confirm before removing these copies.
    if (hasBias)
        biasGpu->copyin(*biasData);
    inputGpu->copyin(inputData);
    scaleGpu->copyin(scaleData);

    cudaRuntime->run(gCuda);

    // Move the result from the GPU graph to the CPU graph for comparison.
    auto oCpu = gCpu->cloneTensor(op->getOutput());
    oCpu->printData();
    EXPECT_TRUE(oCpu->equalData(ExpectData));
}

// Returns `pattern` concatenated `times` times.
static vector<float> repeatPattern(const vector<float> &pattern, int times) {
    vector<float> repeated;
    repeated.reserve(pattern.size() * times);
    for (int i = 0; i < times; ++i)
        repeated.insert(repeated.end(), pattern.begin(), pattern.end());
    return repeated;
}

// Expected values are marked "python output" in the original file, i.e. they
// were apparently taken from a Python reference run.
TEST(CUDA_Layernorm, run) {
    // All cases share the same 2x3x2x3 input holding the values 0..35.
    const Shape inputShape{2, 3, 2, 3};
    const vector<float> inputData{
        0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.,  10., 11.,
        12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23.,
        24., 25., 26., 27., 28., 29., 30., 31., 32., 33., 34., 35.};
    // Every expected output is one 3-value pattern repeated 12 times
    // (36 values in total, matching the input element count).
    const int patternRepeats = 12;
    const float eps = 1e-5;
    const int axis = 3;
    const int stashType = 1;

    // Per-element scale, all-zero bias.
    test_layernorm(
        inputShape, inputData, Shape{3}, vector<float>{0.3, 0.2, 0.5}, eps,
        axis, stashType,
        repeatPattern(vector<float>{-0.3674207, 0.0000000, 0.6123678},
                      patternRepeats),
        Shape{3}, vector<float>{0, 0, 0});

    // Per-element scale, non-zero bias.
    test_layernorm(
        inputShape, inputData, Shape{3}, vector<float>{0.3, 0.2, 0.5}, eps,
        axis, stashType,
        repeatPattern(vector<float>{-0.0674207, 0.2000000, 1.1123679},
                      patternRepeats),
        Shape{3}, vector<float>{0.3, 0.2, 0.5});

    // Single-element scale (Shape{1}), non-zero bias.
    test_layernorm(
        inputShape, inputData, Shape{1}, vector<float>{0.3}, eps, axis,
        stashType,
        repeatPattern(vector<float>{-0.0674207, 0.2000000, 0.8674207},
                      patternRepeats),
        Shape{3}, vector<float>{0.3, 0.2, 0.5});

    // Per-element scale, no bias supplied at all.
    test_layernorm(
        inputShape, inputData, Shape{3}, vector<float>{0.3, 0.2, 0.5}, eps,
        axis, stashType,
        repeatPattern(vector<float>{-0.3674207, 0.0000000, 0.6123678},
                      patternRepeats));
}

} // namespace infini