forked from jiuyuan/InfiniTensor
Xpu task4 support: add softmax (#172)
* add softmax on kunlun * format --------- Co-authored-by: Bolun <bolunz@u.nus.edu> Co-authored-by: Haojie Wang <haojie0429@gmail.com>
This commit is contained in:
parent
feccd4f318
commit
23b825efc4
|
@ -0,0 +1,26 @@
|
|||
#include "operators/softmax.h"
|
||||
#include "kunlun/kunlun_kernel_without_config.h"
|
||||
#include "kunlun/kunlun_runtime.h"
|
||||
|
||||
namespace infini {
|
||||
class SoftmaxXdnn : public KUNLUNKernelWithoutConfig {
|
||||
void compute(const Operator &_op,
|
||||
const RuntimeObj *_context) const override {
|
||||
auto op = as<SoftmaxObj>(_op);
|
||||
auto context = dynamic_cast<const KUNLUNRuntimeObj *>(_context);
|
||||
auto dim = op->getInputs(0)->getDims();
|
||||
auto axis = op->getAxis();
|
||||
|
||||
void *const aData = (op->getInputs(0)->getRawDataPtr<void *>());
|
||||
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
||||
|
||||
auto ret = baidu::xpu::api::softmax<float>(
|
||||
context->KUNLUNHandle(), (float *)aData, (float *)cData, dim, axis);
|
||||
assert(ret == 0);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
REGISTER_KERNEL(Device::KUNLUN, OpType::Softmax, DataType::Float32, SoftmaxXdnn,
|
||||
"Softmax_xdnn_KUNLUN_Float32");
|
||||
}; // namespace infini
|
|
@ -0,0 +1,136 @@
|
|||
#include "core/graph.h"
|
||||
#include "core/kernel.h"
|
||||
#include "core/runtime.h"
|
||||
#include "kunlun/kunlun_runtime.h"
|
||||
#include "operators/softmax.h"
|
||||
#include "test.h"
|
||||
#include <cmath>
|
||||
namespace infini {
|
||||
|
||||
// Softmax along axis 1 of a 2x4 Float32 tensor on the KUNLUN runtime.
// The second input row is the first row shifted by 10000, and the expected
// output is the same for both rows.
TEST(XDNN_Softmax, run_axis1) {
    // Host and device runtimes.
    Runtime hostRuntime = NativeCpuRuntimeObj::getInstance();
    auto xpuRuntime = make_ref<KUNLUNRuntimeObj>();

    // Host-side tensor used as the template that the device graph clones.
    Tensor hostInput =
        make_ref<TensorObj>(Shape{2, 4}, DataType::Float32, hostRuntime);

    // Build the single-operator graph on the device and feed it.
    Graph graph = make_ref<GraphObj>(xpuRuntime);
    auto xpuInput = graph->cloneTensor(hostInput);
    auto softmaxOp = graph->addOp<SoftmaxObj>(xpuInput, nullptr, 1);
    graph->dataMalloc();
    const vector<float> inputValues{0, 1, 2, 3, 10000, 10001, 10002, 10003};
    xpuInput->copyin(inputValues);
    xpuRuntime->run(graph);

    // Bring the result back to the host and compare.
    auto hostResult = softmaxOp->getOutput()->clone(hostRuntime);
    const vector<float> expected{0.032058604, 0.08714432, 0.23688284,
                                 0.6439143,   0.032058604, 0.08714432,
                                 0.23688284,  0.6439143};
    EXPECT_TRUE(hostResult->equalData(expected));
}
|
||||
|
||||
// Softmax along axis 0 of a 2x4 Float32 tensor on the KUNLUN runtime.
// Each column pairs a value v with v + 10000, and the expected output is 0
// for the first row and 1 for the second.
TEST(XDNN_Softmax, run_axis0) {
    // Host and device runtimes.
    Runtime hostRuntime = NativeCpuRuntimeObj::getInstance();
    auto xpuRuntime = make_ref<KUNLUNRuntimeObj>();

    // Host-side tensor used as the template that the device graph clones.
    Tensor hostInput =
        make_ref<TensorObj>(Shape{2, 4}, DataType::Float32, hostRuntime);

    // Build the single-operator graph on the device and feed it.
    Graph graph = make_ref<GraphObj>(xpuRuntime);
    auto xpuInput = graph->cloneTensor(hostInput);
    auto softmaxOp = graph->addOp<SoftmaxObj>(xpuInput, nullptr, 0);
    graph->dataMalloc();
    const vector<float> inputValues{0, 1, 2, 3, 10000, 10001, 10002, 10003};
    xpuInput->copyin(inputValues);
    xpuRuntime->run(graph);

    // Bring the result back to the host and compare.
    auto hostResult = softmaxOp->getOutput()->clone(hostRuntime);
    const vector<float> expected{0., 0., 0., 0., 1, 1, 1, 1};
    EXPECT_TRUE(hostResult->equalData(expected));
}
|
||||
|
||||
// Softmax along axis 1 of a 2x2x2x2 Float32 tensor on the KUNLUN runtime.
// Input values come from IncrementalGenerator (presumably 0, 1, 2, ... in
// storage order -- confirm against its definition).
TEST(XDNN_Softmax2, run_axis1) {
    // Host and device runtimes.
    Runtime hostRuntime = NativeCpuRuntimeObj::getInstance();
    auto xpuRuntime = make_ref<KUNLUNRuntimeObj>();

    // Host-side tensor used as the template that the device graph clones.
    Tensor hostInput =
        make_ref<TensorObj>(Shape{2, 2, 2, 2}, DataType::Float32, hostRuntime);

    // Build the single-operator graph on the device and feed it.
    Graph graph = make_ref<GraphObj>(xpuRuntime);
    auto xpuInput = graph->cloneTensor(hostInput);
    auto softmaxOp = graph->addOp<SoftmaxObj>(xpuInput, nullptr, 1);
    graph->dataMalloc();
    xpuInput->setData(IncrementalGenerator());
    xpuRuntime->run(graph);

    // Bring the result back to the host and compare.
    auto hostResult = softmaxOp->getOutput()->clone(hostRuntime);
    const vector<float> expected{
        0.0179862, 0.0179862, 0.0179862, 0.0179862, 0.9820138, 0.9820138,
        0.9820138, 0.9820138, 0.0179862, 0.0179862, 0.0179862, 0.0179862,
        0.9820138, 0.9820138, 0.9820138, 0.9820138};
    EXPECT_TRUE(hostResult->equalData(expected));
}
|
||||
|
||||
// Softmax along axis 2 of a 2x2x2x2 Float32 tensor on the KUNLUN runtime.
// Input values come from IncrementalGenerator (presumably 0, 1, 2, ... in
// storage order -- confirm against its definition).
TEST(XDNN_Softmax2, run_axis2) {
    // Host and device runtimes.
    Runtime hostRuntime = NativeCpuRuntimeObj::getInstance();
    auto xpuRuntime = make_ref<KUNLUNRuntimeObj>();

    // Host-side tensor used as the template that the device graph clones.
    Tensor hostInput =
        make_ref<TensorObj>(Shape{2, 2, 2, 2}, DataType::Float32, hostRuntime);

    // Build the single-operator graph on the device and feed it.
    Graph graph = make_ref<GraphObj>(xpuRuntime);
    auto xpuInput = graph->cloneTensor(hostInput);
    auto softmaxOp = graph->addOp<SoftmaxObj>(xpuInput, nullptr, 2);
    graph->dataMalloc();
    xpuInput->setData(IncrementalGenerator());
    xpuRuntime->run(graph);

    // Bring the result back to the host and compare.
    auto hostResult = softmaxOp->getOutput()->clone(hostRuntime);
    const vector<float> expected{
        0.1192029, 0.1192029, 0.8807971, 0.8807971, 0.1192029, 0.1192029,
        0.8807971, 0.8807971, 0.1192029, 0.1192029, 0.8807971, 0.8807971,
        0.1192029, 0.1192029, 0.8807971, 0.8807971};
    EXPECT_TRUE(hostResult->equalData(expected));
}
|
||||
|
||||
// Softmax along axis 3 of a 2x2x2x2 Float32 tensor on the KUNLUN runtime.
// Input values come from IncrementalGenerator (presumably 0, 1, 2, ... in
// storage order -- confirm against its definition).
TEST(XDNN_Softmax2, run_axis3) {
    // Host and device runtimes.
    Runtime hostRuntime = NativeCpuRuntimeObj::getInstance();
    auto xpuRuntime = make_ref<KUNLUNRuntimeObj>();

    // Host-side tensor used as the template that the device graph clones.
    Tensor hostInput =
        make_ref<TensorObj>(Shape{2, 2, 2, 2}, DataType::Float32, hostRuntime);

    // Build the single-operator graph on the device and feed it.
    Graph graph = make_ref<GraphObj>(xpuRuntime);
    auto xpuInput = graph->cloneTensor(hostInput);
    auto softmaxOp = graph->addOp<SoftmaxObj>(xpuInput, nullptr, 3);
    graph->dataMalloc();
    xpuInput->setData(IncrementalGenerator());
    xpuRuntime->run(graph);

    // Bring the result back to the host and compare.
    auto hostResult = softmaxOp->getOutput()->clone(hostRuntime);
    const vector<float> expected{
        0.2689414, 0.7310586, 0.2689414, 0.7310586, 0.2689414, 0.7310586,
        0.2689414, 0.7310586, 0.2689414, 0.7310586, 0.2689414, 0.7310586,
        0.2689414, 0.7310586, 0.2689414, 0.7310586};
    EXPECT_TRUE(hostResult->equalData(expected));
}
|
||||
} // namespace infini
|
Loading…
Reference in New Issue