forked from jiuyuan/InfiniTensor
add bangSoftmax, compare cnnl and bang C
This commit is contained in:
parent 186a6f37f2
commit 1ed4b36db2
@@ -1,11 +1,11 @@
 #pragma once
 #include "bang/bang_runtime.h"
-#include "bang_highSoftmax.h"
+#include "bang_bangSoftmax.h"
 #include "operators/softmax.h"
 namespace infini {

 void softmax_kernel(const RuntimeObj *obj, const Operator &_op) {
-    auto op = as<SoftmaxObj>(_op);
+    auto op = as<BangSoftmaxObj>(_op);
     void *const mlu_src = (op->getInputs(0)->getRawDataPtr<void *>());
     void *const mlu_destination = (op->getOutput()->getRawDataPtr<void *>());

@@ -31,7 +31,7 @@ void softmax_kernel(const RuntimeObj *obj, const Operator &_op) {
             othersize *= shape[s];
         }
     }
-    if (op->getOpType() == OpType::Softmax)
+    if (op->getOpType() == OpType::BangSoftmax)
        softmaxKernel(context->cnnlHandle(), (float *)mlu_destination,
                      (float *)mlu_src, nDim, axis, othersize, frontsize,
                      dimsize, stride);
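The sizes handed to softmaxKernel describe how the tensor is flattened around the softmax axis. A minimal illustrative sketch of how they are typically derived from the shape; the variable names match the kernel signature above, but this derivation is an assumption, not code from this commit:

    // Illustrative only: derive the sizes softmaxKernel expects from a shape
    // and an axis. Invariant: frontsize * dimsize * stride == element count.
    #include <vector>

    void computeSoftmaxSizes(const std::vector<int> &shape, int axis,
                             int &frontsize, int &dimsize, int &stride,
                             int &othersize) {
        int nDim = (int)shape.size();
        frontsize = 1; // product of dims before the softmax axis
        for (int i = 0; i < axis; ++i)
            frontsize *= shape[i];
        dimsize = shape[axis]; // length of the axis being normalized
        stride = 1; // product of dims after the softmax axis
        for (int i = axis + 1; i < nDim; ++i)
            stride *= shape[i];
        othersize = frontsize * stride; // number of independent softmax rows
    }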
@@ -59,6 +59,7 @@ class GraphHandlerObj {
     Tensor tanh(Tensor x, Tensor y);
     Tensor erf(Tensor x, Tensor y);
     Tensor softmax(Tensor x, Tensor y, int axis);
+    Tensor bangSoftmax(Tensor x, Tensor y, int axis);
     Tensor abs(Tensor x, Tensor y);
     Tensor sqrt(Tensor x, Tensor y);
     Tensor neg(Tensor x, Tensor y);
@@ -180,6 +180,7 @@ struct OpType {
        Size,
        Slice,
        Softmax,
+       BangSoftmax,
        SoftmaxCrossEntropyLoss,
        Softplus,
        Softsign,
@@ -24,4 +24,26 @@ class SoftmaxObj : public OperatorObj {
     vector<int> getWorkloadVector() const override;
     vector<int> getOpAttrVector() const override;
 };
+class BangSoftmaxObj : public OperatorObj {
+    int axis;
+
+  public:
+    BangSoftmaxObj(GraphObj *graph, Tensor input, Tensor output, int axis);
+
+    OP_CLONE(BangSoftmaxObj);
+
+    optional<vector<Shape>> inferShape(const TensorVec &inputs) override {
+        return {{inputs[0]->getDims()}};
+    };
+
+    std::string toString() const override;
+    int numInputs() const override { return 1; }
+    int numOutputs() const override { return 1; }
+
+    int getAxis() const { return axis; }
+
+  private:
+    vector<int> getWorkloadVector() const override;
+    vector<int> getOpAttrVector() const override;
+};
 } // namespace infini
@@ -227,6 +227,15 @@ Tensor GraphHandlerObj::softmax(Tensor input, Tensor output, int axis) {
             ->getOutput();
     }
 }
+Tensor GraphHandlerObj::bangSoftmax(Tensor input, Tensor output, int axis) {
+    if (output) {
+        g->addOpWithOutputs<BangSoftmaxObj>(std::move(input), output, axis);
+        return output;
+    } else {
+        return g->addOp<BangSoftmaxObj>(std::move(input), output, axis)
+            ->getOutput();
+    }
+}
+
 Tensor GraphHandlerObj::flatten(Tensor input, Tensor output, int axis) {
     if (output) {
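Like the existing softmax handler, bangSoftmax either reuses a caller-supplied output tensor (addOpWithOutputs) or lets the graph allocate one (addOp). A hypothetical caller-side sketch; bangGraph and inputGpu are assumed to be a Graph and Tensor built on the BANG runtime as in the test file further down:

    // Hypothetical usage sketch, not code from this commit.
    auto op = bangGraph->addOp<BangSoftmaxObj>(inputGpu, nullptr, /*axis=*/1);
    Tensor out = op->getOutput(); // output tensor allocated by the graph
    // With a pre-allocated output tensor instead:
    // bangGraph->addOpWithOutputs<BangSoftmaxObj>(inputGpu, outGpu, /*axis=*/1);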
@@ -522,6 +522,7 @@ void init_graph_builder(py::module &m) {
        .def("hardSigmoid", &Handler::hardSigmoid, policy::move)
        .def("hardSwish", &Handler::hardSwish, policy::move)
        .def("softmax", &Handler::softmax, policy::move)
+       .def("bangSoftmax", &Handler::bangSoftmax, policy::move)
        .def("abs", &Handler::abs, policy::move)
        .def("sqrt", &Handler::sqrt, policy::move)
        .def("neg", &Handler::neg, policy::move)
@@ -246,7 +246,8 @@ REGISTER_KERNEL(Device::BANG, OpType::PRelu, PReluCnnl, "PRelu_cnnl_BANG");
 REGISTER_KERNEL(Device::BANG, OpType::Sigmoid, SigmoidCnnl,
                 "Sigmoid_cnnl_BANG");
 REGISTER_KERNEL(Device::BANG, OpType::Round, RoundCnnl, "Round_cnnl_BANG");
+REGISTER_KERNEL(Device::BANG, OpType::Softmax, SoftmaxCnnl,
+                "Softmax_cnnl_BANG");
 REGISTER_KERNEL(Device::BANG, OpType::HardSigmoid, HardSigmoidCnnl,
                 "HardSigmoid_cnnl_BANG");
 REGISTER_KERNEL(Device::BANG, OpType::HardSwish, HardSwishCnnl,
@@ -10,5 +10,5 @@ class SoftmaxBang : public BangKernelWithoutConfig {
     }
 };

-REGISTER_KERNEL(Device::BANG, OpType::Softmax, SoftmaxBang, "Softmax_BANG");
+REGISTER_KERNEL(Device::BANG, OpType::BangSoftmax, SoftmaxBang, "Softmax_BANG");
 }; // namespace infini
@@ -1,5 +1,5 @@
-#include "bang_highSoftmax.h"
-#include "highSoftmax.h"
+#include "bang_bangSoftmax.h"
+#include "bangSoftmax.h"

 namespace infini{
 void softmaxKernel(cnnlHandle_t handle, float *mlu_destination, float *mlu_src, int nDim, int axis, int othersize, int frontsize, int dimsize, int stride){
@@ -31,4 +31,33 @@ vector<int> SoftmaxObj::getWorkloadVector() const {
 vector<int> SoftmaxObj::getOpAttrVector() const {
     return {type.underlying(), axis};
 }
+BangSoftmaxObj::BangSoftmaxObj(GraphObj *graph, Tensor input, Tensor output,
+                               int _axis)
+    : OperatorObj(OpType::BangSoftmax, {input}, {output}) {
+    int rank = input->getRank();
+    axis = get_real_axis(_axis, rank);
+    IT_ASSERT(checkValid(graph));
+}
+
+std::string BangSoftmaxObj::toString() const {
+    std::ostringstream os;
+    os << type.toString() << "[" << getGuid() << "]";
+    os << "(";
+    os << vecToString(inputs[0]->getDims()) << ",";
+    os << "input=" << inputs[0]->getGuid() << ",";
+    os << "output=" << outputs[0]->getGuid() << ",";
+    os << "axis=" << axis << ")";
+    return os.str();
+}
+
+vector<int> BangSoftmaxObj::getWorkloadVector() const {
+    vector<int> ret{type.underlying(), axis};
+    const Shape shape = outputs[0]->getDims();
+    ret.insert(ret.end(), shape.begin(), shape.end());
+    return ret;
+}
+
+vector<int> BangSoftmaxObj::getOpAttrVector() const {
+    return {type.underlying(), axis};
+}
 } // namespace infini
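The constructor normalizes an ONNX-style axis before validation. A minimal sketch of what get_real_axis is assumed to do (negative axes count from the back; this mirrors the usual InfiniTensor helper and is stated here as an assumption, not the library's definition):

    // Assumed behavior: map an axis in [-rank, rank) into [0, rank).
    int get_real_axis(int axis, int rank) {
        int real = (axis < 0) ? axis + rank : axis;
        // IT_ASSERT(real >= 0 && real < rank); // out-of-range axes rejected
        return real;
    }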
@@ -5,134 +5,121 @@
 #include "operators/softmax.h"
 #include "test.h"
 #include <cmath>
+#include <sys/time.h>
 namespace infini {
 double eps = 3e-3;
-TEST(cuDNN_Softmax, run_axis1) {
-    // Runtime
+void test_softmaxFp32(const Shape &inputShape, const vector<float> &inputData,
+                      int axis, const vector<float> &expectData) {
     Runtime cpuRuntime = NativeCpuRuntimeObj::getInstance();
     auto bangRuntime = make_ref<BangRuntimeObj>();

     // Build input data on CPU
     Tensor inputCpu =
-        make_ref<TensorObj>(Shape{2, 4}, DataType::Float32, cpuRuntime);
+        make_ref<TensorObj>(inputShape, DataType::Float32, cpuRuntime);

     // GPU
     Graph bangGraph = make_ref<GraphObj>(bangRuntime);
     auto inputGpu = bangGraph->cloneTensor(inputCpu);
-    auto gpuOp = bangGraph->addOp<SoftmaxObj>(inputGpu, nullptr, 1);
+    // cnnlSoftmax
+    auto gpuOp = bangGraph->addOp<SoftmaxObj>(inputGpu, nullptr, axis);
     bangGraph->dataMalloc();
-    inputGpu->copyin(vector<float>{0, 1, 2, 3, 10000, 10001, 10002, 10003});
+    inputGpu->copyin(inputData);
     bangRuntime->run(bangGraph);
     auto outputGpu = gpuOp->getOutput();
     auto outputGpu2Cpu = outputGpu->clone(cpuRuntime);
+    // bangSoftmax
+    auto bangGpuOp = bangGraph->addOp<BangSoftmaxObj>(inputGpu, nullptr, axis);
+    bangGraph->dataMalloc();
+    inputGpu->copyin(inputData);
+    bangRuntime->run(bangGraph);
+    auto bangOutputGpu = bangGpuOp->getOutput();
+    auto bangOutputGpu2Cpu = bangOutputGpu->clone(cpuRuntime);
     // Check
-    EXPECT_TRUE(outputGpu2Cpu->equalData(
+    EXPECT_TRUE(outputGpu2Cpu->equalData(expectData, eps));     // cnnlSoftmax
+    EXPECT_TRUE(bangOutputGpu2Cpu->equalData(expectData, eps)); // bangSoftmax
+}
+double get_walltime() {
+    struct timeval tp;
+    gettimeofday(&tp, NULL);
+    return (double)(tp.tv_sec + tp.tv_usec * 1e-6);
+}
+float err(float *x, float *y, const Shape &inputShape, int nDim) {
+    int size = 1;
+    for (int i = 0; i < nDim; i++) {
+        size *= inputShape[i];
+    }
+    float error = 0;
+    for (int i = 0; i < size; i++) {
+        if (fabs(x[i] - y[i]) > error) {
+            error = fabs(x[i] - y[i]);
+        }
+    }
+    return error;
+}
+void test_compareSoftmaxFp32(
+    int axis, const Shape &inputShape, int nDim,
+    const std::function<void(void *, size_t, DataType)> &generator) {
+    Runtime cpuRuntime = NativeCpuRuntimeObj::getInstance();
+    auto bangRuntime = make_ref<BangRuntimeObj>();
+
+    // Build input data on CPU
+    Tensor inputCpu =
+        make_ref<TensorObj>(inputShape, DataType::Float32, cpuRuntime);
+
+    // GPU
+    Graph bangGraph = make_ref<GraphObj>(bangRuntime);
+    auto inputGpu = bangGraph->cloneTensor(inputCpu);
+    // cnnlSoftmax
+    auto gpuOp = bangGraph->addOp<SoftmaxObj>(inputGpu, nullptr, axis);
+    bangGraph->dataMalloc();
+    inputGpu->setData(generator);
+    double cnnlst, cnnlela;
+    cnnlst = get_walltime();
+    bangRuntime->run(bangGraph);
+    cnnlela = 1000 * (get_walltime() - cnnlst);
+    auto outputGpu = gpuOp->getOutput();
+    auto outputGpu2Cpu = outputGpu->clone(cpuRuntime);
+    // bangSoftmax
+    auto bangGpuOp = bangGraph->addOp<BangSoftmaxObj>(inputGpu, nullptr, axis);
+    bangGraph->dataMalloc();
+    inputGpu->setData(generator);
+    double bangst, bangela;
+    bangst = get_walltime();
+    bangRuntime->run(bangGraph);
+    bangela = 1000 * (get_walltime() - bangst);
+    auto bangOutputGpu = bangGpuOp->getOutput();
+    auto bangOutputGpu2Cpu = bangOutputGpu->clone(cpuRuntime);
+    // Check
+    float error =
+        err(outputGpu2Cpu->getRawDataPtr<float *>(),
+            bangOutputGpu2Cpu->getRawDataPtr<float *>(), inputShape, nDim);
+    printf("axis:%d. bang time:%.2f ms, cnnl time:%.2f ms, err:%.4e\n", axis,
+           bangela, cnnlela, error);
+}
+TEST(BANG_SoftmaxFp32, run) {
+    test_softmaxFp32(
+        Shape{2, 3, 2, 2},
+        vector<float>{0., 1., 2., 3., 4., 5., 6., 7.,
+                      8., 9., 10., 11., 12., 13., 14., 15.,
+                      16., 17., 18., 19., 20., 21., 22., 23.},
+        0, vector<float>{6.14417422e-06, 6.14417422e-06, 6.14417422e-06,
+                         6.14417422e-06, 6.14417422e-06, 6.14417422e-06,
+                         6.14417422e-06, 6.14417422e-06, 6.14417422e-06,
+                         6.14417422e-06, 6.14417422e-06, 6.14417422e-06,
+                         9.99993801e-01, 9.99993801e-01, 9.99993801e-01,
+                         9.99993801e-01, 9.99993801e-01, 9.99993801e-01,
+                         9.99993801e-01, 9.99993801e-01, 9.99993801e-01,
+                         9.99993801e-01, 9.99993801e-01, 9.99993801e-01});
+    test_softmaxFp32(
+        Shape{2, 4}, vector<float>{0., 1., 2., 3., 1000, 1001, 1002, 1003}, 1,
         vector<float>{0.032058604, 0.08714432, 0.23688284, 0.6439143,
-                      0.032058604, 0.08714432, 0.23688284, 0.6439143},
-        eps));
+                      0.032058604, 0.08714432, 0.23688284, 0.6439143});
+}
+TEST(BANG_CompareSoftmaxFp32, run) {
+    test_compareSoftmaxFp32(3, Shape{1, 32, 1, 5}, 4, RandomGenerator());
+    test_compareSoftmaxFp32(3, Shape{1, 32, 128, 5}, 4, RandomGenerator());
+    test_compareSoftmaxFp32(3, Shape{1, 32, 1, 5}, 4, IncrementalGenerator());
+    test_compareSoftmaxFp32(3, Shape{1, 32, 128, 5}, 4, IncrementalGenerator());
 }

-TEST(cuDNN_Softmax, run_axis0) {
-    // Runtime
-    Runtime cpuRuntime = NativeCpuRuntimeObj::getInstance();
-    auto bangRuntime = make_ref<BangRuntimeObj>();
-
-    // Build input data on CPU
-    Tensor inputCpu =
-        make_ref<TensorObj>(Shape{2, 4}, DataType::Float32, cpuRuntime);
-
-    // GPU
-    Graph bangGraph = make_ref<GraphObj>(bangRuntime);
-    auto inputGpu = bangGraph->cloneTensor(inputCpu);
-    auto gpuOp = bangGraph->addOp<SoftmaxObj>(inputGpu, nullptr, 0);
-    bangGraph->dataMalloc();
-    inputGpu->copyin(vector<float>{0, 1, 2, 3, 10000, 10001, 10002, 10003});
-    bangRuntime->run(bangGraph);
-    auto outputGpu = gpuOp->getOutput();
-    auto outputGpu2Cpu = outputGpu->clone(cpuRuntime);
-    // Check
-    EXPECT_TRUE(outputGpu2Cpu->equalData(
-        vector<float>{0., 0., 0., 0., 1, 1, 1, 1}, eps));
-}
-
-TEST(cuDNN_Softmax2, run_axis1) {
-    // Runtime
-    Runtime cpuRuntime = NativeCpuRuntimeObj::getInstance();
-    auto bangRuntime = make_ref<BangRuntimeObj>();
-
-    // Build input data on CPU
-    Tensor inputCpu =
-        make_ref<TensorObj>(Shape{2, 2, 2, 2}, DataType::Float32, cpuRuntime);
-
-    // GPU
-    Graph bangGraph = make_ref<GraphObj>(bangRuntime);
-    auto inputGpu = bangGraph->cloneTensor(inputCpu);
-    auto gpuOp = bangGraph->addOp<SoftmaxObj>(inputGpu, nullptr, 1);
-    bangGraph->dataMalloc();
-    inputGpu->setData(IncrementalGenerator());
-    bangRuntime->run(bangGraph);
-    auto outputGpu = gpuOp->getOutput();
-    auto outputGpu2Cpu = outputGpu->clone(cpuRuntime);
-    // Check
-    EXPECT_TRUE(outputGpu2Cpu->equalData(
-        vector<float>{0.0179862, 0.0179862, 0.0179862, 0.0179862, 0.9820138,
-                      0.9820138, 0.9820138, 0.9820138, 0.0179862, 0.0179862,
-                      0.0179862, 0.0179862, 0.9820138, 0.9820138, 0.9820138,
-                      0.9820138},
-        eps));
-}
-
-TEST(cuDNN_Softmax2, run_axis2) {
-    // Runtime
-    Runtime cpuRuntime = NativeCpuRuntimeObj::getInstance();
-    auto bangRuntime = make_ref<BangRuntimeObj>();
-
-    // Build input data on CPU
-    Tensor inputCpu =
-        make_ref<TensorObj>(Shape{2, 2, 2, 2}, DataType::Float32, cpuRuntime);
-
-    // GPU
-    Graph bangGraph = make_ref<GraphObj>(bangRuntime);
-    auto inputGpu = bangGraph->cloneTensor(inputCpu);
-    auto gpuOp = bangGraph->addOp<SoftmaxObj>(inputGpu, nullptr, 2);
-    bangGraph->dataMalloc();
-    inputGpu->setData(IncrementalGenerator());
-    bangRuntime->run(bangGraph);
-    auto outputGpu = gpuOp->getOutput();
-    auto outputGpu2Cpu = outputGpu->clone(cpuRuntime);
-    // Check
-    EXPECT_TRUE(outputGpu2Cpu->equalData(
-        vector<float>{0.1192029, 0.1192029, 0.8807971, 0.8807971, 0.1192029,
-                      0.1192029, 0.8807971, 0.8807971, 0.1192029, 0.1192029,
-                      0.8807971, 0.8807971, 0.1192029, 0.1192029, 0.8807971,
-                      0.8807971},
-        eps));
-}
-
-TEST(cuDNN_Softmax2, run_axis3) {
-    // Runtime
-    Runtime cpuRuntime = NativeCpuRuntimeObj::getInstance();
-    auto bangRuntime = make_ref<BangRuntimeObj>();
-
-    // Build input data on CPU
-    Tensor inputCpu =
-        make_ref<TensorObj>(Shape{2, 2, 2, 2}, DataType::Float32, cpuRuntime);
-
-    // GPU
-    Graph bangGraph = make_ref<GraphObj>(bangRuntime);
-    auto inputGpu = bangGraph->cloneTensor(inputCpu);
-    auto gpuOp = bangGraph->addOp<SoftmaxObj>(inputGpu, nullptr, 3);
-    bangGraph->dataMalloc();
-    inputGpu->setData(IncrementalGenerator());
-    bangRuntime->run(bangGraph);
-    auto outputGpu = gpuOp->getOutput();
-    auto outputGpu2Cpu = outputGpu->clone(cpuRuntime);
-    // Check
-    EXPECT_TRUE(outputGpu2Cpu->equalData(
-        vector<float>{0.2689414, 0.7310586, 0.2689414, 0.7310586, 0.2689414,
-                      0.7310586, 0.2689414, 0.7310586, 0.2689414, 0.7310586,
-                      0.2689414, 0.7310586, 0.2689414, 0.7310586, 0.2689414,
-                      0.7310586},
-        eps));
-}
 } // namespace infini