add divnonan operation and test

This commit is contained in:
wanghailu 2022-12-15 08:47:22 +00:00
parent a56fb98eee
commit 9346232129
5 changed files with 105 additions and 1 deletions

View File

@ -26,6 +26,7 @@ enum class OpType {
Mul,
Div,
DivDemo,
DivNoNan,
Pow,
Gather,
ReduceMean,
@ -97,6 +98,7 @@ class OpRegistry {
FOP(Mul);
FOP(Div);
FOP(DivDemo);
FOP(DivNoNan);
FOP(Pow);
FOP(Gather);
FOP(ReduceMean);

View File

@ -29,6 +29,7 @@ DEFINE_ELEMENT_WISE_OBJ(Add, OpType::Add)
DEFINE_ELEMENT_WISE_OBJ(Sub, OpType::Sub)
DEFINE_ELEMENT_WISE_OBJ(Mul, OpType::Mul)
DEFINE_ELEMENT_WISE_OBJ(DivDemo, OpType::DivDemo)
DEFINE_ELEMENT_WISE_OBJ(DivNoNan, OpType::DivNoNan)
DEFINE_ELEMENT_WISE_OBJ(Div, OpType::Div)
DEFINE_ELEMENT_WISE_OBJ(Pow, OpType::Pow)
}; // namespace infini

View File

@ -116,6 +116,56 @@ class DivCnnl : public BangKernelWithoutConfig {
}
};
class DivNoNanCnnl : public BangKernelWithoutConfig {
void compute(const Operator &_op,
const RuntimeObj *_context) const override {
auto op = as<ElementWiseObj>(_op);
auto context = dynamic_cast<const BangRuntimeObj *>(_context);
void *const aData = (op->getInputs(0)->getRawDataPtr<void *>());
void *const bData = (op->getInputs(1)->getRawDataPtr<void *>());
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
cnnlTensorDescriptor_t aDesc, bDesc, cDesc;
auto dim = op->getInputs(0)->getDims();
if (dim.size() != 4)
IT_TODO_HALT();
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
// get inputs
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
CNNL_DTYPE_FLOAT, 4, dim_array));
checkCnnlError(cnnlCreateTensorDescriptor(&bDesc));
checkCnnlError(cnnlSetTensorDescriptor(bDesc, CNNL_LAYOUT_NCHW,
CNNL_DTYPE_FLOAT, 4, dim_array));
// get outputs
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
CNNL_DTYPE_FLOAT, 4, dim_array));
size_t wsSize;
cnnlGetDivNoNanWorkspaceSize(context->cnnlHandle(), aDesc, bDesc, cDesc,
&wsSize);
BangPtr wsData = context->getWorkspace(wsSize);
cnnlStatus_t stat = cnnlDivNoNan_v2(context->cnnlHandle(),
CNNL_COMPUTATION_HIGH_PRECISION,
aDesc, aData, bDesc, bData, wsData, wsSize, cDesc, cData);
if (stat != CNNL_STATUS_SUCCESS)
return;
// Destories in BANG does not require sync. But cnnl does not state
// whether sync is required before destories.
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
checkCnnlError(cnnlDestroyTensorDescriptor(bDesc));
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
}
};
class AddCnnl : public ElementWiseCnnl {
cnnlOpTensorDesc_t getOpType() const override { return CNNL_OP_TENSOR_ADD; }
};
@ -149,6 +199,8 @@ REGISTER_KERNEL(Device::BANG, OpType::DivDemo, DataType::Float32, ElementWiseBan
"DivDemo_Bang_Float32");
REGISTER_KERNEL(Device::BANG, OpType::Div, DataType::Float32, DivCnnl,
"Div_cnnl_Float32");
REGISTER_KERNEL(Device::BANG, OpType::DivNoNan, DataType::Float32, DivNoNanCnnl,
"DivNoNan_cnnl_Float32");
// REGISTER_KERNEL(Device::BANG, OpType::Pow, DataType::Float32,
// ElementWiseBang,
// "Pow_Bang_Float32");

View File

@ -42,7 +42,7 @@ void testDivDemo(
EXPECT_TRUE(1);
}
TEST(cuDNN_DivDemo, run) {
TEST(cnnl_DivDemo, run) {
testDivDemo<DivDemoObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
}

View File

@ -0,0 +1,49 @@
#include "bang/bang_runtime.h"
#include "core/graph.h"
#include "core/kernel.h"
#include "core/runtime.h"
#include "operators/element_wise.h"
#include "test.h"
namespace infini {
template <class T>
void testDivNoNan(
const std::function<void(void *, size_t, DataType)> &generator,
const Shape &shape) {
// Runtime
Runtime cpuRuntime = CpuRuntimeObj::getInstance();
auto bangRuntime = make_ref<BangRuntimeObj>();
// Build input data on CPU
Tensor inputCpu1 =
make_ref<TensorObj>(shape, DataType::Float32, cpuRuntime);
inputCpu1->dataMalloc();
inputCpu1->setData(generator);
Tensor inputCpu2 =
make_ref<TensorObj>(shape, DataType::Float32, cpuRuntime);
inputCpu2->dataMalloc();
inputCpu2->setData(generator);
// GPU
Graph bangGraph = make_ref<GraphObj>(bangRuntime);
auto inputGpu1 = bangGraph->cloneTensor(inputCpu1);
auto inputGpu2 = bangGraph->cloneTensor(inputCpu2);
auto gpuOp = bangGraph->addOp<T>(inputGpu1, inputGpu2, nullptr);
bangGraph->dataMalloc();
bangRuntime->run(bangGraph);
auto outputGpu = gpuOp->getOutput();
auto outputGpu2Cpu = outputGpu->clone(cpuRuntime);
// Check
inputCpu1->printData();
inputCpu2->printData();
outputGpu2Cpu->printData();
EXPECT_TRUE(1);
}
TEST(cnnl_DivNoNan, run) {
testDivNoNan<DivNoNanObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
}
} // namespace infini