forked from jiuyuan/InfiniTensor
Fix mlu (#87)
* fix some operator code * fix some code of mlu operator * fix some code of cast and elementwise * clang format * remove copy kernel * fix cast * fix clang-format --------- Co-authored-by: wanghailu <wanghailu@qiyuanlab.com> Co-authored-by: Haojie Wang <haojie0429@gmail.com>
This commit is contained in:
parent
2412c25e67
commit
b4eda85e67
|
@ -16,25 +16,17 @@ class UnaryCnnl : public BangKernelWithoutConfig {
|
||||||
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
||||||
|
|
||||||
cnnlTensorDescriptor_t aDesc, cDesc;
|
cnnlTensorDescriptor_t aDesc, cDesc;
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto aDim = op->getInputs(0)->getDims();
|
||||||
int len = dim.size();
|
auto cDim = op->getOutput()->getDims();
|
||||||
int size = 1;
|
|
||||||
for (int i = 0; i < len; ++i) {
|
|
||||||
size *= dim[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
int dim_array[1] = {size};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_ARRAY,
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 1, dim_array));
|
CNNL_DTYPE_FLOAT, aDim.size(),
|
||||||
|
aDim.data()));
|
||||||
// get outputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_ARRAY,
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 1, dim_array));
|
CNNL_DTYPE_FLOAT, cDim.size(),
|
||||||
|
cDim.data()));
|
||||||
// get op descriptor
|
|
||||||
cnnlActivationDescriptor_t opDesc;
|
cnnlActivationDescriptor_t opDesc;
|
||||||
checkCnnlError(cnnlCreateActivationDescriptor(&opDesc));
|
checkCnnlError(cnnlCreateActivationDescriptor(&opDesc));
|
||||||
checkCnnlError(cnnlSetActivationDescriptor(
|
checkCnnlError(cnnlSetActivationDescriptor(
|
||||||
|
@ -46,9 +38,6 @@ class UnaryCnnl : public BangKernelWithoutConfig {
|
||||||
aData, &beta, cDesc, cData);
|
aData, &beta, cDesc, cData);
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
||||||
checkCnnlError(cnnlDestroyActivationDescriptor(opDesc));
|
checkCnnlError(cnnlDestroyActivationDescriptor(opDesc));
|
||||||
|
@ -65,28 +54,21 @@ class RoundCnnl : public BangKernelWithoutConfig {
|
||||||
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
||||||
|
|
||||||
cnnlTensorDescriptor_t aDesc, cDesc;
|
cnnlTensorDescriptor_t aDesc, cDesc;
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto aDim = op->getInputs(0)->getDims();
|
||||||
if (dim.size() != 4)
|
auto cDim = op->getOutput()->getDims();
|
||||||
IT_TODO_HALT();
|
|
||||||
|
|
||||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, aDim.size(),
|
||||||
|
aDim.data()));
|
||||||
// get outputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, cDim.size(),
|
||||||
|
cDim.data()));
|
||||||
cnnlStatus_t stat =
|
cnnlStatus_t stat =
|
||||||
cnnlRound(context->cnnlHandle(), aDesc, aData, cDesc, cData);
|
cnnlRound(context->cnnlHandle(), aDesc, aData, cDesc, cData);
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
||||||
}
|
}
|
||||||
|
@ -103,31 +85,28 @@ class PReluCnnl : public BangKernelWithoutConfig {
|
||||||
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
||||||
|
|
||||||
cnnlTensorDescriptor_t aDesc, bDesc, cDesc;
|
cnnlTensorDescriptor_t aDesc, bDesc, cDesc;
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto aDim = op->getInputs(0)->getDims();
|
||||||
if (dim.size() != 4)
|
auto bDim = op->getInputs(1)->getDims();
|
||||||
IT_TODO_HALT();
|
auto cDim = op->getOutput()->getDims();
|
||||||
|
|
||||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
int alpha_array[4] = {1, 1, 1, 1};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, aDim.size(),
|
||||||
|
aDim.data()));
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&bDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&bDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
checkCnnlError(cnnlSetTensorDescriptor(bDesc, CNNL_LAYOUT_NCHW,
|
||||||
bDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_FLOAT, 4, alpha_array));
|
CNNL_DTYPE_FLOAT, bDim.size(),
|
||||||
// get outputs
|
bDim.data()));
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, cDim.size(),
|
||||||
|
cDim.data()));
|
||||||
|
|
||||||
cnnlStatus_t stat = cnnlPrelu(context->cnnlHandle(), aDesc, aData,
|
cnnlStatus_t stat = cnnlPrelu(context->cnnlHandle(), aDesc, aData,
|
||||||
bDesc, bData, cDesc, cData);
|
bDesc, bData, cDesc, cData);
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(bDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(bDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
||||||
|
|
|
@ -18,28 +18,27 @@ class ActivationBackwardCnnl : public BangKernelWithoutConfig {
|
||||||
void *const diffXData = (op->getOutput()->getRawDataPtr<void *>());
|
void *const diffXData = (op->getOutput()->getRawDataPtr<void *>());
|
||||||
|
|
||||||
cnnlTensorDescriptor_t yDesc, diffYDesc, xDesc, diffXDesc;
|
cnnlTensorDescriptor_t yDesc, diffYDesc, xDesc, diffXDesc;
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto yDim = op->getInputs(0)->getDims();
|
||||||
if (dim.size() != 4)
|
auto diffyDim = op->getInputs(1)->getDims();
|
||||||
IT_TODO_HALT();
|
auto xDim = op->getInputs(2)->getDims();
|
||||||
|
auto diffxDim = op->getOutput()->getDims();
|
||||||
|
|
||||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&yDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&yDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(yDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(yDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, yDim.size(),
|
||||||
// get inputs
|
yDim.data()));
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&diffYDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&diffYDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(diffYDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
diffYDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_FLOAT, diffyDim.size(),
|
||||||
// get inputs
|
diffyDim.data()));
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&xDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&xDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(xDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(xDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, xDim.size(),
|
||||||
// get outputs
|
xDim.data()));
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&diffXDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&diffXDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(diffXDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
diffXDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_FLOAT, diffxDim.size(),
|
||||||
|
diffxDim.data()));
|
||||||
// get op descriptor
|
// get op descriptor
|
||||||
cnnlActivationDescriptor_t opDesc;
|
cnnlActivationDescriptor_t opDesc;
|
||||||
checkCnnlError(cnnlCreateActivationDescriptor(&opDesc));
|
checkCnnlError(cnnlCreateActivationDescriptor(&opDesc));
|
||||||
|
@ -53,8 +52,6 @@ class ActivationBackwardCnnl : public BangKernelWithoutConfig {
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(yDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(yDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(diffYDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(diffYDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(xDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(xDesc));
|
||||||
|
|
|
@ -14,154 +14,189 @@ class CastCnnl : public BangKernelWithoutConfig {
|
||||||
|
|
||||||
cnnlTensorDescriptor_t aDesc, cDesc;
|
cnnlTensorDescriptor_t aDesc, cDesc;
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto dim = op->getInputs(0)->getDims();
|
||||||
if (dim.size() != 4)
|
|
||||||
IT_TODO_HALT();
|
|
||||||
|
|
||||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
||||||
cnnlCastDataType_t NlCastType;
|
cnnlCastDataType_t NlCastType;
|
||||||
CastType type = op->getType();
|
CastType type = op->getType();
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case CastType::Float2Int64:
|
case CastType::Float2Int64:
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
aDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, dim.size(),
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
dim.data()));
|
||||||
cDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_INT64, 4, dim_array));
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
|
CNNL_DTYPE_INT64, dim.size(),
|
||||||
|
dim.data()));
|
||||||
NlCastType = CNNL_CAST_FLOAT_TO_INT64;
|
NlCastType = CNNL_CAST_FLOAT_TO_INT64;
|
||||||
break;
|
break;
|
||||||
case CastType::Float2Int32:
|
case CastType::Float2Int32:
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
aDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, dim.size(),
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
dim.data()));
|
||||||
cDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_INT32, 4, dim_array));
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
|
CNNL_DTYPE_INT32, dim.size(),
|
||||||
|
dim.data()));
|
||||||
NlCastType = CNNL_CAST_FLOAT_TO_INT32;
|
NlCastType = CNNL_CAST_FLOAT_TO_INT32;
|
||||||
break;
|
break;
|
||||||
case CastType::Float2Int16:
|
case CastType::Float2Int16:
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
aDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, dim.size(),
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
dim.data()));
|
||||||
cDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_INT16, 4, dim_array));
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
|
CNNL_DTYPE_INT16, dim.size(),
|
||||||
|
dim.data()));
|
||||||
NlCastType = CNNL_CAST_FLOAT_TO_INT16;
|
NlCastType = CNNL_CAST_FLOAT_TO_INT16;
|
||||||
break;
|
break;
|
||||||
case CastType::Float2Int8:
|
case CastType::Float2Int8:
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
aDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, dim.size(),
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
dim.data()));
|
||||||
cDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_INT8, 4, dim_array));
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
|
CNNL_DTYPE_INT8, dim.size(),
|
||||||
|
dim.data()));
|
||||||
NlCastType = CNNL_CAST_FLOAT_TO_INT8;
|
NlCastType = CNNL_CAST_FLOAT_TO_INT8;
|
||||||
break;
|
break;
|
||||||
case CastType::Int322Float:
|
case CastType::Int322Float:
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
aDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_INT32, 4, dim_array));
|
CNNL_DTYPE_INT32, dim.size(),
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
dim.data()));
|
||||||
cDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_FLOAT, 4, dim_array));
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
|
CNNL_DTYPE_FLOAT, dim.size(),
|
||||||
|
dim.data()));
|
||||||
NlCastType = CNNL_CAST_INT32_TO_FLOAT;
|
NlCastType = CNNL_CAST_INT32_TO_FLOAT;
|
||||||
break;
|
break;
|
||||||
case CastType::Int322Int8:
|
case CastType::Int322Int8:
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
aDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_INT32, 4, dim_array));
|
CNNL_DTYPE_INT32, dim.size(),
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
dim.data()));
|
||||||
cDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_INT8, 4, dim_array));
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
|
CNNL_DTYPE_INT8, dim.size(),
|
||||||
|
dim.data()));
|
||||||
NlCastType = CNNL_CAST_INT32_TO_INT8;
|
NlCastType = CNNL_CAST_INT32_TO_INT8;
|
||||||
break;
|
break;
|
||||||
case CastType::Int322Int16:
|
case CastType::Int322Int16:
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
aDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_INT32, 4, dim_array));
|
CNNL_DTYPE_INT32, dim.size(),
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
dim.data()));
|
||||||
cDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_INT16, 4, dim_array));
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
|
CNNL_DTYPE_INT16, dim.size(),
|
||||||
|
dim.data()));
|
||||||
NlCastType = CNNL_CAST_INT32_TO_INT16;
|
NlCastType = CNNL_CAST_INT32_TO_INT16;
|
||||||
break;
|
break;
|
||||||
case CastType::Int162Float:
|
case CastType::Int162Float:
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
aDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_INT16, 4, dim_array));
|
CNNL_DTYPE_INT16, dim.size(),
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
dim.data()));
|
||||||
cDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_FLOAT, 4, dim_array));
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
|
CNNL_DTYPE_FLOAT, dim.size(),
|
||||||
|
dim.data()));
|
||||||
NlCastType = CNNL_CAST_INT16_TO_FLOAT;
|
NlCastType = CNNL_CAST_INT16_TO_FLOAT;
|
||||||
break;
|
break;
|
||||||
case CastType::Int162Int32:
|
case CastType::Int162Int32:
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
aDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_INT16, 4, dim_array));
|
CNNL_DTYPE_INT16, dim.size(),
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
dim.data()));
|
||||||
cDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_INT32, 4, dim_array));
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
|
CNNL_DTYPE_INT32, dim.size(),
|
||||||
|
dim.data()));
|
||||||
NlCastType = CNNL_CAST_INT16_TO_INT32;
|
NlCastType = CNNL_CAST_INT16_TO_INT32;
|
||||||
break;
|
break;
|
||||||
case CastType::Int82Float:
|
case CastType::Int82Float:
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
aDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_INT8, 4, dim_array));
|
CNNL_DTYPE_INT8, dim.size(),
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
dim.data()));
|
||||||
cDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_FLOAT, 4, dim_array));
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
|
CNNL_DTYPE_FLOAT, dim.size(),
|
||||||
|
dim.data()));
|
||||||
NlCastType = CNNL_CAST_INT8_TO_FLOAT;
|
NlCastType = CNNL_CAST_INT8_TO_FLOAT;
|
||||||
break;
|
break;
|
||||||
case CastType::Int82Int16:
|
case CastType::Int82Int16:
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
aDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_INT8, 4, dim_array));
|
CNNL_DTYPE_INT8, dim.size(),
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
dim.data()));
|
||||||
cDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_INT16, 4, dim_array));
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
|
CNNL_DTYPE_INT16, dim.size(),
|
||||||
|
dim.data()));
|
||||||
NlCastType = CNNL_CAST_INT8_TO_INT16;
|
NlCastType = CNNL_CAST_INT8_TO_INT16;
|
||||||
break;
|
break;
|
||||||
case CastType::Int82Int32:
|
case CastType::Int82Int32:
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
aDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_INT8, 4, dim_array));
|
CNNL_DTYPE_INT8, dim.size(),
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
dim.data()));
|
||||||
cDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_INT32, 4, dim_array));
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
|
CNNL_DTYPE_INT32, dim.size(),
|
||||||
|
dim.data()));
|
||||||
NlCastType = CNNL_CAST_INT8_TO_INT32;
|
NlCastType = CNNL_CAST_INT8_TO_INT32;
|
||||||
break;
|
break;
|
||||||
case CastType::Uint82Float:
|
case CastType::Uint82Float:
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
aDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_UINT8, 4, dim_array));
|
CNNL_DTYPE_UINT8, dim.size(),
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
dim.data()));
|
||||||
cDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_FLOAT, 4, dim_array));
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
|
CNNL_DTYPE_FLOAT, dim.size(),
|
||||||
|
dim.data()));
|
||||||
NlCastType = CNNL_CAST_UINT8_TO_FLOAT;
|
NlCastType = CNNL_CAST_UINT8_TO_FLOAT;
|
||||||
break;
|
break;
|
||||||
case CastType::Uint82Int32:
|
case CastType::Uint82Int32:
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
aDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_UINT8, 4, dim_array));
|
CNNL_DTYPE_UINT8, dim.size(),
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
dim.data()));
|
||||||
cDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_INT32, 4, dim_array));
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
|
CNNL_DTYPE_INT32, dim.size(),
|
||||||
|
dim.data()));
|
||||||
NlCastType = CNNL_CAST_UINT8_TO_INT32;
|
NlCastType = CNNL_CAST_UINT8_TO_INT32;
|
||||||
break;
|
break;
|
||||||
case CastType::Uint82Int64:
|
case CastType::Uint82Int64:
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
aDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_UINT8, 4, dim_array));
|
CNNL_DTYPE_UINT8, dim.size(),
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
dim.data()));
|
||||||
cDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_INT64, 4, dim_array));
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
|
CNNL_DTYPE_INT64, dim.size(),
|
||||||
|
dim.data()));
|
||||||
NlCastType = CNNL_CAST_UINT8_TO_INT64;
|
NlCastType = CNNL_CAST_UINT8_TO_INT64;
|
||||||
break;
|
break;
|
||||||
case CastType::Int322Int64:
|
case CastType::Int322Int64:
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
aDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_INT32, 4, dim_array));
|
CNNL_DTYPE_INT32, dim.size(),
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
dim.data()));
|
||||||
cDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_INT64, 4, dim_array));
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
|
CNNL_DTYPE_INT64, dim.size(),
|
||||||
|
dim.data()));
|
||||||
NlCastType = CNNL_CAST_INT32_TO_INT64;
|
NlCastType = CNNL_CAST_INT32_TO_INT64;
|
||||||
break;
|
break;
|
||||||
case CastType::Int642Int32:
|
case CastType::Int642Int32:
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
aDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_INT64, 4, dim_array));
|
CNNL_DTYPE_INT64, dim.size(),
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
dim.data()));
|
||||||
cDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_INT32, 4, dim_array));
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
|
CNNL_DTYPE_INT32, dim.size(),
|
||||||
|
dim.data()));
|
||||||
NlCastType = CNNL_CAST_INT64_TO_INT32;
|
NlCastType = CNNL_CAST_INT64_TO_INT32;
|
||||||
break;
|
break;
|
||||||
case CastType::Int642Uint32:
|
case CastType::Int642Uint32:
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
aDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_INT64, 4, dim_array));
|
CNNL_DTYPE_INT64, dim.size(),
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
dim.data()));
|
||||||
cDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_UINT32, 4, dim_array));
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
|
CNNL_DTYPE_UINT32,
|
||||||
|
dim.size(), dim.data()));
|
||||||
NlCastType = CNNL_CAST_INT64_TO_UINT32;
|
NlCastType = CNNL_CAST_INT64_TO_UINT32;
|
||||||
break;
|
break;
|
||||||
case CastType::Int642Float:
|
case CastType::Int642Float:
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
aDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_INT64, 4, dim_array));
|
CNNL_DTYPE_INT64, dim.size(),
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
dim.data()));
|
||||||
cDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_FLOAT, 4, dim_array));
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
|
CNNL_DTYPE_FLOAT, dim.size(),
|
||||||
|
dim.data()));
|
||||||
NlCastType = CNNL_CAST_INT64_TO_FLOAT;
|
NlCastType = CNNL_CAST_INT64_TO_FLOAT;
|
||||||
break;
|
break;
|
||||||
case CastType::Uint322Int64:
|
case CastType::Uint322Int64:
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
aDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_UINT32, 4, dim_array));
|
CNNL_DTYPE_UINT32,
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
dim.size(), dim.data()));
|
||||||
cDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_INT64, 4, dim_array));
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
|
CNNL_DTYPE_INT64, dim.size(),
|
||||||
|
dim.data()));
|
||||||
NlCastType = CNNL_CAST_UINT32_TO_INT64;
|
NlCastType = CNNL_CAST_UINT32_TO_INT64;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
@ -172,8 +207,6 @@ class CastCnnl : public BangKernelWithoutConfig {
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,28 +13,23 @@ class CeilCnnl : public BangKernelWithoutConfig {
|
||||||
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
||||||
|
|
||||||
cnnlTensorDescriptor_t aDesc, cDesc;
|
cnnlTensorDescriptor_t aDesc, cDesc;
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto aDim = op->getInputs(0)->getDims();
|
||||||
if (dim.size() != 4)
|
auto cDim = op->getOutput()->getDims();
|
||||||
IT_TODO_HALT();
|
|
||||||
|
|
||||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, aDim.size(),
|
||||||
|
aDim.data()));
|
||||||
// get outputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, cDim.size(),
|
||||||
|
cDim.data()));
|
||||||
|
|
||||||
cnnlStatus_t stat =
|
cnnlStatus_t stat =
|
||||||
cnnlCeil(context->cnnlHandle(), aDesc, aData, cDesc, cData);
|
cnnlCeil(context->cnnlHandle(), aDesc, aData, cDesc, cData);
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,23 +15,17 @@ class ClipCnnl : public BangKernelWithoutConfig {
|
||||||
float max = op->getMax().value();
|
float max = op->getMax().value();
|
||||||
|
|
||||||
cnnlTensorDescriptor_t aDesc;
|
cnnlTensorDescriptor_t aDesc;
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto aDim = op->getInputs(0)->getDims();
|
||||||
if (dim.size() != 4)
|
|
||||||
IT_TODO_HALT();
|
|
||||||
|
|
||||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, aDim.size(),
|
||||||
|
aDim.data()));
|
||||||
cnnlStatus_t stat =
|
cnnlStatus_t stat =
|
||||||
cnnlClip(context->cnnlHandle(), aDesc, aData, &min, &max, cData);
|
cnnlClip(context->cnnlHandle(), aDesc, aData, &min, &max, cData);
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -10,40 +10,29 @@ class ConcatCnnl : public BangKernelWithoutConfig {
|
||||||
auto context = dynamic_cast<const BangRuntimeObj *>(_context);
|
auto context = dynamic_cast<const BangRuntimeObj *>(_context);
|
||||||
int num = op->numInputs();
|
int num = op->numInputs();
|
||||||
int axis = op->getDim();
|
int axis = op->getDim();
|
||||||
|
|
||||||
|
auto cDim = op->getOutput()->getDims();
|
||||||
|
cnnlTensorDescriptor_t desc;
|
||||||
|
checkCnnlError(cnnlCreateTensorDescriptor(&desc));
|
||||||
|
checkCnnlError(cnnlSetTensorDescriptor(desc, CNNL_LAYOUT_NCHW,
|
||||||
|
CNNL_DTYPE_FLOAT, cDim.size(),
|
||||||
|
cDim.data()));
|
||||||
|
|
||||||
|
cnnlTensorDescriptor_t descArray[num];
|
||||||
|
for (int i = 0; i < num; ++i) {
|
||||||
|
checkCnnlError(cnnlCreateTensorDescriptor(&descArray[i]));
|
||||||
|
checkCnnlError(cnnlSetTensorDescriptor(
|
||||||
|
descArray[i], CNNL_LAYOUT_NCHW, CNNL_DTYPE_FLOAT,
|
||||||
|
op->getInputs(i)->getDims().size(),
|
||||||
|
op->getInputs(i)->getDims().data()));
|
||||||
|
}
|
||||||
|
|
||||||
void *argv[num];
|
void *argv[num];
|
||||||
for (int i = 0; i < num; ++i) {
|
for (int i = 0; i < num; ++i) {
|
||||||
argv[i] = op->getInputs(i)->getRawDataPtr<void *>();
|
argv[i] = op->getInputs(i)->getRawDataPtr<void *>();
|
||||||
}
|
}
|
||||||
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
||||||
|
|
||||||
cnnlTensorDescriptor_t desc;
|
|
||||||
|
|
||||||
int dim_array[num][4];
|
|
||||||
for (int i = 0; i < num; ++i) {
|
|
||||||
auto dim = op->getInputs(i)->getDims();
|
|
||||||
if (dim.size() != 4) {
|
|
||||||
IT_TODO_HALT();
|
|
||||||
}
|
|
||||||
dim_array[i][0] = dim[0];
|
|
||||||
dim_array[i][1] = dim[1];
|
|
||||||
dim_array[i][2] = dim[2];
|
|
||||||
dim_array[i][3] = dim[3];
|
|
||||||
}
|
|
||||||
|
|
||||||
auto dim = op->getOutput()->getDims();
|
|
||||||
int dimout_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&desc));
|
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
|
||||||
desc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_FLOAT, 4, dimout_array));
|
|
||||||
cnnlTensorDescriptor_t descArray[num];
|
|
||||||
for (int i = 0; i < num; ++i) {
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&descArray[i]));
|
|
||||||
checkCnnlError(
|
|
||||||
cnnlSetTensorDescriptor(descArray[i], CNNL_LAYOUT_NCHW,
|
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array[i]));
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t wsSize;
|
size_t wsSize;
|
||||||
cnnlGetConcatWorkspaceSize(context->cnnlHandle(), num, &wsSize);
|
cnnlGetConcatWorkspaceSize(context->cnnlHandle(), num, &wsSize);
|
||||||
BangPtr wsData = context->getWorkspace(wsSize);
|
BangPtr wsData = context->getWorkspace(wsSize);
|
||||||
|
@ -54,8 +43,6 @@ class ConcatCnnl : public BangKernelWithoutConfig {
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
for (int i = 0; i < num; ++i) {
|
for (int i = 0; i < num; ++i) {
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(descArray[i]));
|
checkCnnlError(cnnlDestroyTensorDescriptor(descArray[i]));
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,28 +21,22 @@ class DetCnnl : public BangKernelWithoutConfig {
|
||||||
cnnlTensorDescriptor_t aDesc, cDesc;
|
cnnlTensorDescriptor_t aDesc, cDesc;
|
||||||
auto dimin = op->getInputs(0)->getDims();
|
auto dimin = op->getInputs(0)->getDims();
|
||||||
auto dimout = op->getOutput()->getDims();
|
auto dimout = op->getOutput()->getDims();
|
||||||
if (dimin.size() != 4 || dimout.size() != 2)
|
|
||||||
IT_TODO_HALT();
|
|
||||||
|
|
||||||
int dimin_array[4] = {dimin[0], dimin[1], dimin[2], dimin[3]};
|
|
||||||
int dimout_array[2] = {dimout[0], dimout[1]};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_ARRAY,
|
||||||
aDesc, CNNL_LAYOUT_ARRAY, CNNL_DTYPE_FLOAT, 4, dimin_array));
|
CNNL_DTYPE_FLOAT, dimin.size(),
|
||||||
|
dimin.data()));
|
||||||
|
|
||||||
// get outputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_ARRAY,
|
||||||
cDesc, CNNL_LAYOUT_ARRAY, CNNL_DTYPE_FLOAT, 2, dimout_array));
|
CNNL_DTYPE_FLOAT, dimout.size(),
|
||||||
|
dimout.data()));
|
||||||
|
|
||||||
cnnlStatus_t stat =
|
cnnlStatus_t stat =
|
||||||
cnnlDet(context->cnnlHandle(), nlMode, aDesc, aData, cDesc, cData);
|
cnnlDet(context->cnnlHandle(), nlMode, aDesc, aData, cDesc, cData);
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,24 +22,21 @@ class ElementWiseCnnl : public BangKernelWithoutConfig {
|
||||||
auto b_dim = op->getInputs(1)->getDims();
|
auto b_dim = op->getInputs(1)->getDims();
|
||||||
auto c_dim = op->getOutput()->getDims();
|
auto c_dim = op->getOutput()->getDims();
|
||||||
|
|
||||||
if (a_dim.size() > 4 || b_dim.size() > 4 || c_dim.size() > 4)
|
|
||||||
IT_TODO_HALT();
|
|
||||||
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
aDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_FLOAT, 4, a_dim.data()));
|
CNNL_DTYPE_FLOAT, a_dim.size(),
|
||||||
|
a_dim.data()));
|
||||||
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&bDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&bDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
checkCnnlError(cnnlSetTensorDescriptor(bDesc, CNNL_LAYOUT_NCHW,
|
||||||
bDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_FLOAT, 4, b_dim.data()));
|
CNNL_DTYPE_FLOAT, b_dim.size(),
|
||||||
|
b_dim.data()));
|
||||||
|
|
||||||
// get outputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
cDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_FLOAT, 4, c_dim.data()));
|
CNNL_DTYPE_FLOAT, c_dim.size(),
|
||||||
|
c_dim.data()));
|
||||||
|
|
||||||
// get op descriptor
|
|
||||||
cnnlOpTensorDescriptor_t opDesc;
|
cnnlOpTensorDescriptor_t opDesc;
|
||||||
checkCnnlError(cnnlCreateOpTensorDescriptor(&opDesc));
|
checkCnnlError(cnnlCreateOpTensorDescriptor(&opDesc));
|
||||||
checkCnnlError(cnnlSetOpTensorDescriptor(
|
checkCnnlError(cnnlSetOpTensorDescriptor(
|
||||||
|
@ -58,8 +55,6 @@ class ElementWiseCnnl : public BangKernelWithoutConfig {
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(bDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(bDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
||||||
|
@ -79,29 +74,26 @@ class LogicOpCnnl : public BangKernelWithoutConfig {
|
||||||
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
||||||
|
|
||||||
cnnlTensorDescriptor_t aDesc, bDesc, cDesc;
|
cnnlTensorDescriptor_t aDesc, bDesc, cDesc;
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto a_dim = op->getInputs(0)->getDims();
|
||||||
if (dim.size() != 4)
|
auto b_dim = op->getInputs(1)->getDims();
|
||||||
IT_TODO_HALT();
|
auto c_dim = op->getOutput()->getDims();
|
||||||
|
|
||||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, a_dim.size(),
|
||||||
|
a_dim.data()));
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&bDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&bDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(bDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(bDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, b_dim.size(),
|
||||||
|
b_dim.data()));
|
||||||
// get outputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, c_dim.size(),
|
||||||
|
c_dim.data()));
|
||||||
|
|
||||||
size_t wsSize;
|
size_t wsSize;
|
||||||
cnnlGetLogicOpWorkspaceSize(context->cnnlHandle(), aDesc, bDesc, cDesc,
|
cnnlGetLogicOpWorkspaceSize(context->cnnlHandle(), aDesc, bDesc, cDesc,
|
||||||
&wsSize);
|
&wsSize);
|
||||||
|
|
||||||
BangPtr wsData = context->getWorkspace(wsSize);
|
BangPtr wsData = context->getWorkspace(wsSize);
|
||||||
|
|
||||||
cnnlStatus_t stat =
|
cnnlStatus_t stat =
|
||||||
|
@ -110,8 +102,6 @@ class LogicOpCnnl : public BangKernelWithoutConfig {
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(bDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(bDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
||||||
|
@ -130,29 +120,26 @@ class BitComputeCnnl : public BangKernelWithoutConfig {
|
||||||
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
||||||
|
|
||||||
cnnlTensorDescriptor_t aDesc, bDesc, cDesc;
|
cnnlTensorDescriptor_t aDesc, bDesc, cDesc;
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto a_dim = op->getInputs(0)->getDims();
|
||||||
if (dim.size() != 4)
|
auto b_dim = op->getInputs(1)->getDims();
|
||||||
IT_TODO_HALT();
|
auto c_dim = op->getOutput()->getDims();
|
||||||
|
|
||||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_INT32, 4, dim_array));
|
CNNL_DTYPE_INT32, a_dim.size(),
|
||||||
|
a_dim.data()));
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&bDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&bDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(bDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(bDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_INT32, 4, dim_array));
|
CNNL_DTYPE_INT32, b_dim.size(),
|
||||||
|
b_dim.data()));
|
||||||
// get outputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_INT32, 4, dim_array));
|
CNNL_DTYPE_INT32, c_dim.size(),
|
||||||
|
c_dim.data()));
|
||||||
|
|
||||||
size_t wsSize;
|
size_t wsSize;
|
||||||
cnnlGetBitComputeWorkspaceSize(context->cnnlHandle(), aDesc, bDesc,
|
cnnlGetBitComputeWorkspaceSize(context->cnnlHandle(), aDesc, bDesc,
|
||||||
cDesc, &wsSize);
|
cDesc, &wsSize);
|
||||||
|
|
||||||
BangPtr wsData = context->getWorkspace(wsSize);
|
BangPtr wsData = context->getWorkspace(wsSize);
|
||||||
|
|
||||||
cnnlStatus_t stat =
|
cnnlStatus_t stat =
|
||||||
|
@ -161,8 +148,6 @@ class BitComputeCnnl : public BangKernelWithoutConfig {
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(bDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(bDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
||||||
|
@ -180,29 +165,26 @@ class DivCnnl : public BangKernelWithoutConfig {
|
||||||
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
||||||
|
|
||||||
cnnlTensorDescriptor_t aDesc, bDesc, cDesc;
|
cnnlTensorDescriptor_t aDesc, bDesc, cDesc;
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto a_dim = op->getInputs(0)->getDims();
|
||||||
if (dim.size() != 4)
|
auto b_dim = op->getInputs(1)->getDims();
|
||||||
IT_TODO_HALT();
|
auto c_dim = op->getOutput()->getDims();
|
||||||
|
|
||||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, a_dim.size(),
|
||||||
|
a_dim.data()));
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&bDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&bDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(bDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(bDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, b_dim.size(),
|
||||||
|
b_dim.data()));
|
||||||
// get outputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, c_dim.size(),
|
||||||
|
c_dim.data()));
|
||||||
|
|
||||||
size_t wsSize;
|
size_t wsSize;
|
||||||
cnnlGetDivWorkspaceSize(context->cnnlHandle(), aDesc, bDesc, cDesc,
|
cnnlGetDivWorkspaceSize(context->cnnlHandle(), aDesc, bDesc, cDesc,
|
||||||
&wsSize);
|
&wsSize);
|
||||||
|
|
||||||
BangPtr wsData = context->getWorkspace(wsSize);
|
BangPtr wsData = context->getWorkspace(wsSize);
|
||||||
|
|
||||||
cnnlStatus_t stat = cnnlDiv_v2(
|
cnnlStatus_t stat = cnnlDiv_v2(
|
||||||
|
@ -211,8 +193,6 @@ class DivCnnl : public BangKernelWithoutConfig {
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(bDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(bDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
||||||
|
@ -230,26 +210,23 @@ class MaximumCnnl : public BangKernelWithoutConfig {
|
||||||
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
||||||
|
|
||||||
cnnlTensorDescriptor_t aDesc, bDesc, cDesc;
|
cnnlTensorDescriptor_t aDesc, bDesc, cDesc;
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto a_dim = op->getInputs(0)->getDims();
|
||||||
if (dim.size() != 4)
|
auto b_dim = op->getInputs(1)->getDims();
|
||||||
IT_TODO_HALT();
|
auto c_dim = op->getOutput()->getDims();
|
||||||
|
|
||||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, a_dim.size(),
|
||||||
|
a_dim.data()));
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&bDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&bDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(bDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(bDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, b_dim.size(),
|
||||||
|
b_dim.data()));
|
||||||
// get outputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, c_dim.size(),
|
||||||
|
c_dim.data()));
|
||||||
|
|
||||||
// get op descriptor
|
|
||||||
size_t wsSize;
|
size_t wsSize;
|
||||||
cnnlGetMaximumWorkspaceSize(context->cnnlHandle(), cDesc, &wsSize);
|
cnnlGetMaximumWorkspaceSize(context->cnnlHandle(), cDesc, &wsSize);
|
||||||
BangPtr wsData = context->getWorkspace(wsSize);
|
BangPtr wsData = context->getWorkspace(wsSize);
|
||||||
|
@ -260,8 +237,6 @@ class MaximumCnnl : public BangKernelWithoutConfig {
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(bDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(bDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
||||||
|
@ -279,26 +254,23 @@ class MinimumCnnl : public BangKernelWithoutConfig {
|
||||||
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
||||||
|
|
||||||
cnnlTensorDescriptor_t aDesc, bDesc, cDesc;
|
cnnlTensorDescriptor_t aDesc, bDesc, cDesc;
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto a_dim = op->getInputs(0)->getDims();
|
||||||
if (dim.size() != 4)
|
auto b_dim = op->getInputs(1)->getDims();
|
||||||
IT_TODO_HALT();
|
auto c_dim = op->getOutput()->getDims();
|
||||||
|
|
||||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, a_dim.size(),
|
||||||
|
a_dim.data()));
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&bDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&bDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(bDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(bDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, b_dim.size(),
|
||||||
|
b_dim.data()));
|
||||||
// get outputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, c_dim.size(),
|
||||||
|
c_dim.data()));
|
||||||
|
|
||||||
// get op descriptor
|
|
||||||
size_t wsSize;
|
size_t wsSize;
|
||||||
cnnlGetMinimumWorkspaceSize(context->cnnlHandle(), cDesc, &wsSize);
|
cnnlGetMinimumWorkspaceSize(context->cnnlHandle(), cDesc, &wsSize);
|
||||||
BangPtr wsData = context->getWorkspace(wsSize);
|
BangPtr wsData = context->getWorkspace(wsSize);
|
||||||
|
@ -309,8 +281,6 @@ class MinimumCnnl : public BangKernelWithoutConfig {
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(bDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(bDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
||||||
|
@ -328,30 +298,23 @@ class MSELossCnnl : public BangKernelWithoutConfig {
|
||||||
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
||||||
MSELossObj::Reduction reduction = op->getReduction();
|
MSELossObj::Reduction reduction = op->getReduction();
|
||||||
cnnlTensorDescriptor_t aDesc, bDesc, cDesc;
|
cnnlTensorDescriptor_t aDesc, bDesc, cDesc;
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto a_dim = op->getInputs(0)->getDims();
|
||||||
if (dim.size() != 4)
|
auto b_dim = op->getInputs(1)->getDims();
|
||||||
IT_TODO_HALT();
|
auto c_dim = op->getOutput()->getDims();
|
||||||
|
|
||||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
int dim_out[4] = {1, 1, 1, 1};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, a_dim.size(),
|
||||||
|
a_dim.data()));
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&bDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&bDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(bDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(bDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, b_dim.size(),
|
||||||
|
b_dim.data()));
|
||||||
|
|
||||||
// get outputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
||||||
if (reduction == MSELossObj::None) {
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
CNNL_DTYPE_FLOAT, c_dim.size(),
|
||||||
cDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_FLOAT, 4, dim_array));
|
c_dim.data()));
|
||||||
} else {
|
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
|
||||||
cDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_FLOAT, 4, dim_out));
|
|
||||||
}
|
|
||||||
cnnlStatus_t stat;
|
cnnlStatus_t stat;
|
||||||
if (reduction == MSELossObj::None) {
|
if (reduction == MSELossObj::None) {
|
||||||
stat = cnnlMSELoss(context->cnnlHandle(), CNNL_MSE_LOSS_NONE, aDesc,
|
stat = cnnlMSELoss(context->cnnlHandle(), CNNL_MSE_LOSS_NONE, aDesc,
|
||||||
|
@ -367,8 +330,6 @@ class MSELossCnnl : public BangKernelWithoutConfig {
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(bDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(bDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
||||||
|
@ -386,26 +347,23 @@ class PowerCnnl : public BangKernelWithoutConfig {
|
||||||
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
||||||
|
|
||||||
cnnlTensorDescriptor_t aDesc, bDesc, cDesc;
|
cnnlTensorDescriptor_t aDesc, bDesc, cDesc;
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto a_dim = op->getInputs(0)->getDims();
|
||||||
if (dim.size() != 4)
|
auto b_dim = op->getInputs(1)->getDims();
|
||||||
IT_TODO_HALT();
|
auto c_dim = op->getOutput()->getDims();
|
||||||
|
|
||||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, a_dim.size(),
|
||||||
|
a_dim.data()));
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&bDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&bDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(bDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(bDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, b_dim.size(),
|
||||||
|
b_dim.data()));
|
||||||
// get outputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, c_dim.size(),
|
||||||
|
c_dim.data()));
|
||||||
|
|
||||||
// get op descriptor
|
|
||||||
size_t wsSize;
|
size_t wsSize;
|
||||||
cnnlGetPowWorkspaceSize(context->cnnlHandle(), aDesc, bDesc, cDesc,
|
cnnlGetPowWorkspaceSize(context->cnnlHandle(), aDesc, bDesc, cDesc,
|
||||||
&wsSize);
|
&wsSize);
|
||||||
|
@ -417,8 +375,6 @@ class PowerCnnl : public BangKernelWithoutConfig {
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(bDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(bDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
||||||
|
@ -436,29 +392,26 @@ class FloorDivCnnl : public BangKernelWithoutConfig {
|
||||||
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
||||||
|
|
||||||
cnnlTensorDescriptor_t aDesc, bDesc, cDesc;
|
cnnlTensorDescriptor_t aDesc, bDesc, cDesc;
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto a_dim = op->getInputs(0)->getDims();
|
||||||
if (dim.size() != 4)
|
auto b_dim = op->getInputs(1)->getDims();
|
||||||
IT_TODO_HALT();
|
auto c_dim = op->getOutput()->getDims();
|
||||||
|
|
||||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, a_dim.size(),
|
||||||
|
a_dim.data()));
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&bDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&bDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(bDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(bDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, b_dim.size(),
|
||||||
|
b_dim.data()));
|
||||||
// get outputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, c_dim.size(),
|
||||||
|
c_dim.data()));
|
||||||
|
|
||||||
size_t wsSize;
|
size_t wsSize;
|
||||||
cnnlGetFloorDivWorkspaceSize(context->cnnlHandle(), aDesc, bDesc, cDesc,
|
cnnlGetFloorDivWorkspaceSize(context->cnnlHandle(), aDesc, bDesc, cDesc,
|
||||||
&wsSize);
|
&wsSize);
|
||||||
|
|
||||||
BangPtr wsData = context->getWorkspace(wsSize);
|
BangPtr wsData = context->getWorkspace(wsSize);
|
||||||
|
|
||||||
cnnlStatus_t stat = cnnlFloorDiv_v2(
|
cnnlStatus_t stat = cnnlFloorDiv_v2(
|
||||||
|
@ -467,8 +420,6 @@ class FloorDivCnnl : public BangKernelWithoutConfig {
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(bDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(bDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
||||||
|
@ -486,29 +437,26 @@ class FloorModCnnl : public BangKernelWithoutConfig {
|
||||||
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
||||||
|
|
||||||
cnnlTensorDescriptor_t aDesc, bDesc, cDesc;
|
cnnlTensorDescriptor_t aDesc, bDesc, cDesc;
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto a_dim = op->getInputs(0)->getDims();
|
||||||
if (dim.size() != 4)
|
auto b_dim = op->getInputs(1)->getDims();
|
||||||
IT_TODO_HALT();
|
auto c_dim = op->getOutput()->getDims();
|
||||||
|
|
||||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, a_dim.size(),
|
||||||
|
a_dim.data()));
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&bDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&bDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(bDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(bDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, b_dim.size(),
|
||||||
|
b_dim.data()));
|
||||||
// get outputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, c_dim.size(),
|
||||||
|
c_dim.data()));
|
||||||
|
|
||||||
size_t wsSize;
|
size_t wsSize;
|
||||||
cnnlGetFloorModWorkspaceSize(context->cnnlHandle(), aDesc, bDesc, cDesc,
|
cnnlGetFloorModWorkspaceSize(context->cnnlHandle(), aDesc, bDesc, cDesc,
|
||||||
&wsSize);
|
&wsSize);
|
||||||
|
|
||||||
BangPtr wsData = context->getWorkspace(wsSize);
|
BangPtr wsData = context->getWorkspace(wsSize);
|
||||||
|
|
||||||
cnnlStatus_t stat =
|
cnnlStatus_t stat =
|
||||||
|
@ -517,8 +465,6 @@ class FloorModCnnl : public BangKernelWithoutConfig {
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(bDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(bDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
||||||
|
@ -536,29 +482,26 @@ class SquaredDifferenceCnnl : public BangKernelWithoutConfig {
|
||||||
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
||||||
|
|
||||||
cnnlTensorDescriptor_t aDesc, bDesc, cDesc;
|
cnnlTensorDescriptor_t aDesc, bDesc, cDesc;
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto a_dim = op->getInputs(0)->getDims();
|
||||||
if (dim.size() != 4)
|
auto b_dim = op->getInputs(1)->getDims();
|
||||||
IT_TODO_HALT();
|
auto c_dim = op->getOutput()->getDims();
|
||||||
|
|
||||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, a_dim.size(),
|
||||||
|
a_dim.data()));
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&bDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&bDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(bDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(bDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, b_dim.size(),
|
||||||
|
b_dim.data()));
|
||||||
// get outputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, c_dim.size(),
|
||||||
|
c_dim.data()));
|
||||||
|
|
||||||
size_t wsSize;
|
size_t wsSize;
|
||||||
cnnlGetSquaredDifferenceWorkspaceSize(context->cnnlHandle(), aDesc,
|
cnnlGetSquaredDifferenceWorkspaceSize(context->cnnlHandle(), aDesc,
|
||||||
bDesc, cDesc, &wsSize);
|
bDesc, cDesc, &wsSize);
|
||||||
|
|
||||||
BangPtr wsData = context->getWorkspace(wsSize);
|
BangPtr wsData = context->getWorkspace(wsSize);
|
||||||
|
|
||||||
cnnlStatus_t stat =
|
cnnlStatus_t stat =
|
||||||
|
@ -567,8 +510,6 @@ class SquaredDifferenceCnnl : public BangKernelWithoutConfig {
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(bDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(bDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
||||||
|
|
|
@ -13,20 +13,17 @@ class ErfCnnl : public BangKernelWithoutConfig {
|
||||||
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
||||||
|
|
||||||
cnnlTensorDescriptor_t aDesc, cDesc;
|
cnnlTensorDescriptor_t aDesc, cDesc;
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto aDim = op->getInputs(0)->getDims();
|
||||||
if (dim.size() != 4)
|
auto cDim = op->getOutput()->getDims();
|
||||||
IT_TODO_HALT();
|
|
||||||
|
|
||||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, aDim.size(),
|
||||||
|
aDim.data()));
|
||||||
// get outputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, cDim.size(),
|
||||||
|
cDim.data()));
|
||||||
|
|
||||||
cnnlStatus_t stat =
|
cnnlStatus_t stat =
|
||||||
cnnlErf_v2(context->cnnlHandle(), CNNL_COMPUTATION_HIGH_PRECISION,
|
cnnlErf_v2(context->cnnlHandle(), CNNL_COMPUTATION_HIGH_PRECISION,
|
||||||
|
@ -34,8 +31,6 @@ class ErfCnnl : public BangKernelWithoutConfig {
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,20 +13,17 @@ class ExpCnnl : public BangKernelWithoutConfig {
|
||||||
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
||||||
|
|
||||||
cnnlTensorDescriptor_t aDesc, cDesc;
|
cnnlTensorDescriptor_t aDesc, cDesc;
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto aDim = op->getInputs(0)->getDims();
|
||||||
if (dim.size() != 4)
|
auto cDim = op->getOutput()->getDims();
|
||||||
IT_TODO_HALT();
|
|
||||||
|
|
||||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, aDim.size(),
|
||||||
|
aDim.data()));
|
||||||
// get outputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, cDim.size(),
|
||||||
|
cDim.data()));
|
||||||
|
|
||||||
cnnlStatus_t stat =
|
cnnlStatus_t stat =
|
||||||
cnnlExp_v2(context->cnnlHandle(), CNNL_COMPUTATION_HIGH_PRECISION,
|
cnnlExp_v2(context->cnnlHandle(), CNNL_COMPUTATION_HIGH_PRECISION,
|
||||||
|
@ -34,8 +31,6 @@ class ExpCnnl : public BangKernelWithoutConfig {
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,23 +13,18 @@ class FillCnnl : public BangKernelWithoutConfig {
|
||||||
float value = op->getValue();
|
float value = op->getValue();
|
||||||
|
|
||||||
cnnlTensorDescriptor_t cDesc;
|
cnnlTensorDescriptor_t cDesc;
|
||||||
auto dim = op->getOutput()->getDims();
|
auto cDim = op->getOutput()->getDims();
|
||||||
if (dim.size() != 4)
|
|
||||||
IT_TODO_HALT();
|
|
||||||
|
|
||||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
// get outputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, cDim.size(),
|
||||||
|
cDim.data()));
|
||||||
|
|
||||||
cnnlStatus_t stat =
|
cnnlStatus_t stat =
|
||||||
cnnlFill(context->cnnlHandle(), value, cDesc, cData);
|
cnnlFill(context->cnnlHandle(), value, cDesc, cData);
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -13,28 +13,23 @@ class FloorCnnl : public BangKernelWithoutConfig {
|
||||||
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
||||||
|
|
||||||
cnnlTensorDescriptor_t aDesc, cDesc;
|
cnnlTensorDescriptor_t aDesc, cDesc;
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto aDim = op->getInputs(0)->getDims();
|
||||||
if (dim.size() != 4)
|
auto cDim = op->getOutput()->getDims();
|
||||||
IT_TODO_HALT();
|
|
||||||
|
|
||||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, aDim.size(),
|
||||||
|
aDim.data()));
|
||||||
// get outputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, cDim.size(),
|
||||||
|
cDim.data()));
|
||||||
|
|
||||||
cnnlStatus_t stat =
|
cnnlStatus_t stat =
|
||||||
cnnlFloor(context->cnnlHandle(), aDesc, aData, cDesc, cData);
|
cnnlFloor(context->cnnlHandle(), aDesc, aData, cDesc, cData);
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,22 +16,16 @@ class HardtanhCnnl : public BangKernelWithoutConfig {
|
||||||
|
|
||||||
cnnlTensorDescriptor_t aDesc;
|
cnnlTensorDescriptor_t aDesc;
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto dim = op->getInputs(0)->getDims();
|
||||||
if (dim.size() != 4)
|
|
||||||
IT_TODO_HALT();
|
|
||||||
|
|
||||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
aDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_FLOAT, dim.size(), dim.data()));
|
||||||
|
|
||||||
cnnlStatus_t stat = cnnlHardtanh(context->cnnlHandle(), aDesc, aData,
|
cnnlStatus_t stat = cnnlHardtanh(context->cnnlHandle(), aDesc, aData,
|
||||||
max, min, aDesc, cData);
|
max, min, aDesc, cData);
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -14,22 +14,16 @@ class L2LossCnnl : public BangKernelWithoutConfig {
|
||||||
|
|
||||||
cnnlTensorDescriptor_t aDesc;
|
cnnlTensorDescriptor_t aDesc;
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto dim = op->getInputs(0)->getDims();
|
||||||
if (dim.size() != 4)
|
|
||||||
IT_TODO_HALT();
|
|
||||||
|
|
||||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
aDesc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_FLOAT, dim.size(), dim.data()));
|
||||||
|
|
||||||
cnnlStatus_t stat =
|
cnnlStatus_t stat =
|
||||||
cnnlL2Loss(context->cnnlHandle(), aDesc, aData, cData);
|
cnnlL2Loss(context->cnnlHandle(), aDesc, aData, cData);
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -28,20 +28,17 @@ class LogCnnl : public BangKernelWithoutConfig {
|
||||||
}
|
}
|
||||||
|
|
||||||
cnnlTensorDescriptor_t aDesc, cDesc;
|
cnnlTensorDescriptor_t aDesc, cDesc;
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto aDim = op->getInputs(0)->getDims();
|
||||||
if (dim.size() != 4)
|
auto cDim = op->getOutput()->getDims();
|
||||||
IT_TODO_HALT();
|
|
||||||
|
|
||||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, aDim.size(),
|
||||||
|
aDim.data()));
|
||||||
// get outputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, cDim.size(),
|
||||||
|
cDim.data()));
|
||||||
|
|
||||||
cnnlStatus_t stat =
|
cnnlStatus_t stat =
|
||||||
cnnlLog_v2(context->cnnlHandle(), CNNL_COMPUTATION_HIGH_PRECISION,
|
cnnlLog_v2(context->cnnlHandle(), CNNL_COMPUTATION_HIGH_PRECISION,
|
||||||
|
@ -49,8 +46,6 @@ class LogCnnl : public BangKernelWithoutConfig {
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,7 +22,6 @@ class MatmulCnnl : public BangKernelWithoutConfig {
|
||||||
int32_t transA = op->getTransA();
|
int32_t transA = op->getTransA();
|
||||||
int32_t transB = op->getTransB();
|
int32_t transB = op->getTransB();
|
||||||
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(
|
checkCnnlError(
|
||||||
cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_ARRAY, CNNL_DTYPE_FLOAT,
|
cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_ARRAY, CNNL_DTYPE_FLOAT,
|
||||||
|
@ -33,7 +32,6 @@ class MatmulCnnl : public BangKernelWithoutConfig {
|
||||||
cnnlSetTensorDescriptor(bDesc, CNNL_LAYOUT_ARRAY, CNNL_DTYPE_FLOAT,
|
cnnlSetTensorDescriptor(bDesc, CNNL_LAYOUT_ARRAY, CNNL_DTYPE_FLOAT,
|
||||||
dimInputs1.size(), dimInputs1.data()));
|
dimInputs1.size(), dimInputs1.data()));
|
||||||
|
|
||||||
// get outputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
||||||
checkCnnlError(
|
checkCnnlError(
|
||||||
cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_ARRAY, CNNL_DTYPE_FLOAT,
|
cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_ARRAY, CNNL_DTYPE_FLOAT,
|
||||||
|
@ -68,8 +66,6 @@ class MatmulCnnl : public BangKernelWithoutConfig {
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(bDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(bDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
||||||
|
|
|
@ -13,28 +13,23 @@ class NegTensorCnnl : public BangKernelWithoutConfig {
|
||||||
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
||||||
|
|
||||||
cnnlTensorDescriptor_t aDesc, cDesc;
|
cnnlTensorDescriptor_t aDesc, cDesc;
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto aDim = op->getInputs(0)->getDims();
|
||||||
if (dim.size() != 4)
|
auto cDim = op->getOutput()->getDims();
|
||||||
IT_TODO_HALT();
|
|
||||||
|
|
||||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, aDim.size(),
|
||||||
|
aDim.data()));
|
||||||
// get outputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, cDim.size(),
|
||||||
|
cDim.data()));
|
||||||
|
|
||||||
cnnlStatus_t stat =
|
cnnlStatus_t stat =
|
||||||
cnnlNegTensor(context->cnnlHandle(), aDesc, aData, cDesc, cData);
|
cnnlNegTensor(context->cnnlHandle(), aDesc, aData, cDesc, cData);
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,28 +13,23 @@ class ReciprocalCnnl : public BangKernelWithoutConfig {
|
||||||
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
||||||
|
|
||||||
cnnlTensorDescriptor_t aDesc, cDesc;
|
cnnlTensorDescriptor_t aDesc, cDesc;
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto aDim = op->getInputs(0)->getDims();
|
||||||
if (dim.size() != 4)
|
auto cDim = op->getOutput()->getDims();
|
||||||
IT_TODO_HALT();
|
|
||||||
|
|
||||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, aDim.size(),
|
||||||
|
aDim.data()));
|
||||||
// get outputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, cDim.size(),
|
||||||
|
cDim.data()));
|
||||||
|
|
||||||
cnnlStatus_t stat =
|
cnnlStatus_t stat =
|
||||||
cnnlReciprocal(context->cnnlHandle(), aDesc, aData, cDesc, cData);
|
cnnlReciprocal(context->cnnlHandle(), aDesc, aData, cDesc, cData);
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,17 +11,11 @@ class CopyBang : public BangKernelWithoutConfig {
|
||||||
auto outData = op->getOutputs()[0]->getRawDataPtr<void *>();
|
auto outData = op->getOutputs()[0]->getRawDataPtr<void *>();
|
||||||
cnnlTensorDescriptor_t aDesc;
|
cnnlTensorDescriptor_t aDesc;
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto dim = op->getInputs(0)->getDims();
|
||||||
int len = dim.size();
|
|
||||||
int size = 1;
|
|
||||||
for (int i = 0; i < len; ++i) {
|
|
||||||
size *= dim[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
int dim_array[1] = {size};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_ARRAY,
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_ARRAY,
|
||||||
CNNL_DTYPE_FLOAT, 1, dim_array));
|
CNNL_DTYPE_FLOAT, dim.size(),
|
||||||
|
dim.data()));
|
||||||
cnnlStatus_t stat =
|
cnnlStatus_t stat =
|
||||||
cnnlCopy(context->cnnlHandle(), aDesc, inData, aDesc, outData);
|
cnnlCopy(context->cnnlHandle(), aDesc, inData, aDesc, outData);
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
|
|
|
@ -13,20 +13,17 @@ class RsqrtCnnl : public BangKernelWithoutConfig {
|
||||||
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
||||||
|
|
||||||
cnnlTensorDescriptor_t aDesc, cDesc;
|
cnnlTensorDescriptor_t aDesc, cDesc;
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto aDim = op->getInputs(0)->getDims();
|
||||||
if (dim.size() != 4)
|
auto cDim = op->getOutput()->getDims();
|
||||||
IT_TODO_HALT();
|
|
||||||
|
|
||||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, aDim.size(),
|
||||||
|
aDim.data()));
|
||||||
// get outputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, cDim.size(),
|
||||||
|
cDim.data()));
|
||||||
|
|
||||||
cnnlStatus_t stat =
|
cnnlStatus_t stat =
|
||||||
cnnlRsqrt_v2(context->cnnlHandle(), CNNL_COMPUTATION_HIGH_PRECISION,
|
cnnlRsqrt_v2(context->cnnlHandle(), CNNL_COMPUTATION_HIGH_PRECISION,
|
||||||
|
@ -34,8 +31,6 @@ class RsqrtCnnl : public BangKernelWithoutConfig {
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,39 +10,26 @@ class SplitCnnl : public BangKernelWithoutConfig {
|
||||||
auto context = dynamic_cast<const BangRuntimeObj *>(_context);
|
auto context = dynamic_cast<const BangRuntimeObj *>(_context);
|
||||||
int num = op->numOutputs();
|
int num = op->numOutputs();
|
||||||
int axis = op->getDim();
|
int axis = op->getDim();
|
||||||
void *argv[num];
|
|
||||||
for (int i = 0; i < num; ++i) {
|
|
||||||
argv[i] = op->getOutput(i)->getRawDataPtr<void *>();
|
|
||||||
}
|
|
||||||
void *const inputData = (op->getInputs(0)->getRawDataPtr<void *>());
|
|
||||||
|
|
||||||
cnnlTensorDescriptor_t desc;
|
|
||||||
|
|
||||||
int dimout_array[num][4];
|
|
||||||
for (int i = 0; i < num; ++i) {
|
|
||||||
auto dim = op->getOutput(i)->getDims();
|
|
||||||
if (dim.size() != 4) {
|
|
||||||
IT_TODO_HALT();
|
|
||||||
}
|
|
||||||
dimout_array[i][0] = dim[0];
|
|
||||||
dimout_array[i][1] = dim[1];
|
|
||||||
dimout_array[i][2] = dim[2];
|
|
||||||
dimout_array[i][3] = dim[3];
|
|
||||||
}
|
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto dim = op->getInputs(0)->getDims();
|
||||||
if (dim.size() != 4) {
|
cnnlTensorDescriptor_t desc;
|
||||||
IT_TODO_HALT();
|
|
||||||
}
|
|
||||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&desc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&desc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(desc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
desc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_FLOAT, dim.size(), dim.data()));
|
||||||
|
|
||||||
cnnlTensorDescriptor_t descArray[num];
|
cnnlTensorDescriptor_t descArray[num];
|
||||||
for (int i = 0; i < num; ++i) {
|
for (int i = 0; i < num; ++i) {
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&descArray[i]));
|
checkCnnlError(cnnlCreateTensorDescriptor(&descArray[i]));
|
||||||
checkCnnlError(
|
checkCnnlError(cnnlSetTensorDescriptor(
|
||||||
cnnlSetTensorDescriptor(descArray[i], CNNL_LAYOUT_NCHW,
|
descArray[i], CNNL_LAYOUT_NCHW, CNNL_DTYPE_FLOAT,
|
||||||
CNNL_DTYPE_FLOAT, 4, dimout_array[i]));
|
op->getOutput(i)->getDims().size(),
|
||||||
|
op->getOutput(i)->getDims().data()));
|
||||||
|
}
|
||||||
|
|
||||||
|
void *const inputData = (op->getInputs(0)->getRawDataPtr<void *>());
|
||||||
|
void *argv[num];
|
||||||
|
for (int i = 0; i < num; ++i) {
|
||||||
|
argv[i] = op->getOutput(i)->getRawDataPtr<void *>();
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t wsSize;
|
size_t wsSize;
|
||||||
|
@ -55,8 +42,6 @@ class SplitCnnl : public BangKernelWithoutConfig {
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
for (int i = 0; i < num; ++i) {
|
for (int i = 0; i < num; ++i) {
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(descArray[i]));
|
checkCnnlError(cnnlDestroyTensorDescriptor(descArray[i]));
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,20 +13,17 @@ class SqrtCnnl : public BangKernelWithoutConfig {
|
||||||
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
||||||
|
|
||||||
cnnlTensorDescriptor_t aDesc, cDesc;
|
cnnlTensorDescriptor_t aDesc, cDesc;
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto aDim = op->getInputs(0)->getDims();
|
||||||
if (dim.size() != 4)
|
auto cDim = op->getOutput()->getDims();
|
||||||
IT_TODO_HALT();
|
|
||||||
|
|
||||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, aDim.size(),
|
||||||
|
aDim.data()));
|
||||||
// get outputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, cDim.size(),
|
||||||
|
cDim.data()));
|
||||||
|
|
||||||
cnnlStatus_t stat =
|
cnnlStatus_t stat =
|
||||||
cnnlSqrt_v2(context->cnnlHandle(), CNNL_COMPUTATION_HIGH_PRECISION,
|
cnnlSqrt_v2(context->cnnlHandle(), CNNL_COMPUTATION_HIGH_PRECISION,
|
||||||
|
@ -34,8 +31,6 @@ class SqrtCnnl : public BangKernelWithoutConfig {
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,26 +15,21 @@ class TransposeCnnl : public BangKernelWithoutConfig {
|
||||||
cnnlTensorDescriptor_t aDesc, cDesc;
|
cnnlTensorDescriptor_t aDesc, cDesc;
|
||||||
auto dimin = op->getInputs(0)->getDims();
|
auto dimin = op->getInputs(0)->getDims();
|
||||||
auto dimout = op->getOutput()->getDims();
|
auto dimout = op->getOutput()->getDims();
|
||||||
if (dimin.size() != 4 || dimout.size() != 4)
|
|
||||||
IT_TODO_HALT();
|
|
||||||
|
|
||||||
int dimin_array[4] = {dimin[0], dimin[1], dimin[2], dimin[3]};
|
|
||||||
int dimout_array[4] = {dimout[0], dimout[1], dimout[2], dimout[3]};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_ARRAY,
|
||||||
aDesc, CNNL_LAYOUT_ARRAY, CNNL_DTYPE_FLOAT, 4, dimin_array));
|
CNNL_DTYPE_FLOAT, dimin.size(),
|
||||||
|
dimin.data()));
|
||||||
// get outputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_ARRAY,
|
||||||
cDesc, CNNL_LAYOUT_ARRAY, CNNL_DTYPE_FLOAT, 4, dimout_array));
|
CNNL_DTYPE_FLOAT, dimout.size(),
|
||||||
|
dimout.data()));
|
||||||
|
|
||||||
// get op descriptor
|
|
||||||
auto permute = op->getPermute();
|
auto permute = op->getPermute();
|
||||||
cnnlTransposeDescriptor_t opDesc;
|
cnnlTransposeDescriptor_t opDesc;
|
||||||
checkCnnlError(cnnlCreateTransposeDescriptor(&opDesc));
|
checkCnnlError(cnnlCreateTransposeDescriptor(&opDesc));
|
||||||
checkCnnlError(cnnlSetTransposeDescriptor(opDesc, 4, permute.data()));
|
checkCnnlError(
|
||||||
|
cnnlSetTransposeDescriptor(opDesc, permute.size(), permute.data()));
|
||||||
|
|
||||||
size_t wsSize;
|
size_t wsSize;
|
||||||
cnnlGetTransposeWorkspaceSize(context->cnnlHandle(), aDesc, opDesc,
|
cnnlGetTransposeWorkspaceSize(context->cnnlHandle(), aDesc, opDesc,
|
||||||
|
@ -47,8 +42,6 @@ class TransposeCnnl : public BangKernelWithoutConfig {
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
||||||
checkCnnlError(cnnlDestroyTransposeDescriptor(opDesc));
|
checkCnnlError(cnnlDestroyTransposeDescriptor(opDesc));
|
||||||
|
|
|
@ -15,22 +15,18 @@ class TrigonCnnl : public BangKernelWithoutConfig {
|
||||||
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
|
||||||
|
|
||||||
cnnlTensorDescriptor_t aDesc, cDesc;
|
cnnlTensorDescriptor_t aDesc, cDesc;
|
||||||
auto dim = op->getInputs(0)->getDims();
|
auto aDim = op->getInputs(0)->getDims();
|
||||||
if (dim.size() != 4)
|
auto cDim = op->getOutput()->getDims();
|
||||||
IT_TODO_HALT();
|
|
||||||
|
|
||||||
int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]};
|
|
||||||
// get inputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&aDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(aDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, aDim.size(),
|
||||||
|
aDim.data()));
|
||||||
// get outputs
|
|
||||||
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
checkCnnlError(cnnlCreateTensorDescriptor(&cDesc));
|
||||||
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
checkCnnlError(cnnlSetTensorDescriptor(cDesc, CNNL_LAYOUT_NCHW,
|
||||||
CNNL_DTYPE_FLOAT, 4, dim_array));
|
CNNL_DTYPE_FLOAT, cDim.size(),
|
||||||
|
cDim.data()));
|
||||||
|
|
||||||
// get op descriptor
|
|
||||||
cnnlTrigonDescriptor_t opDesc;
|
cnnlTrigonDescriptor_t opDesc;
|
||||||
checkCnnlError(cnnlCreateTrigonDescriptor(&opDesc));
|
checkCnnlError(cnnlCreateTrigonDescriptor(&opDesc));
|
||||||
checkCnnlError(cnnlSetTrigonDescriptor(opDesc, getOpType()));
|
checkCnnlError(cnnlSetTrigonDescriptor(opDesc, getOpType()));
|
||||||
|
@ -40,8 +36,6 @@ class TrigonCnnl : public BangKernelWithoutConfig {
|
||||||
if (stat != CNNL_STATUS_SUCCESS)
|
if (stat != CNNL_STATUS_SUCCESS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Destories in BANG does not require sync. But cnnl does not state
|
|
||||||
// whether sync is required before destories.
|
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(aDesc));
|
||||||
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
checkCnnlError(cnnlDestroyTensorDescriptor(cDesc));
|
||||||
checkCnnlError(cnnlDestroyTrigonDescriptor(opDesc));
|
checkCnnlError(cnnlDestroyTrigonDescriptor(opDesc));
|
||||||
|
|
Loading…
Reference in New Issue