forked from jiuyuan/InfiniTensor
Xpu abs (#121)
* add: unary kernel for xpu
* formatting
* format
* format
* format
* fix: pointer jump
* fix optype comments
This commit is contained in:
parent a4c6214529
commit 85b96d8997
@@ -21,10 +21,10 @@ struct OpType {
     Add,   // Binary
     And,   // Binary
     ArgMax, //
-    Asin,  // Binary
-    Asinh, // Binary
-    Atan,  // Binary
-    Atanh, // Binary
+    Asin,  // Unary
+    Asinh, // Unary
+    Atan,  // Unary
+    Atanh, // Unary
     AveragePool, // Pool
     BatchNormalization, //
     Bernoulli, //
@@ -151,14 +151,15 @@ class TensorObj : public TensorBaseObj {
     }

     template <typename T>
-    bool equalDataImpl(const T *a, const T *b, size_t size) const {
+    bool equalDataImpl(const T *a, const T *b, size_t size,
+                       double relativeError = 1e-6) const {
         for (size_t i = 0; i < size; ++i) {
             if constexpr (std::is_integral_v<T>) {
                 if (a[i] != b[i])
                     return false;
             } else if constexpr (std::is_floating_point_v<T>) {
                 if (fabs(a[i] - b[i]) / std::max(fabs(a[i]), fabs(b[i])) >
-                    1e-6) {
+                    relativeError) {
                     printf("Error on %lu: %f %f\n", i, a[i], b[i]);
                     return false;
                 }
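In effect, two floating-point elements now count as equal when |a[i] - b[i]| / max(|a[i]|, |b[i]|) does not exceed relativeError, with the previously hard-coded 1e-6 kept as the default. A minimal standalone sketch of that predicate (illustrative only, not code from this commit):

    #include <algorithm>
    #include <cmath>

    // Mirrors the relative-error test in equalDataImpl: reject only when the
    // relative gap exceeds the tolerance.
    bool nearlyEqual(double a, double b, double relativeError = 1e-6) {
        return !(std::fabs(a - b) / std::max(std::fabs(a), std::fabs(b)) >
                 relativeError);
    }
    // nearlyEqual(1.0, 1.0000001) -> true  (relative gap ~1e-7)
    // nearlyEqual(1.0, 1.001)     -> false (relative gap ~1e-3)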
@@ -103,7 +103,8 @@ bool TensorObj::equalData(const Tensor &rhs, double relativeError) const {
 #define TEST_EQUAL(N) \
     if (dtype == DataType(N)) \
         return equalDataImpl(getRawDataPtr<DT<N>::t *>(), \
-                             rhs->getRawDataPtr<DT<N>::t *>(), size());
+                             rhs->getRawDataPtr<DT<N>::t *>(), size(), \
+                             relativeError);

     TEST_EQUAL(0) // fmt: new line
     else TEST_EQUAL(1) //
@@ -60,6 +60,50 @@ template <typename T> class NaiveSqrt : public NativeUnary<T> {
     T doCompute(T val) const override { return std::sqrt(val); }
 };

+template <typename T> class NaiveCos : public NativeUnary<T> {
+    T doCompute(T val) const override { return std::cos(val); }
+};
+
+template <typename T> class NaiveSin : public NativeUnary<T> {
+    T doCompute(T val) const override { return std::sin(val); }
+};
+
+template <typename T> class NaiveTan : public NativeUnary<T> {
+    T doCompute(T val) const override { return std::tan(val); }
+};
+
+template <typename T> class NaiveSinh : public NativeUnary<T> {
+    T doCompute(T val) const override { return std::sinh(val); }
+};
+
+template <typename T> class NaiveCosh : public NativeUnary<T> {
+    T doCompute(T val) const override { return std::cosh(val); }
+};
+
+template <typename T> class NaiveErf : public NativeUnary<T> {
+    T doCompute(T val) const override { return std::erf(val); }
+};
+
+template <typename T> class NaiveACos : public NativeUnary<T> {
+    T doCompute(T val) const override { return std::acos(val); }
+};
+
+template <typename T> class NaiveACosh : public NativeUnary<T> {
+    T doCompute(T val) const override { return std::acosh(val); }
+};
+
+template <typename T> class NaiveASin : public NativeUnary<T> {
+    T doCompute(T val) const override { return std::asin(val); }
+};
+
+template <typename T> class NaiveASinh : public NativeUnary<T> {
+    T doCompute(T val) const override { return std::asinh(val); }
+};
+
+template <typename T> class NaiveATanh : public NativeUnary<T> {
+    T doCompute(T val) const override { return std::atanh(val); }
+};
+
 template <typename T> class Clip : public CpuKernelWithoutConfig {
     void compute(const Operator &_op,
                  const RuntimeObj *context) const override {
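NativeUnary itself is not part of this diff; judging from the subclasses above, it is a CPU kernel whose compute() walks the flat tensor data and applies the doCompute hook to every element. A rough sketch of that pattern (illustrative only, assuming the repository's headers; the actual base class may differ):

    // Illustrative sketch of the NativeUnary pattern, not the repository's code.
    template <typename T> class NativeUnarySketch : public CpuKernelWithoutConfig {
        virtual T doCompute(T val) const = 0; // per-element hook, e.g. std::cos
        void compute(const Operator &_op, const RuntimeObj *) const override {
            auto op = as<UnaryObj>(_op);
            T *in = op->getInputs(0)->getRawDataPtr<T *>();
            T *out = op->getOutput()->getRawDataPtr<T *>();
            auto len = op->getOutput()->size();
            for (size_t i = 0; i < len; ++i)
                out[i] = doCompute(in[i]);
        }
    };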
@@ -79,6 +123,43 @@ template <typename T> class Clip : public CpuKernelWithoutConfig {
     }
 };

+template <typename T> class Log : public CpuKernelWithoutConfig {
+    void compute(const Operator &_op,
+                 const RuntimeObj *context) const override {
+        auto op = as<LogObj>(_op);
+        T *inptr = op->getInputs(0)->getRawDataPtr<T *>();
+        T *outptr = op->getOutput()->getRawDataPtr<T *>();
+        auto logType = op->getType(); // get log type
+
+        auto len = op->getOutput()->size();
+        for (size_t offset = 0; offset < len; offset++) {
+            T res;
+            auto val = *inptr++;
+            switch (logType) {
+            case LogObj::LogE:
+                res = std::log(val);
+                *outptr++ = res;
+                break;
+            case LogObj::Log2:
+                res = std::log2(val);
+                *outptr++ = res;
+                break;
+            case LogObj::Log10:
+                res = std::log10(val);
+                *outptr++ = res;
+                break;
+            default:
+                printf("LogType not Defined");
+                break;
+            }
+        }
+    }
+};
+
+template <typename T> class NaiveATan : public NativeUnary<T> {
+    T doCompute(T val) const override { return std::atan(val); }
+};
+
 REGISTER_KERNEL(Device::CPU, OpType::Relu, DataType::UInt32,
                 NaiveRelu<uint32_t>, "reluNaive_CPU_uint32");
 REGISTER_KERNEL(Device::CPU, OpType::Relu, DataType::Float32, NaiveRelu<float>,
@@ -103,4 +184,30 @@ REGISTER_KERNEL(Device::CPU, OpType::Softmax, DataType::Float32,
                 NaiveSoftmax<float>, "softmaxNaive_CPU_float32");
 REGISTER_KERNEL(Device::CPU, OpType::Clip, DataType::Float32, Clip<float>,
                 "Clip_CPU_float32");
+REGISTER_KERNEL(Device::CPU, OpType::Atan, DataType::Float32, NaiveATan<float>,
+                "Atan_CPU_float32");
+REGISTER_KERNEL(Device::CPU, OpType::Log, DataType::Float32, Log<float>,
+                "Log_CPU_float32");
+REGISTER_KERNEL(Device::CPU, OpType::Cos, DataType::Float32, NaiveCos<float>,
+                "Cos_CPU_float32");
+REGISTER_KERNEL(Device::CPU, OpType::Sin, DataType::Float32, NaiveSin<float>,
+                "Sin_CPU_float32");
+REGISTER_KERNEL(Device::CPU, OpType::Tan, DataType::Float32, NaiveTan<float>,
+                "Tan_CPU_float32");
+REGISTER_KERNEL(Device::CPU, OpType::Sinh, DataType::Float32, NaiveSinh<float>,
+                "Sinh_CPU_float32");
+REGISTER_KERNEL(Device::CPU, OpType::Cosh, DataType::Float32, NaiveCosh<float>,
+                "Cosh_CPU_float32");
+REGISTER_KERNEL(Device::CPU, OpType::Erf, DataType::Float32, NaiveErf<float>,
+                "Erf_CPU_float32");
+REGISTER_KERNEL(Device::CPU, OpType::Acos, DataType::Float32, NaiveACos<float>,
+                "ACos_CPU_float32");
+REGISTER_KERNEL(Device::CPU, OpType::Acosh, DataType::Float32,
+                NaiveACosh<float>, "ACosh_CPU_float32");
+REGISTER_KERNEL(Device::CPU, OpType::Asin, DataType::Float32, NaiveASin<float>,
+                "ASin_CPU_float32");
+REGISTER_KERNEL(Device::CPU, OpType::Asinh, DataType::Float32,
+                NaiveASinh<float>, "ASinh_CPU_float32");
+REGISTER_KERNEL(Device::CPU, OpType::Atanh, DataType::Float32,
+                NaiveATanh<float>, "ATanh_CPU_float32");
 }; // namespace infini
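The CPU kernel above calls std::log, std::log2, and std::log10 directly. The XPU kernel in the next hunk instead computes the natural log with baidu::xpu::api::log and then derives Log2 and Log10 through the change-of-base identity log_b(x) = ln(x) / ln(b). A quick standalone check of that identity (illustrative, not part of the diff):

    #include <cassert>
    #include <cmath>

    int main() {
        double x = 2.0;
        // Change of base: log2(x) == ln(x) / ln(2), log10(x) == ln(x) / ln(10).
        assert(std::fabs(std::log2(x) - std::log(x) / std::log(2.0)) < 1e-12);
        assert(std::fabs(std::log10(x) - std::log(x) / std::log(10.0)) < 1e-12);
        return 0;
    }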
@@ -226,6 +226,268 @@ class ReciprocalXdnn : public XPUKernelWithoutConfig {
     }
 };

+class AbsXdnn : public XPUKernelWithoutConfig {
+    void compute(const Operator &_op,
+                 const RuntimeObj *_context) const override {
+        auto op = as<UnaryObj>(_op);
+        auto context = dynamic_cast<const XPURuntimeObj *>(_context);
+
+        void *const aData = (op->getInputs(0)->getRawDataPtr<void *>());
+        void *const cData = (op->getOutput()->getRawDataPtr<void *>());
+        auto len = op->getInputs(0)->size();
+
+        auto ret = baidu::xpu::api::abs<float>(
+            context->XPUHandle(), (float *)aData, (float *)cData, len);
+        assert(ret == 0);
+        return;
+    }
+};
+
+class ATanXdnn : public XPUKernelWithoutConfig {
+    void compute(const Operator &_op,
+                 const RuntimeObj *_context) const override {
+        auto op = as<UnaryObj>(_op);
+        auto context = dynamic_cast<const XPURuntimeObj *>(_context);
+
+        void *const aData = (op->getInputs(0)->getRawDataPtr<void *>());
+        void *const cData = (op->getOutput()->getRawDataPtr<void *>());
+        auto len = op->getInputs(0)->size();
+
+        auto ret = baidu::xpu::api::arctan<float>(
+            context->XPUHandle(), (float *)aData, (float *)cData, len);
+        assert(ret == 0);
+        return;
+    }
+};
+
+class LogXdnn : public XPUKernelWithoutConfig {
+    void compute(const Operator &_op,
+                 const RuntimeObj *_context) const override {
+        auto op = as<LogObj>(_op);
+        auto context = dynamic_cast<const XPURuntimeObj *>(_context);
+        void *const aData = (op->getInputs(0)->getRawDataPtr<void *>());
+        void *const cData = (op->getOutput()->getRawDataPtr<void *>());
+        auto aDim = op->getInputs(0)->getDims();
+        std::vector<int> divDim = {
+            1,
+        };
+        auto len = op->getInputs(0)->size();
+        // get ptr of tempspace
+        XPUPtr temp = context->getWorkspace(len * sizeof(float));
+        LogObj::LogType type = op->getType();
+        // get output of xpu::api::loge(x)
+        auto ret = baidu::xpu::api::log<float>(
+            context->XPUHandle(), (float *)aData, (float *)temp, len);
+        // get ptr of divider
+        XPUPtr dd =
+            (float *)(context->getWorkspace((1 + len) * sizeof(float))) + len;
+        // choose from logE, log2, log10
+        switch (type) {
+            float constant;
+        case LogObj::LogE:
+            // if use loge, copy from temp to cData
+            ret = baidu::xpu::api::copy<float>(
+                context->XPUHandle(), (float *)temp, (float *)cData, len);
+            break;
+        case LogObj::Log2:
+            constant = std::log(2);
+            context->copyBlobFromCPU(dd, &constant, sizeof(float));
+            ret = baidu::xpu::api::broadcast_div<float>(
+                context->XPUHandle(), (float *)temp, (float *)dd,
+                (float *)cData, aDim, divDim);
+            break;
+        case LogObj::Log10:
+            constant = std::log(10);
+            context->copyBlobFromCPU(dd, &constant, sizeof(float));
+            ret = baidu::xpu::api::broadcast_div<float>(
+                context->XPUHandle(), (float *)temp, (float *)dd,
+                (float *)cData, aDim, divDim);
+            break;
+        default:
+            printf("LogType not support!");
+            break;
+        }
+        assert(ret == 0);
+        return;
+    }
+};
+
+class CosXdnn : public XPUKernelWithoutConfig {
+    void compute(const Operator &_op,
+                 const RuntimeObj *_context) const override {
+        auto op = as<CosObj>(_op);
+        auto context = dynamic_cast<const XPURuntimeObj *>(_context);
+        void *const aData = (op->getInputs(0)->getRawDataPtr<void *>());
+        void *const cData = (op->getOutput()->getRawDataPtr<void *>());
+        auto len = op->getInputs(0)->size();
+        auto ret = baidu::xpu::api::cos<float>(
+            context->XPUHandle(), (float *)aData, (float *)cData, len);
+
+        assert(ret == 0);
+        return;
+    }
+};
+
+class SinXdnn : public XPUKernelWithoutConfig {
+    void compute(const Operator &_op,
+                 const RuntimeObj *_context) const override {
+        auto op = as<SinObj>(_op);
+        auto context = dynamic_cast<const XPURuntimeObj *>(_context);
+        void *const aData = (op->getInputs(0)->getRawDataPtr<void *>());
+        void *const cData = (op->getOutput()->getRawDataPtr<void *>());
+        auto len = op->getInputs(0)->size();
+        auto ret = baidu::xpu::api::sin<float>(
+            context->XPUHandle(), (float *)aData, (float *)cData, len);
+
+        assert(ret == 0);
+        return;
+    }
+};
+
+class TanXdnn : public XPUKernelWithoutConfig {
+    void compute(const Operator &_op,
+                 const RuntimeObj *_context) const override {
+        auto op = as<TanObj>(_op);
+        auto context = dynamic_cast<const XPURuntimeObj *>(_context);
+        void *const aData = (op->getInputs(0)->getRawDataPtr<void *>());
+        void *const cData = (op->getOutput()->getRawDataPtr<void *>());
+        auto len = op->getInputs(0)->size();
+        auto ret = baidu::xpu::api::tan<float>(
+            context->XPUHandle(), (float *)aData, (float *)cData, len);
+
+        assert(ret == 0);
+        return;
+    }
+};
+
+class SinhXdnn : public XPUKernelWithoutConfig {
+    void compute(const Operator &_op,
+                 const RuntimeObj *_context) const override {
+        auto op = as<SinHObj>(_op);
+        auto context = dynamic_cast<const XPURuntimeObj *>(_context);
+        void *const aData = (op->getInputs(0)->getRawDataPtr<void *>());
+        void *const cData = (op->getOutput()->getRawDataPtr<void *>());
+        auto len = op->getInputs(0)->size();
+        auto ret = baidu::xpu::api::sinh<float>(
+            context->XPUHandle(), (float *)aData, (float *)cData, len);
+
+        assert(ret == 0);
+        return;
+    }
+};
+
+class CoshXdnn : public XPUKernelWithoutConfig {
+    void compute(const Operator &_op,
+                 const RuntimeObj *_context) const override {
+        auto op = as<CosHObj>(_op);
+        auto context = dynamic_cast<const XPURuntimeObj *>(_context);
+        void *const aData = (op->getInputs(0)->getRawDataPtr<void *>());
+        void *const cData = (op->getOutput()->getRawDataPtr<void *>());
+        auto len = op->getInputs(0)->size();
+        auto ret = baidu::xpu::api::cosh<float>(
+            context->XPUHandle(), (float *)aData, (float *)cData, len);
+
+        assert(ret == 0);
+        return;
+    }
+};
+
+class ErfXdnn : public XPUKernelWithoutConfig {
+    void compute(const Operator &_op,
+                 const RuntimeObj *_context) const override {
+        auto op = as<ErfObj>(_op);
+        auto context = dynamic_cast<const XPURuntimeObj *>(_context);
+        void *const aData = (op->getInputs(0)->getRawDataPtr<void *>());
+        void *const cData = (op->getOutput()->getRawDataPtr<void *>());
+        auto len = op->getInputs(0)->size();
+        auto ret = baidu::xpu::api::erf<float>(
+            context->XPUHandle(), (float *)aData, (float *)cData, len);
+
+        assert(ret == 0);
+        return;
+    }
+};
+
+class ACosXdnn : public XPUKernelWithoutConfig {
+    void compute(const Operator &_op,
+                 const RuntimeObj *_context) const override {
+        auto op = as<ACosObj>(_op);
+        auto context = dynamic_cast<const XPURuntimeObj *>(_context);
+        void *const aData = (op->getInputs(0)->getRawDataPtr<void *>());
+        void *const cData = (op->getOutput()->getRawDataPtr<void *>());
+        auto len = op->getInputs(0)->size();
+        auto ret = baidu::xpu::api::arccos<float>(
+            context->XPUHandle(), (float *)aData, (float *)cData, len);
+
+        assert(ret == 0);
+        return;
+    }
+};
+
+class ACoshXdnn : public XPUKernelWithoutConfig {
+    void compute(const Operator &_op,
+                 const RuntimeObj *_context) const override {
+        auto op = as<ACosHObj>(_op);
+        auto context = dynamic_cast<const XPURuntimeObj *>(_context);
+        void *const aData = (op->getInputs(0)->getRawDataPtr<void *>());
+        void *const cData = (op->getOutput()->getRawDataPtr<void *>());
+        auto len = op->getInputs(0)->size();
+        auto ret = baidu::xpu::api::acosh<float>(
+            context->XPUHandle(), (float *)aData, (float *)cData, len);
+
+        assert(ret == 0);
+        return;
+    }
+};
+
+class ASinXdnn : public XPUKernelWithoutConfig {
+    void compute(const Operator &_op,
+                 const RuntimeObj *_context) const override {
+        auto op = as<ASinObj>(_op);
+        auto context = dynamic_cast<const XPURuntimeObj *>(_context);
+        void *const aData = (op->getInputs(0)->getRawDataPtr<void *>());
+        void *const cData = (op->getOutput()->getRawDataPtr<void *>());
+        auto len = op->getInputs(0)->size();
+        auto ret = baidu::xpu::api::arcsin<float>(
+            context->XPUHandle(), (float *)aData, (float *)cData, len);
+
+        assert(ret == 0);
+        return;
+    }
+};
+
+class ASinhXdnn : public XPUKernelWithoutConfig {
+    void compute(const Operator &_op,
+                 const RuntimeObj *_context) const override {
+        auto op = as<ASinHObj>(_op);
+        auto context = dynamic_cast<const XPURuntimeObj *>(_context);
+        void *const aData = (op->getInputs(0)->getRawDataPtr<void *>());
+        void *const cData = (op->getOutput()->getRawDataPtr<void *>());
+        auto len = op->getInputs(0)->size();
+        auto ret = baidu::xpu::api::asinh<float>(
+            context->XPUHandle(), (float *)aData, (float *)cData, len);
+
+        assert(ret == 0);
+        return;
+    }
+};
+
+class ATanhXdnn : public XPUKernelWithoutConfig {
+    void compute(const Operator &_op,
+                 const RuntimeObj *_context) const override {
+        auto op = as<ATanHObj>(_op);
+        auto context = dynamic_cast<const XPURuntimeObj *>(_context);
+        void *const aData = (op->getInputs(0)->getRawDataPtr<void *>());
+        void *const cData = (op->getOutput()->getRawDataPtr<void *>());
+        auto len = op->getInputs(0)->size();
+        auto ret = baidu::xpu::api::atanh<float>(
+            context->XPUHandle(), (float *)aData, (float *)cData, len);
+
+        assert(ret == 0);
+        return;
+    }
+};
+
 REGISTER_KERNEL(Device::XPU, OpType::Relu, DataType::Float32, ReluXdnn,
                 "Relu_xdnn_XPU_Float32");
 REGISTER_KERNEL(Device::XPU, OpType::Sigmoid, DataType::Float32, SigmoidXdnn,
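Apart from LogXdnn, every kernel in this hunk follows the same skeleton: cast the operator, fetch the raw input/output pointers, call the matching baidu::xpu::api routine on the runtime's XPU handle, and assert a zero return code. A hedged sketch of that skeleton for some future unary op (the class name is hypothetical, it assumes the same headers as the file above, and api::abs merely stands in for whichever XDNN call the op needs):

    // Hypothetical example following the pattern above; not part of this commit.
    class SomeUnaryXdnn : public XPUKernelWithoutConfig {
        void compute(const Operator &_op,
                     const RuntimeObj *_context) const override {
            auto op = as<UnaryObj>(_op);
            auto context = dynamic_cast<const XPURuntimeObj *>(_context);
            void *const aData = (op->getInputs(0)->getRawDataPtr<void *>());
            void *const cData = (op->getOutput()->getRawDataPtr<void *>());
            auto len = op->getInputs(0)->size();
            // Swap in the XDNN routine that implements the operator.
            auto ret = baidu::xpu::api::abs<float>(
                context->XPUHandle(), (float *)aData, (float *)cData, len);
            assert(ret == 0); // XDNN routines return 0 on success
        }
    };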
@@ -257,4 +519,32 @@ REGISTER_KERNEL(Device::XPU, OpType::Flatten, DataType::Float32, CopyXdnn,
                 "Flatten_xdnn_Float32");
 REGISTER_KERNEL(Device::XPU, OpType::Identity, DataType::Float32, CopyXdnn,
                 "Identity_xdnn_Float32");
+REGISTER_KERNEL(Device::XPU, OpType::Abs, DataType::Float32, AbsXdnn,
+                "Abs_xdnn_Float32");
+REGISTER_KERNEL(Device::XPU, OpType::Atan, DataType::Float32, ATanXdnn,
+                "Atan_xdnn_Float32");
+REGISTER_KERNEL(Device::XPU, OpType::Log, DataType::Float32, LogXdnn,
+                "Log_xdnn_Float32");
+REGISTER_KERNEL(Device::XPU, OpType::Cos, DataType::Float32, CosXdnn,
+                "Cos_xdnn_Float32");
+REGISTER_KERNEL(Device::XPU, OpType::Sin, DataType::Float32, SinXdnn,
+                "Sin_xdnn_Float32");
+REGISTER_KERNEL(Device::XPU, OpType::Tan, DataType::Float32, TanXdnn,
+                "Tan_xdnn_Float32");
+REGISTER_KERNEL(Device::XPU, OpType::Sinh, DataType::Float32, SinhXdnn,
+                "Sinh_xdnn_Float32");
+REGISTER_KERNEL(Device::XPU, OpType::Cosh, DataType::Float32, CoshXdnn,
+                "Cosh_xdnn_Float32");
+REGISTER_KERNEL(Device::XPU, OpType::Erf, DataType::Float32, ErfXdnn,
+                "Erf_xdnn_Float32");
+REGISTER_KERNEL(Device::XPU, OpType::Acos, DataType::Float32, ACosXdnn,
+                "ACos_xdnn_Float32");
+REGISTER_KERNEL(Device::XPU, OpType::Acosh, DataType::Float32, ACoshXdnn,
+                "ACosh_xdnn_Float32");
+REGISTER_KERNEL(Device::XPU, OpType::Asin, DataType::Float32, ASinXdnn,
+                "ASin_xdnn_Float32");
+REGISTER_KERNEL(Device::XPU, OpType::Asinh, DataType::Float32, ASinhXdnn,
+                "ASinh_xdnn_Float32");
+REGISTER_KERNEL(Device::XPU, OpType::Atanh, DataType::Float32, ATanhXdnn,
+                "ATanh_xdnn_Float32");
 }; // namespace infini
@@ -36,7 +36,7 @@ void testUnary(const std::function<void(void *, size_t, DataType)> &generator,
     cpuRuntime->run(cpuGraph);
     auto outputCpu = cpuOp->getOutput();
     // Check
-    EXPECT_TRUE(outputCpu->equalData(outputGpu2Cpu));
+    EXPECT_TRUE(outputCpu->equalData(outputGpu2Cpu, 1e-6));
 }

 void testClip(const std::function<void(void *, size_t, DataType)> &generator,
@@ -103,10 +103,88 @@ void testCast(const std::function<void(void *, size_t, DataType)> &generator,
     EXPECT_TRUE(outputCpu->equalData(outputGpu2Cpu));
 }

+template <LogObj::LogType T>
+void testLog(const std::function<void(void *, size_t, DataType)> &generator,
+             const Shape &shape) {
+    // Runtime
+    Runtime cpuRuntime = NativeCpuRuntimeObj::getInstance();
+    auto xpuRuntime = make_ref<XPURuntimeObj>();
+
+    // Build input data on CPU
+    Tensor inputCpu = make_ref<TensorObj>(shape, DataType::Float32, cpuRuntime);
+
+    // GPU
+    Graph xpuGraph = make_ref<GraphObj>(xpuRuntime);
+    auto inputGpu = xpuGraph->cloneTensor(inputCpu);
+    auto gpuOp = xpuGraph->addOp<LogObj>(inputGpu, nullptr, T);
+    xpuGraph->dataMalloc();
+    inputGpu->setData(generator);
+    xpuRuntime->run(xpuGraph);
+    auto outputGpu = gpuOp->getOutput();
+    auto outputGpu2Cpu = outputGpu->clone(cpuRuntime);
+    // CPU
+    Graph cpuGraph = make_ref<GraphObj>(cpuRuntime);
+    auto cpuOp = cpuGraph->addOp<LogObj>(inputCpu, nullptr, T);
+    cpuGraph->addTensor(inputCpu);
+    cpuGraph->dataMalloc();
+    inputCpu->setData(generator);
+    cpuRuntime->run(cpuGraph);
+    auto outputCpu = cpuOp->getOutput();
+    // Check
+    EXPECT_TRUE(outputCpu->equalData(outputGpu2Cpu));
+}
+
+template <class T>
+void testTrigon(const std::function<void(void *, size_t, DataType)> &generator,
+                const Shape &shape) {
+    // Runtime
+    Runtime cpuRuntime = NativeCpuRuntimeObj::getInstance();
+    auto xpuRuntime = make_ref<XPURuntimeObj>();
+
+    // Build input data on CPU
+    Tensor inputCpu = make_ref<TensorObj>(shape, DataType::Float32, cpuRuntime);
+
+    // GPU
+    Graph xpuGraph = make_ref<GraphObj>(xpuRuntime);
+    auto inputGpu = xpuGraph->cloneTensor(inputCpu);
+    auto gpuOp = xpuGraph->addOp<T>(inputGpu, nullptr);
+    xpuGraph->dataMalloc();
+    inputGpu->setData(generator);
+    xpuRuntime->run(xpuGraph);
+    auto outputGpu = gpuOp->getOutput();
+    auto outputGpu2Cpu = outputGpu->clone(cpuRuntime);
+    // CPU
+    Graph cpuGraph = make_ref<GraphObj>(cpuRuntime);
+    auto cpuOp = cpuGraph->addOp<T>(inputCpu, nullptr);
+    cpuGraph->addTensor(inputCpu);
+    cpuGraph->dataMalloc();
+    inputCpu->setData(generator);
+    cpuRuntime->run(cpuGraph);
+    auto outputCpu = cpuOp->getOutput();
+    // Check
+    EXPECT_TRUE(outputCpu->equalData(outputGpu2Cpu, 1e-3));
+}
+
 TEST(xdnn_Unary, run) {
     testUnary<ReluObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
     testUnary<SigmoidObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
     testUnary<TanhObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
+    testUnary<AbsObj>(ValGenerator<-1>(), Shape{1, 2, 2, 3});
+    testUnary<ATanObj>(OneGenerator(), Shape{1, 2, 2, 3});
+    testLog<LogObj::Log10>(ValGenerator<2>(), Shape{1, 2, 2, 3});
+    testLog<LogObj::Log2>(ValGenerator<2>(), Shape{1, 2, 2, 3});
+    testLog<LogObj::LogE>(ValGenerator<2>(), Shape{1, 2, 2, 3});
+    testTrigon<CosObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
+    testTrigon<SinObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
+    testTrigon<TanObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
+    testTrigon<SinHObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
+    testTrigon<CosHObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
+    testUnary<ErfObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
+    testTrigon<ACosObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
+    testTrigon<ACosHObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
+    testTrigon<ASinObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
+    testTrigon<ASinHObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
+    testTrigon<ATanHObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
 }

 } // namespace infini
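Two tolerances appear in these tests: testUnary now passes 1e-6 explicitly (the value that used to be hard-coded in equalDataImpl), while testTrigon relaxes it to 1e-3, presumably because the XDNN trigonometric and hyperbolic routines can differ from the CPU libm reference by more than one part in a million. The generator choices also match the operators' domains, assuming OneGenerator fills the tensor with 1 and ValGenerator<K> fills it with K; a quick standalone check of the reference values those inputs should produce (illustrative only):

    #include <cassert>
    #include <cmath>

    int main() {
        // ATan on OneGenerator input: atan(1) = pi / 4.
        assert(std::fabs(std::atan(1.0) - std::acos(-1.0) / 4) < 1e-12);
        // Abs on ValGenerator<-1> input: |-1| = 1.
        assert(std::fabs(std::fabs(-1.0) - 1.0) < 1e-12);
        // Log2 on ValGenerator<2> input: log2(2) = 1.
        assert(std::fabs(std::log2(2.0) - 1.0) < 1e-12);
        return 0;
    }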