Add Neg operator and kernel (#152)

* Add Neg operator and kernel
* handle neg in to_onnx

Co-authored-by: Haojie Wang <haojie0429@gmail.com>
parent 7a9fcd93b2
commit 7600fe688c
@@ -51,6 +51,7 @@ class GraphHandlerObj {
     Tensor softmax(Tensor x, Tensor y, int axis);
     Tensor abs(Tensor x, Tensor y);
     Tensor sqrt(Tensor x, Tensor y);
+    Tensor neg(Tensor x, Tensor y);
     Tensor shape(Tensor x, Tensor y);
     Tensor identity(Tensor x, Tensor y);
     Tensor flatten(Tensor s, Tensor y, int axis);
@@ -3,14 +3,14 @@
 #include "operators/unary.h"
 
 namespace infini {
-// TODO(constroy): num should be size_t.
-void softmax_kernel(float *input, float *output, int num);
-void relu_kernel(float *input, float *output, int num);
-void sigmoid_kernel(float *input, float *output, int num);
-void tanh_kernel(float *input, float *output, int num);
-void abs_kernel(float *input, float *output, int num);
-void sqrt_kernel(float *input, float *output, int num);
-void erf_kernel(float *input, float *output, int num);
+void softmax_kernel(float *input, float *output, size_t num);
+void relu_kernel(float *input, float *output, size_t num);
+void sigmoid_kernel(float *input, float *output, size_t num);
+void tanh_kernel(float *input, float *output, size_t num);
+void abs_kernel(float *input, float *output, size_t num);
+void sqrt_kernel(float *input, float *output, size_t num);
+void neg_kernel(float *input, float *output, size_t num);
+void erf_kernel(float *input, float *output, size_t num);
 
 void unary_kernel(const Operator &_op) {
     auto op = as<UnaryObj>(_op);
@@ -30,6 +30,8 @@ void unary_kernel(const Operator &_op) {
         abs_kernel(inputData, outputData, num);
     else if (op->getOpType() == OpType::Sqrt)
         sqrt_kernel(inputData, outputData, num);
+    else if (op->getOpType() == OpType::Neg)
+        neg_kernel(inputData, outputData, num);
     else if (op->getOpType() == OpType::Erf)
         erf_kernel(inputData, outputData, num);
     else
@@ -403,6 +403,11 @@ class OnnxStub:
                     tensors[node.input[0]],
                     tensors.get(node.output[0]),
                 )
+            elif node.op_type == "Neg":
+                tensors[node.output[0]] = self.handler.neg(
+                    tensors[node.input[0]],
+                    tensors.get(node.output[0]),
+                )
             elif node.op_type == "Shape":
                 tensors[node.output[0]] = self.handler.shape(
                     tensors[node.input[0]],
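The importer branch above passes the node's single input tensor and the already-known output tensor (or None) to the handler. A minimal Python sketch of that dispatch shape, using a hypothetical FakeHandler in place of the real backend handler:

# Hedged sketch (illustrative only): the shape of the importer dispatch above.
# FakeHandler is a stand-in for the real backend handler, which builds a Neg
# operator in the graph and returns its output tensor.
class FakeHandler:
    def neg(self, x, y):
        return ("neg", x, y)

tensors = {"x": "tensor:x"}
node_op_type, node_inputs, node_outputs = "Neg", ["x"], ["y"]

handler = FakeHandler()
if node_op_type == "Neg":
    tensors[node_outputs[0]] = handler.neg(
        tensors[node_inputs[0]],
        tensors.get(node_outputs[0]),  # None when the output tensor is not yet known
    )
assert tensors["y"] == ("neg", "tensor:x", None)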
@@ -916,6 +921,7 @@ class OnnxStub:
                 backend.OpTypeId.PRelu,
                 backend.OpTypeId.Sqrt,
                 backend.OpTypeId.Erf,
+                backend.OpTypeId.Neg,
             ]:
                 ctx.push_node(make_node(ty.name, inputs, outputs, name))
             elif ty == backend.OpTypeId.Flatten:
@@ -243,6 +243,12 @@ class TestStringMethods(unittest.TestCase):
         y = make_tensor_value_info("y", TensorProto.FLOAT, [1, 3, 5, 7])
         abs = make_node("Abs", ["x"], ["y"], name="abs")
         make_and_import_model(make_graph([abs], "abs", [x], [y]))
 
+    def test_neg(self):
+        x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 3, 5, 7])
+        y = make_tensor_value_info("y", TensorProto.FLOAT, [1, 3, 5, 7])
+        neg = make_node("Neg", ["x"], ["y"], name="neg")
+        make_and_import_model(make_graph([neg], "neg", [x], [y]))
+
     def test_identity(self):
         x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 3, 5, 7])
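The new test_neg only checks that a Neg model imports cleanly. A hedged sketch, not part of the commit, of an additional numerical check against onnx's reference evaluator and numpy (assuming onnx >= 1.13 for onnx.reference):

# Hedged sketch: checking the semantics of the Neg node that test_neg builds,
# using onnx's reference evaluator and numpy instead of the InfiniTensor backend.
import numpy as np
from onnx import TensorProto
from onnx.helper import make_graph, make_model, make_node, make_tensor_value_info
from onnx.reference import ReferenceEvaluator

x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 3, 5, 7])
y = make_tensor_value_info("y", TensorProto.FLOAT, [1, 3, 5, 7])
model = make_model(make_graph([make_node("Neg", ["x"], ["y"], name="neg")], "neg", [x], [y]))

data = np.random.rand(1, 3, 5, 7).astype(np.float32)
(out,) = ReferenceEvaluator(model).run(None, {"x": data})
assert np.array_equal(out, -data)  # ONNX Neg is elementwise negation: y = -x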
@@ -159,6 +159,7 @@ DEFINE_UNARY_METHOD(sigmoid, Sigmoid)
 DEFINE_UNARY_METHOD(tanh, Tanh)
 DEFINE_UNARY_METHOD(abs, Abs)
 DEFINE_UNARY_METHOD(sqrt, Sqrt)
+DEFINE_UNARY_METHOD(neg, Neg)
 DEFINE_UNARY_METHOD(shape, Shape)
 DEFINE_UNARY_METHOD(erf, Erf)
 
@@ -100,6 +100,7 @@ void export_values(py::module &m) {
         .VALUE(OpType, Dropout)
         .VALUE(OpType, Cast)
         .VALUE(OpType, Sqrt)
+        .VALUE(OpType, Neg)
         .VALUE(OpType, Expand)
         .VALUE(OpType, Erf)
         .VALUE(OpType, Where)
@@ -444,6 +445,7 @@ void init_graph_builder(py::module &m) {
         .def("softmax", &Handler::softmax, policy::move)
         .def("abs", &Handler::abs, policy::move)
         .def("sqrt", &Handler::sqrt, policy::move)
+        .def("neg", &Handler::neg, policy::move)
         .def("shape", &Handler::shape, policy::move)
         .def("identity", &Handler::identity, policy::move)
         .def("flatten", &Handler::flatten, policy::move)
@@ -64,6 +64,10 @@ template <typename T> class NaiveErf : public NativeUnary<T> {
     T doCompute(T val) const override { return std::erf(val); }
 };
 
+template <typename T> class NaiveNeg : public NativeUnary<T> {
+    T doCompute(T val) const override { return -val; }
+};
+
 template <typename T> class Clip : public CpuKernelWithoutConfig {
     void compute(const Operator &_op,
                  const RuntimeObj *context) const override {
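NaiveNeg reuses the NativeUnary element-wise loop and only supplies the scalar negation. A rough Python analogue of that pattern, illustrative only (the real NativeUnary is a C++ CPU kernel class, not this interface):

# Hedged Python analogue of the NativeUnary / NaiveNeg pattern above.
class NativeUnary:
    def compute(self, values):
        # apply the scalar doCompute over every element, as the C++ base class does
        return [self.do_compute(v) for v in values]

class NaiveNeg(NativeUnary):
    def do_compute(self, v):
        return -v

assert NaiveNeg().compute([1.0, -2.5, 0.0]) == [-1.0, 2.5, 0.0]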
@@ -103,6 +107,8 @@ REGISTER_KERNEL(Device::CPU, OpType::Sqrt, DataType::Float32, NaiveSqrt<float>,
                 "sqrtNaive_CPU_float32");
 REGISTER_KERNEL(Device::CPU, OpType::Erf, DataType::Float32, NaiveErf<float>,
                 "erfNaive_CPU_float32");
+REGISTER_KERNEL(Device::CPU, OpType::Neg, DataType::Float32, NaiveNeg<float>,
+                "negNaive_CPU_float32");
 REGISTER_KERNEL(Device::CPU, OpType::Softmax, DataType::UInt32,
                 NaiveSoftmax<uint32_t>, "softmaxNaive_CPU_uint32");
 REGISTER_KERNEL(Device::CPU, OpType::Softmax, DataType::Float32,
@@ -140,6 +140,8 @@ REGISTER_KERNEL(Device::CUDA, OpType::Abs, DataType::Float32, UnaryCuda,
                 "Abs_CUDA_Float32");
 REGISTER_KERNEL(Device::CUDA, OpType::Sqrt, DataType::Float32, UnaryCuda,
                 "Sqrt_CUDA_Float32");
+REGISTER_KERNEL(Device::CUDA, OpType::Neg, DataType::Float32, UnaryCuda,
+                "Neg_CUDA_Float32");
 REGISTER_KERNEL(Device::CUDA, OpType::Erf, DataType::Float32, UnaryCuda,
                 "Erf_CUDA_Float32");
 
@@ -8,7 +8,7 @@ constexpr unsigned int num_threads() { return 32 * 4; }
 constexpr int thread_work_size() { return 4; }
 constexpr int block_work_size() { return thread_work_size() * num_threads(); }
 
-__global__ void _softmax_kernel1(float *input, float *output, int n) {
+__global__ void _softmax_kernel1(float *input, float *output, size_t n) {
     float sum = 0.0f;
     for (size_t i = 0; i < n; ++i) {
         sum += pow(E_CONSTANT, input[i]);
@@ -16,106 +16,121 @@ __global__ void _softmax_kernel1(float *input, float *output, int n) {
     *output = sum;
 }
 
-__global__ void _softmax_kernel2(float *input, float *output, int n) {
+__global__ void _softmax_kernel2(float *input, float *output, size_t n) {
     float sum = *output;
-    int index = threadIdx.x + blockIdx.x * blockDim.x;
-    int stride = blockDim.x * gridDim.x;
-    for (int i = index; i < n; i += stride) {
+    size_t index = threadIdx.x + blockIdx.x * blockDim.x;
+    size_t stride = blockDim.x * gridDim.x;
+    for (size_t i = index; i < n; i += stride) {
         output[i] = pow(E_CONSTANT, input[i]) / sum;
     }
 }
 
-__global__ void _relu_kernel(float *input, float *output, int n) {
-    int index = threadIdx.x + blockIdx.x * blockDim.x;
-    int stride = blockDim.x * gridDim.x;
-    for (int i = index; i < n; i += stride) {
+__global__ void _relu_kernel(float *input, float *output, size_t n) {
+    size_t index = threadIdx.x + blockIdx.x * blockDim.x;
+    size_t stride = blockDim.x * gridDim.x;
+    for (size_t i = index; i < n; i += stride) {
         output[i] = max(input[i], float(0));
     }
 }
 
-__global__ void _sigmoid_kernel(float *input, float *output, int n) {
-    int index = threadIdx.x + blockIdx.x * blockDim.x;
-    int stride = blockDim.x * gridDim.x;
-    for (int i = index; i < n; i += stride) {
+__global__ void _sigmoid_kernel(float *input, float *output, size_t n) {
+    size_t index = threadIdx.x + blockIdx.x * blockDim.x;
+    size_t stride = blockDim.x * gridDim.x;
+    for (size_t i = index; i < n; i += stride) {
         output[i] = 1 / (1 + pow(E_CONSTANT, -input[i]));
     }
 }
 
-__global__ void _tanh_kernel(float *input, float *output, int n) {
-    int index = threadIdx.x + blockIdx.x * blockDim.x;
-    int stride = blockDim.x * gridDim.x;
-    for (int i = index; i < n; i += stride) {
+__global__ void _tanh_kernel(float *input, float *output, size_t n) {
+    size_t index = threadIdx.x + blockIdx.x * blockDim.x;
+    size_t stride = blockDim.x * gridDim.x;
+    for (size_t i = index; i < n; i += stride) {
         output[i] = (pow(E_CONSTANT, input[i]) - pow(E_CONSTANT, -input[i])) /
                     (pow(E_CONSTANT, input[i]) + pow(E_CONSTANT, -input[i]));
     }
 }
 
-__global__ void _abs_kernel(float *input, float *output, int n) {
-    int index = threadIdx.x + blockIdx.x * blockDim.x;
-    int stride = blockDim.x * gridDim.x;
-    for (int i = index; i < n; i += stride) {
+__global__ void _abs_kernel(float *input, float *output, size_t n) {
+    size_t index = threadIdx.x + blockIdx.x * blockDim.x;
+    size_t stride = blockDim.x * gridDim.x;
+    for (size_t i = index; i < n; i += stride) {
         output[i] = input[i] < 0 ? -input[i] : input[i];
     }
 }
 
-__global__ void _sqrt_kernel(float *input, float *output, int n) {
-    int index = threadIdx.x + blockIdx.x * blockDim.x;
-    int stride = blockDim.x * gridDim.x;
-    for (int i = index; i < n; i += stride) {
+__global__ void _sqrt_kernel(float *input, float *output, size_t n) {
+    size_t index = threadIdx.x + blockIdx.x * blockDim.x;
+    size_t stride = blockDim.x * gridDim.x;
+    for (size_t i = index; i < n; i += stride) {
         output[i] = sqrt(input[i]);
     }
 }
 
-__global__ void _erf_kernel(float *input, float *output, int n) {
-    int index = threadIdx.x + blockIdx.x * blockDim.x;
-    int stride = blockDim.x * gridDim.x;
+__global__ void _erf_kernel(float *input, float *output, size_t n) {
+    size_t index = threadIdx.x + blockIdx.x * blockDim.x;
+    size_t stride = blockDim.x * gridDim.x;
     for (int i = index; i < n; i += stride) {
         output[i] = erf(input[i]);
     }
 }
 
+template <typename T>
+__global__ void _neg_kernel(T *input, T *output, size_t n) {
+    size_t index = threadIdx.x + blockIdx.x * blockDim.x;
+    size_t stride = blockDim.x * gridDim.x;
+    for (size_t i = index; i < n; i += stride) {
+        output[i] = -input[i];
+    }
+}
+
 namespace infini {
-void softmax_kernel(float *input, float *output, int num) {
+void softmax_kernel(float *input, float *output, size_t num) {
 
     int blocksize = block_work_size();
     int gridsize = (num + block_work_size() - 1) / block_work_size();
     _softmax_kernel1<<<1, 1>>>(input, output, num);
     _softmax_kernel2<<<gridsize, blocksize>>>(input, output, num);
 }
-void relu_kernel(float *input, float *output, int num) {
+void relu_kernel(float *input, float *output, size_t num) {
 
     int blocksize = block_work_size();
     int gridsize = (num + block_work_size() - 1) / block_work_size();
     _relu_kernel<<<gridsize, blocksize>>>(input, output, num);
 }
-void sigmoid_kernel(float *input, float *output, int num) {
+void sigmoid_kernel(float *input, float *output, size_t num) {
 
     int blocksize = block_work_size();
     int gridsize = (num + block_work_size() - 1) / block_work_size();
     _sigmoid_kernel<<<gridsize, blocksize>>>(input, output, num);
 }
-void tanh_kernel(float *input, float *output, int num) {
+void tanh_kernel(float *input, float *output, size_t num) {
 
     int blocksize = block_work_size();
     int gridsize = (num + block_work_size() - 1) / block_work_size();
     _tanh_kernel<<<gridsize, blocksize>>>(input, output, num);
 }
-void abs_kernel(float *input, float *output, int num) {
+void abs_kernel(float *input, float *output, size_t num) {
 
     int blocksize = block_work_size();
     int gridsize = (num + block_work_size() - 1) / block_work_size();
     _abs_kernel<<<gridsize, blocksize>>>(input, output, num);
 }
-void sqrt_kernel(float *input, float *output, int num) {
+void sqrt_kernel(float *input, float *output, size_t num) {
 
     int blocksize = block_work_size();
     int gridsize = (num + block_work_size() - 1) / block_work_size();
     _sqrt_kernel<<<gridsize, blocksize>>>(input, output, num);
 }
-void erf_kernel(float *input, float *output, int num) {
+void erf_kernel(float *input, float *output, size_t num) {
 
     int blocksize = block_work_size();
     int gridsize = (num + block_work_size() - 1) / block_work_size();
     _erf_kernel<<<gridsize, blocksize>>>(input, output, num);
 }
+void neg_kernel(float *input, float *output, size_t num) {
+
+    int blocksize = block_work_size();
+    int gridsize = (num + block_work_size() - 1) / block_work_size();
+    _neg_kernel<<<gridsize, blocksize>>>(input, output, num);
+}
 }; // namespace infini
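All the CUDA kernels above, including the new _neg_kernel, use the same grid-stride loop, and each host wrapper picks the grid size by ceiling division of num by block_work_size(). A small Python model of that indexing, with illustrative block and grid sizes, showing that every element in [0, n) is visited exactly once:

# Hedged sketch: a pure-Python model of the grid-stride loop used by the CUDA
# kernels above. Block/grid sizes here are illustrative; the host wrappers use
# blocksize = block_work_size() and gridsize = (num + block_work_size() - 1) // block_work_size().
def grid_stride_indices(n, block_dim, grid_dim):
    covered = []
    for block_idx in range(grid_dim):           # blockIdx.x
        for thread_idx in range(block_dim):     # threadIdx.x
            i = thread_idx + block_idx * block_dim
            stride = block_dim * grid_dim
            while i < n:
                covered.append(i)               # this thread handles output[i] = -input[i]
                i += stride
    return covered

# every index in [0, n) is covered exactly once, regardless of n
assert sorted(grid_stride_indices(1000, block_dim=128, grid_dim=4)) == list(range(1000))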
@@ -46,6 +46,7 @@ TEST(cuDNN_Unary, run) {
     testUnary<SigmoidObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
     testUnary<TanhObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
     testUnary<SqrtObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
+    testUnary<NegObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
     testUnary<ErfObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
     // more shapes
     testUnary<SqrtObj>(IncrementalGenerator(), Shape{13});