feat: add frontend DynamicQuantizeLinear and DequantizeLinear kernels

This commit is contained in:
kilinchange 2023-12-18 13:58:20 +08:00
parent f51ce3231a
commit c63ed4326d
9 changed files with 235 additions and 4 deletions

View File

@ -99,6 +99,11 @@ class GraphHandlerObj {
int outputType, Tensor input);
Tensor depthToSpace(Tensor input, Tensor output, int blocksize,
std::string mode);
// Quantize `input` dynamically, returning the operator's three output
// tensors; `outputs` may pre-supply them (pass std::nullopt to have the
// graph create them).
TensorVec dynamicQuantizeLinear(Tensor input,
                                std::optional<TensorVec> outputs);
// Dequantize `input` using `scale` and `zero_point` along `axis`,
// writing into `output` (created by the graph when null).
Tensor dequantizeLinear(Tensor input, Tensor scale, Tensor zero_point,
                        Tensor output, int axis);
//------ modifiers

View File

@ -0,0 +1,41 @@
#pragma once
#include "core/operator.h"

namespace infini {
/**
 * @brief The linear dequantization operator.
 * It consumes a quantized tensor, a scale, and a zero point to compute
 * the full precision tensor.
 */
class DequantizeLinearObj : public OperatorObj {
    int axis; // axis of the dequantizing dimension of the input tensor

  public:
    /**
     * @brief Construct a new DequantizeLinear object.
     *
     * @param graph The computation graph that this operator belongs to.
     * @param input The quantized input tensor.
     * @param scale Scale for input.
     * @param zero_point Zero point for input; may be null, in which case
     * only {input, scale} are recorded as inputs.
     * @param output The full-precision output tensor.
     * @param axis The axis of the dequantizing dimension of the input tensor.
     */
    DequantizeLinearObj(GraphObj *graph, Tensor input, Tensor scale,
                        Tensor zero_point, Tensor output, int axis);

    OP_CLONE(DequantizeLinearObj);

    optional<vector<Shape>> inferShape(const TensorVec &inputs) override;
    std::string toString() const override;

    int numInputs() const override { return inputs.size(); }
    int numOutputs() const override { return 1; }

  private:
    vector<int> getWorkloadVector() const override;
    vector<int> getOpAttrVector() const override;
    vector<DataType> inferDataType(const TensorVec &inputs) const override;
};
} // namespace infini

View File

@ -0,0 +1,37 @@
#pragma once
#include "core/operator.h"
namespace infini {
/**
 * @brief A Function to fuse calculation for Scale, Zero Point and FP32->8Bit
 * conversion of FP32 Input data (ONNX DynamicQuantizeLinear).
 *
 * Takes a single full-precision input and produces three outputs
 * (see numOutputs() below).
 */
class DynamicQuantizeLinearObj : public OperatorObj {
public:
/**
 * @brief Construct a new DynamicQuantizeLinear object.
 *
 * @param graph The computation graph that this operator belongs to.
 * @param input The input tensor.
 * @param outputs The output tensors; pass std::nullopt to have the graph
 * allocate them from the inferred shapes/dtypes.
 */
DynamicQuantizeLinearObj(GraphObj *graph, Tensor input,
std::optional<TensorVec> outputs);
OP_CLONE(DynamicQuantizeLinearObj);
optional<vector<Shape>> inferShape(const TensorVec &inputs) override;
std::string toString() const override;
int numInputs() const override { return inputs.size(); }
// Three outputs: the quantized tensor plus its scale and zero point.
int numOutputs() const override { return 3; }
private:
vector<int> getWorkloadVector() const override;
vector<int> getOpAttrVector() const override;
vector<DataType> inferDataType(const TensorVec &inputs) const override;
};
} // namespace infini

View File

@ -857,6 +857,28 @@ class OnnxStub:
tensors[output_name] = self.handler.tensor(dims, tensor.data_type)
data[output_name] = tensor
tensors[output_name].set_weight()
elif node.op_type == "DynamicQuantizeLinear":
for name, tensor in zip(
node.output,
self.handler.dynamicQuantizeLinear(
tensors[node.input[0]], None
),
):
tensors[name] = tensor
elif node.op_type == "DequantizeLinear":
attributes = _parse_attribute(
node,
{
"axis": 1,
},
)
axis = attributes["axis"]
tensors[node.output[0]] = self.handler.dequantizeLinear(
tensor[node.input[0]],
tensor[node.input[1]],
tensor[node.input[2]] if len(node.input) > 2 else None,
axis,
)
else:
raise Exception('Unsupported operator "{}"'.format(node.op_type))
new_node_name.append(node.name)

View File

@ -6,6 +6,8 @@
#include "operators/broadcast.h"
#include "operators/concat.h"
#include "operators/conv.h"
#include "operators/dequantize_linear.h"
#include "operators/dynamic_quantize_linear.h"
#include "operators/element_wise.h"
#include "operators/expand.h"
#include "operators/gather.h"
@ -506,6 +508,35 @@ Tensor GraphHandlerObj::where(Tensor inputX, Tensor inputY, Tensor condition,
}
}
// Add a DynamicQuantizeLinear operator to the graph and return its output
// tensors. Callers may pre-supply the outputs via `outputs`; otherwise the
// graph allocates the three outputs itself.
TensorVec
GraphHandlerObj::dynamicQuantizeLinear(Tensor input,
                                       std::optional<TensorVec> outputs) {
    if (!outputs) {
        // No pre-allocated outputs: let addOp create them.
        return g->addOp<DynamicQuantizeLinearObj>(std::move(input), outputs)
            ->getOutputs();
    }
    g->addOpWithOutputs<DynamicQuantizeLinearObj>(std::move(input), outputs);
    return *outputs;
}
// Add a DequantizeLinear operator to the graph and return its output
// tensor (either the caller-supplied `output` or a freshly created one).
Tensor GraphHandlerObj::dequantizeLinear(Tensor input, Tensor scale,
                                         Tensor zero_point, Tensor output,
                                         int axis) {
    if (!output) {
        // No pre-allocated output: let addOp create it.
        return g
            ->addOp<DequantizeLinearObj>(std::move(input), std::move(scale),
                                         std::move(zero_point), output, axis)
            ->getOutput();
    }
    g->addOpWithOutputs<DequantizeLinearObj>(std::move(input), std::move(scale),
                                             std::move(zero_point), output,
                                             axis);
    return output;
}
Tensor GraphHandlerObj::depthToSpace(Tensor input, Tensor output, int blocksize,
std::string mode) {
if (output) {

View File

@ -40,8 +40,7 @@ class RecvNCCL : public CudaKernelWithoutConfig {
}
};
REGISTER_KERNEL(Device::CUDA, OpType::Recv, DataType::Float32, RecvNCCL,
"Recv_NCCL_CUDA_Float32");
REGISTER_KERNEL(Device::CUDA, OpType::Recv, RecvNCCL, "Recv_NCCL_CUDA");
} // namespace infini
#endif

View File

@ -36,8 +36,7 @@ class SendNCCL : public CudaKernelWithoutConfig {
}
};
REGISTER_KERNEL(Device::CUDA, OpType::Send, DataType::Float32, SendNCCL,
"Send_NCCL_CUDA_Float32");
REGISTER_KERNEL(Device::CUDA, OpType::Send, SendNCCL, "Send_NCCL_CUDA");
} // namespace infini
#endif

View File

@ -0,0 +1,51 @@
#include "operators/dequantize_linear.h"
#include "utils/operator_utils.h"
namespace infini {
// Construct a DequantizeLinear operator. `zero_point` is optional: when it
// is null, only {input, scale} are registered as operator inputs, which is
// why inferDataType below accepts either 2 or 3 inputs.
DequantizeLinearObj::DequantizeLinearObj(GraphObj *graph, Tensor input,
                                         Tensor scale, Tensor zero_point,
                                         Tensor output, int axis)
    : OperatorObj(OpType::DequantizeLinear,
                  zero_point ? TensorVec{input, scale, zero_point}
                             : TensorVec{input, scale},
                  {output}),
      axis(axis) {
    // Validates shapes/dtypes against the graph (and presumably fills in
    // the output tensor when it was passed as null — TODO confirm).
    IT_ASSERT(checkValid(graph));
}
// The dequantized output has exactly the shape of the quantized input;
// scale and zero point affect values only, never shape.
optional<vector<Shape>>
DequantizeLinearObj::inferShape(const TensorVec &inputs) {
    return {{inputs[0]->getDims()}};
}
// The output adopts the data type of `scale` (inputs[1]) — the
// full-precision type the quantized input is expanded to. The input count
// is 2 or 3 depending on whether a zero point was supplied (see ctor).
vector<DataType>
DequantizeLinearObj::inferDataType(const TensorVec &inputs) const {
    IT_ASSERT(inputs.size() == 2 || inputs.size() == 3);
    return {inputs[1]->getDType()};
}
// Human-readable description of the operator for logging/debugging.
std::string DequantizeLinearObj::toString() const {
    std::ostringstream os;
    os << "DequantizeLinear[" << getGuid() << "]";
    os << "(";
    os << vecToString(inputs[0]->getDims()) << ",";
    os << "input=" << inputs[0]->getGuid() << ",";
    os << "scale=" << inputs[1]->getGuid() << ",";
    os << "axis=" << axis << ",";
    os << "output=";
    for (auto output : outputs)
        os << output->getGuid() << ",";
    // Fix: the closing parenthesis was missing, leaving the description
    // unbalanced (cf. DynamicQuantizeLinearObj::toString).
    os << ")";
    return os.str();
}
// Workload signature: the operator type id followed by the input dims.
vector<int> DequantizeLinearObj::getWorkloadVector() const {
    auto workload = inputs[0]->getDims();
    workload.insert(workload.begin(), type.underlying());
    return workload;
}
// Attribute signature used to tell apart operators of the same type.
// Fix: include `axis` so two DequantizeLinear ops that differ only in
// their axis attribute are not considered identical.
vector<int> DequantizeLinearObj::getOpAttrVector() const {
    return {type.underlying(), axis};
}
} // namespace infini

View File

@ -0,0 +1,46 @@
#include "operators/dynamic_quantize_linear.h"
#include "utils/operator_utils.h"
namespace infini {
// Construct a DynamicQuantizeLinear operator. When `outputs` is empty,
// three null output slots are registered (matching numOutputs() == 3) so
// that checkValid() can presumably materialize them from the inferred
// shapes/dtypes — TODO confirm against OperatorObj::checkValid.
DynamicQuantizeLinearObj::DynamicQuantizeLinearObj(
    GraphObj *graph, Tensor input, std::optional<TensorVec> outputs)
    : OperatorObj(OpType::DynamicQuantizeLinear, TensorVec{input},
                  ((!outputs) ? TensorVec(3, nullptr) : std::move(*outputs))) {
    IT_ASSERT(checkValid(graph));
}
// Shape inference for the operator's three outputs: the quantized tensor
// `y` has the input's shape, while `y_scale` and `y_zero_point` are
// scalars (rank 0) per the ONNX DynamicQuantizeLinear specification.
// Fix: the original returned a single shape although numOutputs() == 3.
optional<vector<Shape>>
DynamicQuantizeLinearObj::inferShape(const TensorVec &inputs) {
    return {{inputs[0]->getDims(), {}, {}}};
}
// Data types of the three outputs.
// Fix: the original read inputs[1] right after asserting that exactly one
// input exists — an out-of-bounds access. Per the ONNX spec the outputs
// are y (uint8), y_scale (float32) and y_zero_point (uint8).
vector<DataType>
DynamicQuantizeLinearObj::inferDataType(const TensorVec &inputs) const {
    IT_ASSERT(inputs.size() == 1);
    return {DataType::UInt8, DataType::Float32, DataType::UInt8};
}
std::string DynamicQuantizeLinearObj::toString() const {
std::ostringstream os;
os << "DynamicQuantizeLinear[" << getGuid() << "]";
os << "(";
os << vecToString(inputs[0]->getDims()) << ",";
os << "input=" << inputs[0]->getGuid() << ",";
os << "output=";
for (auto output : outputs)
os << output->getGuid() << ",";
os << ")";
return os.str();
}
// Workload signature: the operator type id followed by the input dims.
vector<int> DynamicQuantizeLinearObj::getWorkloadVector() const {
    vector<int> workload{type.underlying()};
    const auto &dims = inputs[0]->getDims();
    workload.insert(workload.end(), dims.begin(), dims.end());
    return workload;
}
// Attribute signature; this operator has no attributes beyond its type.
vector<int> DynamicQuantizeLinearObj::getOpAttrVector() const {
    return {type.underlying()};
}
} // namespace infini