feat: add frontend DynamicQuantizeLinear and DequantizeLinear kernels

2023-12-18 13:58:20 +08:00 · 2023-12-18 13:58:20 +08:00 · c63ed4326d
parent f51ce3231a
commit c63ed4326d
9 changed files with 235 additions and 4 deletions
--- a/include/core/graph_handler.h
+++ b/include/core/graph_handler.h
@ -99,6 +99,11 @@ class GraphHandlerObj {
                int outputType, Tensor input);
    Tensor depthToSpace(Tensor input, Tensor output, int blocksize,
                        std::string mode);
    TensorVec dynamicQuantizeLinear(Tensor input,
                                    std::optional<TensorVec> outputs);
    Tensor dequantizeLinear(Tensor input, Tensor scale, Tensor zero_point,
                            Tensor output, int axis);
    //------ modifiers
--- a/include/operators/dequantize_linear.h
+++ b/include/operators/dequantize_linear.h
@ -0,0 +1,41 @@
 #pragma once
 #include "core/operator.h"
 namespace infini {
 /**
 * @brief The linear dequantization operator.
 * It consumes a quantized tensor, a scale, and an optional zero point to
 * compute the full precision tensor.
 */
 class DequantizeLinearObj : public OperatorObj {
    // Axis of the input tensor along which dequantization is applied.
    int axis;
  public:
    /**
     * @brief Construct a new DequantizeLinear object.
     *
     * @param graph The computation graph that this operator belongs to.
     * @param input The input tensor.
     * @param scale Scale for input.
     * @param zero_point Zero point for input. May be null, in which case the
     * operator is built with only `input` and `scale` as inputs.
     * @param output The output tensor.
     * @param axis The axis of the dequantizing dimension of the input tensor.
     */
    DequantizeLinearObj(GraphObj *graph, Tensor input, Tensor scale,
                        Tensor zero_point, Tensor output, int axis);
    OP_CLONE(DequantizeLinearObj);
    optional<vector<Shape>> inferShape(const TensorVec &inputs) override;
    std::string toString() const override;
    /// @return The dequantization axis given at construction.
    int getAxis() const { return axis; }
    // Two inputs (input, scale) or three when a zero point was supplied.
    int numInputs() const override { return inputs.size(); }
    int numOutputs() const override { return 1; }
  private:
    vector<int> getWorkloadVector() const override;
    vector<int> getOpAttrVector() const override;
    vector<DataType> inferDataType(const TensorVec &inputs) const override;
 };
 } // namespace infini
--- a/include/operators/dynamic_quantize_linear.h
+++ b/include/operators/dynamic_quantize_linear.h
@ -0,0 +1,37 @@
 #pragma once
 #include "core/operator.h"
 namespace infini {
 /**
 * @brief Dynamic linear quantization operator. It fuses the computation of
 * the scale, the zero point and the FP32->8Bit conversion of FP32 input data.
 */
 class DynamicQuantizeLinearObj : public OperatorObj {
  public:
    /**
     * @brief Build a DynamicQuantizeLinear operator.
     *
     * @param graph The computation graph that owns this operator.
     * @param input The tensor to be quantized.
     * @param outputs The output tensors. When std::nullopt is passed, three
     * empty output slots are handed to the base operator instead.
     */
    DynamicQuantizeLinearObj(GraphObj *graph, Tensor input,
                             std::optional<TensorVec> outputs);
    OP_CLONE(DynamicQuantizeLinearObj);
    std::string toString() const override;
    optional<vector<Shape>> inferShape(const TensorVec &inputs) override;
    // The operator always reports three outputs.
    int numOutputs() const override { return 3; }
    int numInputs() const override { return static_cast<int>(inputs.size()); }
  private:
    vector<DataType> inferDataType(const TensorVec &inputs) const override;
    vector<int> getOpAttrVector() const override;
    vector<int> getWorkloadVector() const override;
 };
 } // namespace infini
--- a/pyinfinitensor/src/pyinfinitensor/onnx.py
+++ b/pyinfinitensor/src/pyinfinitensor/onnx.py
@ -857,6 +857,28 @@ class OnnxStub:
                    tensors[output_name] = self.handler.tensor(dims, tensor.data_type)
                    data[output_name] = tensor
                    tensors[output_name].set_weight()
                elif node.op_type == "DynamicQuantizeLinear":
                    for name, tensor in zip(
                        node.output,
                        self.handler.dynamicQuantizeLinear(
                            tensors[node.input[0]], None
                        ),
                    ):
                        tensors[name] = tensor
                elif node.op_type == "DequantizeLinear":
                    attributes = _parse_attribute(
                        node,
                        {
                            "axis": 1,
                        },
                    )
                    axis = attributes["axis"]
                    tensors[node.output[0]] = self.handler.dequantizeLinear(
                        tensor[node.input[0]],
                        tensor[node.input[1]],
                        tensor[node.input[2]] if len(node.input) > 2 else None,
                        axis,
                    )
                else:
                    raise Exception('Unsupported operator "{}"'.format(node.op_type))
                new_node_name.append(node.name)
--- a/src/core/graph_handler.cc
+++ b/src/core/graph_handler.cc
@ -6,6 +6,8 @@
 #include "operators/broadcast.h"
 #include "operators/concat.h"
 #include "operators/conv.h"
 #include "operators/dequantize_linear.h"
 #include "operators/dynamic_quantize_linear.h"
 #include "operators/element_wise.h"
 #include "operators/expand.h"
 #include "operators/gather.h"
@ -506,6 +508,35 @@ Tensor GraphHandlerObj::where(Tensor inputX, Tensor inputY, Tensor condition,
    }
 }
 /// Adds a DynamicQuantizeLinear operator to the graph and returns its
 /// outputs. When `outputs` is provided those tensors are bound to the
 /// operator and returned; otherwise the operator's own outputs are returned.
 TensorVec
 GraphHandlerObj::dynamicQuantizeLinear(Tensor input,
                                        std::optional<TensorVec> outputs) {
    if (!outputs) {
        // No caller-supplied outputs: take them from the new operator.
        auto op =
            g->addOp<DynamicQuantizeLinearObj>(std::move(input), outputs);
        return op->getOutputs();
    }
    g->addOpWithOutputs<DynamicQuantizeLinearObj>(std::move(input), outputs);
    return *outputs;
 }
 /// Adds a DequantizeLinear operator to the graph and returns its output.
 /// A non-null `output` is bound to the operator and returned as-is;
 /// otherwise the output of the newly added operator is returned.
 Tensor GraphHandlerObj::dequantizeLinear(Tensor input, Tensor scale,
                                          Tensor zero_point, Tensor output,
                                          int axis) {
    if (!output) {
        // No caller-supplied output: take it from the new operator.
        auto op = g->addOp<DequantizeLinearObj>(
            std::move(input), std::move(scale), std::move(zero_point), output,
            axis);
        return op->getOutput();
    }
    g->addOpWithOutputs<DequantizeLinearObj>(
        std::move(input), std::move(scale), std::move(zero_point), output,
        axis);
    return output;
 }
 Tensor GraphHandlerObj::depthToSpace(Tensor input, Tensor output, int blocksize,
                                     std::string mode) {
    if (output) {
--- a/src/kernels/cuda/recv.cc
+++ b/src/kernels/cuda/recv.cc
@ -40,8 +40,7 @@ class RecvNCCL : public CudaKernelWithoutConfig {
    }
 };
-REGISTER_KERNEL(Device::CUDA, OpType::Recv, DataType::Float32, RecvNCCL,
-                "Recv_NCCL_CUDA_Float32");
+REGISTER_KERNEL(Device::CUDA, OpType::Recv, RecvNCCL, "Recv_NCCL_CUDA");
 } // namespace infini
 #endif
--- a/src/kernels/cuda/send.cc
+++ b/src/kernels/cuda/send.cc
@ -36,8 +36,7 @@ class SendNCCL : public CudaKernelWithoutConfig {
    }
 };
-REGISTER_KERNEL(Device::CUDA, OpType::Send, DataType::Float32, SendNCCL,
-                "Send_NCCL_CUDA_Float32");
+REGISTER_KERNEL(Device::CUDA, OpType::Send, SendNCCL, "Send_NCCL_CUDA");
 } // namespace infini
 #endif
--- a/src/operators/dequantize_linear.cc
+++ b/src/operators/dequantize_linear.cc
@ -0,0 +1,51 @@
 #include "operators/dequantize_linear.h"
 #include "utils/operator_utils.h"
 namespace infini {
 /// Builds the operator with inputs {input, scale} plus `zero_point` when it
 /// is non-null, a single output, and the given axis; then validates it
 /// against `graph`.
 DequantizeLinearObj::DequantizeLinearObj(GraphObj *graph, Tensor input,
                                          Tensor scale, Tensor zero_point,
                                          Tensor output, int axis)
    : OperatorObj(OpType::DequantizeLinear,
                  // The zero point is optional; omit it from the input list
                  // when it was not supplied.
                  !zero_point ? TensorVec{input, scale}
                              : TensorVec{input, scale, zero_point},
                  {output}),
      axis(axis) {
    IT_ASSERT(checkValid(graph));
 }
 /// The single output has the same shape as the first input.
 optional<vector<Shape>>
 DequantizeLinearObj::inferShape(const TensorVec &inputs) {
    vector<Shape> shapes{inputs[0]->getDims()};
    return shapes;
 }
 /// The output data type is taken from the scale tensor (second input).
 /// Exactly two or three inputs are accepted.
 vector<DataType>
 DequantizeLinearObj::inferDataType(const TensorVec &inputs) const {
    IT_ASSERT(inputs.size() == 2 || inputs.size() == 3);
    vector<DataType> dtypes;
    dtypes.push_back(inputs[1]->getDType());
    return dtypes;
 }
 /// Renders a human-readable description of the operator: guid, input
 /// shape, the guids of the inputs (zero point only when present), the axis
 /// and the output guids.
 std::string DequantizeLinearObj::toString() const {
    std::ostringstream os;
    os << "DequantizeLinear[" << getGuid() << "]";
    os << "(";
    os << vecToString(inputs[0]->getDims()) << ",";
    os << "input=" << inputs[0]->getGuid() << ",";
    os << "scale=" << inputs[1]->getGuid() << ",";
    // The zero point is an optional third input.
    if (inputs.size() > 2)
        os << "zero_point=" << inputs[2]->getGuid() << ",";
    os << "axis=" << axis << ",";
    os << "output=";
    for (const auto &output : outputs)
        os << output->getGuid() << ",";
    // Close the parenthesis opened above.
    os << ")";
    return os.str();
 }
 /// Workload key: the operator type id followed by the first input's dims.
 vector<int> DequantizeLinearObj::getWorkloadVector() const {
    const auto &dims = inputs[0]->getDims();
    vector<int> workload;
    workload.reserve(dims.size() + 1);
    workload.push_back(type.underlying());
    workload.insert(workload.end(), dims.begin(), dims.end());
    return workload;
 }
 /// Attribute key: only the operator type id is included.
 vector<int> DequantizeLinearObj::getOpAttrVector() const {
    vector<int> attrs;
    attrs.push_back(type.underlying());
    return attrs;
 }
 } // namespace infini
--- a/src/operators/dynamic_quantize_linear.cc
+++ b/src/operators/dynamic_quantize_linear.cc
@ -0,0 +1,46 @@
 #include "operators/dynamic_quantize_linear.h"
 #include "utils/operator_utils.h"
 namespace infini {
 DynamicQuantizeLinearObj::DynamicQuantizeLinearObj(
    GraphObj *graph, Tensor input, std::optional<TensorVec> outputs)
    : OperatorObj(OpType::DynamicQuantizeLinear, TensorVec{input},
                  ((!outputs) ? TensorVec(3, nullptr) : std::move(*outputs))) {
    IT_ASSERT(checkValid(graph));
 }
 /// Infers one shape per output. The operator has three outputs: the
 /// quantized tensor, which keeps the input's shape, and the scale and zero
 /// point, which are scalars (empty shape).
 optional<vector<Shape>>
 DynamicQuantizeLinearObj::inferShape(const TensorVec &inputs) {
    // One entry per output, matching numOutputs() == 3.
    return {{inputs[0]->getDims(), Shape{}, Shape{}}};
 }
 /// Infers one data type per output: uint8 for the quantized tensor,
 /// float32 for the scale and uint8 for the zero point, following the ONNX
 /// DynamicQuantizeLinear specification.
 vector<DataType>
 DynamicQuantizeLinearObj::inferDataType(const TensorVec &inputs) const {
    IT_ASSERT(inputs.size() == 1);
    // The operator has a single input, so the output types are fixed rather
    // than read from a (non-existent) second input.
    return {DataType::UInt8, DataType::Float32, DataType::UInt8};
 }
 std::string DynamicQuantizeLinearObj::toString() const {
    std::ostringstream os;
    os << "DynamicQuantizeLinear[" << getGuid() << "]";
    os << "(";
    os << vecToString(inputs[0]->getDims()) << ",";
    os << "input=" << inputs[0]->getGuid() << ",";
    os << "output=";
    for (auto output : outputs)
        os << output->getGuid() << ",";
    os << ")";
    return os.str();
 }
 /// Workload key: the operator type id followed by the input's dims.
 vector<int> DynamicQuantizeLinearObj::getWorkloadVector() const {
    vector<int> workload{type.underlying()};
    const auto &dims = inputs[0]->getDims();
    workload.insert(workload.end(), dims.begin(), dims.end());
    return workload;
 }
 /// Attribute key: only the operator type id is included.
 vector<int> DynamicQuantizeLinearObj::getOpAttrVector() const {
    vector<int> attrs;
    attrs.push_back(type.underlying());
    return attrs;
 }
 } // namespace infini