diff --git a/include/core/graph_handler.h b/include/core/graph_handler.h
index f095db81..2b3e45c7 100644
--- a/include/core/graph_handler.h
+++ b/include/core/graph_handler.h
@@ -99,6 +99,11 @@ class GraphHandlerObj {
                        int outputType, Tensor input);
     Tensor depthToSpace(Tensor input, Tensor output, int blocksize,
                         std::string mode);
+    TensorVec dynamicQuantizeLinear(Tensor input,
+                                    std::optional<TensorVec> outputs);
+
+    Tensor dequantizeLinear(Tensor input, Tensor scale, Tensor zero_point,
+                            Tensor output, int axis);
 
     //------ modifiers
 
diff --git a/include/operators/dequantize_linear.h b/include/operators/dequantize_linear.h
new file mode 100644
index 00000000..858c17bf
--- /dev/null
+++ b/include/operators/dequantize_linear.h
@@ -0,0 +1,42 @@
+#pragma once
+#include "core/operator.h"
+
+namespace infini {
+/**
+ * @brief The linear dequantization operator.
+ * It consumes a quantized tensor, a scale, and a zero point to compute
+ * the full precision tensor.
+ */
+class DequantizeLinearObj : public OperatorObj {
+    int axis;
+
+  public:
+    /**
+     * @brief Construct a new DequantizeLinear object.
+     *
+     * @param graph The computation graph that this operator belongs to.
+     * @param input The quantized input tensor.
+     * @param scale Scale for input.
+     * @param zero_point Zero point for input; may be nullptr when absent.
+     * @param output The full-precision output tensor.
+     * @param axis The axis of the dequantizing dimension of the input tensor.
+     */
+    DequantizeLinearObj(GraphObj *graph, Tensor input, Tensor scale,
+                        Tensor zero_point, Tensor output, int axis);
+    OP_CLONE(DequantizeLinearObj);
+
+    optional<vector<Shape>> inferShape(const TensorVec &inputs) override;
+
+    std::string toString() const override;
+    int numInputs() const override { return inputs.size(); }
+    int numOutputs() const override { return 1; }
+    int getAxis() const { return axis; }
+
+  private:
+    vector<int> getWorkloadVector() const override;
+    vector<int> getOpAttrVector() const override;
+
+    vector<DataType> inferDataType(const TensorVec &inputs) const override;
+};
+
+} // namespace infini
diff --git a/include/operators/dynamic_quantize_linear.h b/include/operators/dynamic_quantize_linear.h
new file mode 100644
index 00000000..a5363710
--- /dev/null
+++ b/include/operators/dynamic_quantize_linear.h
@@ -0,0 +1,36 @@
+#pragma once
+#include "core/operator.h"
+
+namespace infini {
+/**
+ * @brief A Function to fuse calculation for Scale, Zero Point and FP32->8Bit
+ * conversion of FP32 Input data.
+ */
+class DynamicQuantizeLinearObj : public OperatorObj {
+
+  public:
+    /**
+     * @brief Construct a new DynamicQuantizeLinear object.
+     *
+     * @param graph The computation graph that this operator belongs to.
+     * @param input The input tensor.
+     * @param outputs Optional pre-built outputs {y, y_scale, y_zero_point}.
+     */
+    DynamicQuantizeLinearObj(GraphObj *graph, Tensor input,
+                             std::optional<TensorVec> outputs);
+    OP_CLONE(DynamicQuantizeLinearObj);
+
+    optional<vector<Shape>> inferShape(const TensorVec &inputs) override;
+
+    std::string toString() const override;
+    int numInputs() const override { return inputs.size(); }
+    int numOutputs() const override { return 3; }
+
+  private:
+    vector<int> getWorkloadVector() const override;
+    vector<int> getOpAttrVector() const override;
+
+    vector<DataType> inferDataType(const TensorVec &inputs) const override;
+};
+
+} // namespace infini
diff --git a/pyinfinitensor/src/pyinfinitensor/onnx.py b/pyinfinitensor/src/pyinfinitensor/onnx.py
index 90a3d3ab..ae0d1b18 100644
--- a/pyinfinitensor/src/pyinfinitensor/onnx.py
+++ b/pyinfinitensor/src/pyinfinitensor/onnx.py
@@ -857,6 +857,29 @@ class OnnxStub:
                 tensors[output_name] = self.handler.tensor(dims, tensor.data_type)
                 data[output_name] = tensor
                 tensors[output_name].set_weight()
+            elif node.op_type == "DynamicQuantizeLinear":
+                # Three outputs: y, y_scale, y_zero_point.
+                for name, tensor in zip(
+                    node.output,
+                    self.handler.dynamicQuantizeLinear(
+                        tensors[node.input[0]], None
+                    ),
+                ):
+                    tensors[name] = tensor
+            elif node.op_type == "DequantizeLinear":
+                attributes = _parse_attribute(
+                    node,
+                    {
+                        "axis": 1,
+                    },
+                )
+                axis = attributes["axis"]
+                tensors[node.output[0]] = self.handler.dequantizeLinear(
+                    tensors[node.input[0]],
+                    tensors[node.input[1]],
+                    tensors[node.input[2]] if len(node.input) > 2 else None,
+                    tensors.get(node.output[0]),
+                    axis,
+                )
             else:
                 raise Exception('Unsupported operator "{}"'.format(node.op_type))
             new_node_name.append(node.name)
diff --git a/src/core/graph_handler.cc b/src/core/graph_handler.cc
index 1eb73499..2e4c5222 100644
--- a/src/core/graph_handler.cc
+++ b/src/core/graph_handler.cc
@@ -6,6 +6,8 @@
 #include "operators/broadcast.h"
 #include "operators/concat.h"
 #include "operators/conv.h"
+#include "operators/dequantize_linear.h"
+#include "operators/dynamic_quantize_linear.h"
 #include "operators/element_wise.h"
 #include "operators/expand.h"
 #include "operators/gather.h"
@@ -506,6 +508,35 @@ Tensor GraphHandlerObj::where(Tensor inputX, Tensor inputY, Tensor condition,
     }
 }
 
+TensorVec
+GraphHandlerObj::dynamicQuantizeLinear(Tensor input,
+                                       std::optional<TensorVec> outputs) {
+    if (outputs) {
+        g->addOpWithOutputs<DynamicQuantizeLinearObj>(std::move(input),
+                                                      outputs);
+        return *outputs;
+    } else {
+        return g->addOp<DynamicQuantizeLinearObj>(std::move(input), outputs)
+            ->getOutputs();
+    }
+}
+
+Tensor GraphHandlerObj::dequantizeLinear(Tensor input, Tensor scale,
+                                         Tensor zero_point, Tensor output,
+                                         int axis) {
+    if (output) {
+        g->addOpWithOutputs<DequantizeLinearObj>(
+            std::move(input), std::move(scale), std::move(zero_point), output,
+            axis);
+        return output;
+    } else {
+        return g
+            ->addOp<DequantizeLinearObj>(std::move(input), std::move(scale),
+                                         std::move(zero_point), output, axis)
+            ->getOutput();
+    }
+}
+
 Tensor GraphHandlerObj::depthToSpace(Tensor input, Tensor output, int blocksize,
                                      std::string mode) {
     if (output) {
diff --git a/src/kernels/cuda/recv.cc b/src/kernels/cuda/recv.cc
index 7fd7ee49..42c9073e 100644
--- a/src/kernels/cuda/recv.cc
+++ b/src/kernels/cuda/recv.cc
@@ -40,8 +40,7 @@ class RecvNCCL : public CudaKernelWithoutConfig {
     }
 };
 
-REGISTER_KERNEL(Device::CUDA, OpType::Recv, DataType::Float32, RecvNCCL,
-                "Recv_NCCL_CUDA_Float32");
+REGISTER_KERNEL(Device::CUDA, OpType::Recv, RecvNCCL, "Recv_NCCL_CUDA");
 } // namespace infini
 
 #endif
diff --git a/src/kernels/cuda/send.cc b/src/kernels/cuda/send.cc
index 38684062..6f8af9aa 100644
--- a/src/kernels/cuda/send.cc
+++ b/src/kernels/cuda/send.cc
@@ -36,8 +36,7 @@ class SendNCCL : public CudaKernelWithoutConfig {
     }
 };
 
-REGISTER_KERNEL(Device::CUDA, OpType::Send, DataType::Float32, SendNCCL,
-                "Send_NCCL_CUDA_Float32");
+REGISTER_KERNEL(Device::CUDA, OpType::Send, SendNCCL, "Send_NCCL_CUDA");
 } // namespace infini
 
 #endif
diff --git a/src/operators/dequantize_linear.cc b/src/operators/dequantize_linear.cc
new file mode 100644
index 00000000..fc3caa81
--- /dev/null
+++ b/src/operators/dequantize_linear.cc
@@ -0,0 +1,55 @@
+#include "operators/dequantize_linear.h"
+#include "utils/operator_utils.h"
+
+namespace infini {
+DequantizeLinearObj::DequantizeLinearObj(GraphObj *graph, Tensor input,
+                                         Tensor scale, Tensor zero_point,
+                                         Tensor output, int axis)
+    : OperatorObj(OpType::DequantizeLinear,
+                  zero_point ? TensorVec{input, scale, zero_point}
+                             : TensorVec{input, scale},
+                  {output}),
+      axis(axis) {
+    IT_ASSERT(checkValid(graph));
+}
+
+optional<vector<Shape>>
+DequantizeLinearObj::inferShape(const TensorVec &inputs) {
+    // The dequantized output has the same shape as the quantized input.
+    return {{inputs[0]->getDims()}};
+}
+
+vector<DataType>
+DequantizeLinearObj::inferDataType(const TensorVec &inputs) const {
+    IT_ASSERT(inputs.size() == 2 || inputs.size() == 3);
+    // The output dtype follows the full-precision scale tensor.
+    return {inputs[1]->getDType()};
+}
+
+std::string DequantizeLinearObj::toString() const {
+    std::ostringstream os;
+    os << "DequantizeLinear[" << getGuid() << "]";
+    os << "(";
+    os << vecToString(inputs[0]->getDims()) << ",";
+    os << "input=" << inputs[0]->getGuid() << ",";
+    os << "scale=" << inputs[1]->getGuid() << ",";
+    os << "axis=" << axis << ",";
+    os << "output=";
+    for (auto output : outputs)
+        os << output->getGuid() << ",";
+    os << ")";
+    return os.str();
+}
+
+vector<int> DequantizeLinearObj::getWorkloadVector() const {
+    vector<int> ret = inputs[0]->getDims();
+    ret.emplace(ret.begin(), type.underlying());
+    return ret;
+}
+
+vector<int> DequantizeLinearObj::getOpAttrVector() const {
+    // Include axis so ops differing only in axis are not conflated.
+    return {type.underlying(), axis};
+}
+
+} // namespace infini
diff --git a/src/operators/dynamic_quantize_linear.cc b/src/operators/dynamic_quantize_linear.cc
new file mode 100644
index 00000000..a34b4861
--- /dev/null
+++ b/src/operators/dynamic_quantize_linear.cc
@@ -0,0 +1,48 @@
+#include "operators/dynamic_quantize_linear.h"
+#include "utils/operator_utils.h"
+
+namespace infini {
+DynamicQuantizeLinearObj::DynamicQuantizeLinearObj(
+    GraphObj *graph, Tensor input, std::optional<TensorVec> outputs)
+    : OperatorObj(OpType::DynamicQuantizeLinear, TensorVec{input},
+                  ((!outputs) ? TensorVec(3, nullptr) : std::move(*outputs))) {
+    IT_ASSERT(checkValid(graph));
+}
+
+optional<vector<Shape>>
+DynamicQuantizeLinearObj::inferShape(const TensorVec &inputs) {
+    // y keeps the input shape; y_scale and y_zero_point are scalars
+    // (represented here as 1-element shapes — confirm scalar convention).
+    return {{inputs[0]->getDims(), {1}, {1}}};
+}
+
+vector<DataType>
+DynamicQuantizeLinearObj::inferDataType(const TensorVec &inputs) const {
+    IT_ASSERT(inputs.size() == 1);
+    // ONNX spec: y and y_zero_point are uint8, y_scale is float32.
+    return {DataType::UInt8, DataType::Float32, DataType::UInt8};
+}
+
+std::string DynamicQuantizeLinearObj::toString() const {
+    std::ostringstream os;
+    os << "DynamicQuantizeLinear[" << getGuid() << "]";
+    os << "(";
+    os << vecToString(inputs[0]->getDims()) << ",";
+    os << "input=" << inputs[0]->getGuid() << ",";
+    os << "output=";
+    for (auto output : outputs)
+        os << output->getGuid() << ",";
+    os << ")";
+    return os.str();
+}
+
+vector<int> DynamicQuantizeLinearObj::getWorkloadVector() const {
+    vector<int> ret = inputs[0]->getDims();
+    ret.emplace(ret.begin(), type.underlying());
+    return ret;
+}
+
+vector<int> DynamicQuantizeLinearObj::getOpAttrVector() const {
+    return {type.underlying()};
+}
+
+} // namespace infini