feat: add frontend DynamicQuantizeLinear and DequantizeLinear kernels

This commit is contained in:
kilinchange 2023-12-18 13:58:20 +08:00
parent f51ce3231a
commit c63ed4326d
9 changed files with 235 additions and 4 deletions

View File

@ -99,6 +99,11 @@ class GraphHandlerObj {
int outputType, Tensor input);
Tensor depthToSpace(Tensor input, Tensor output, int blocksize,
std::string mode);
// Quantize `input` dynamically, returning the operator's three output
// tensors; `outputs` may pre-supply them (pass std::nullopt to have the
// graph create them).
TensorVec dynamicQuantizeLinear(Tensor input,
                                std::optional<TensorVec> outputs);
// Dequantize `input` using `scale` and `zero_point` along `axis`,
// writing into `output` (created by the graph when null).
Tensor dequantizeLinear(Tensor input, Tensor scale, Tensor zero_point,
                        Tensor output, int axis);
//------ modifiers

View File

@ -0,0 +1,41 @@
#pragma once
#include "core/operator.h"

namespace infini {
/**
 * @brief The linear dequantization operator.
 * It consumes a quantized tensor, a scale, and a zero point to compute
 * the full precision tensor.
 */
class DequantizeLinearObj : public OperatorObj {
    int axis; // axis of the dequantizing dimension of the input tensor

  public:
    /**
     * @brief Construct a new DequantizeLinear object.
     *
     * @param graph The computation graph that this operator belongs to.
     * @param input The quantized input tensor.
     * @param scale Scale for input.
     * @param zero_point Zero point for input; may be null, in which case
     * only {input, scale} are recorded as inputs.
     * @param output The full-precision output tensor.
     * @param axis The axis of the dequantizing dimension of the input tensor.
     */
    DequantizeLinearObj(GraphObj *graph, Tensor input, Tensor scale,
                        Tensor zero_point, Tensor output, int axis);

    OP_CLONE(DequantizeLinearObj);

    optional<vector<Shape>> inferShape(const TensorVec &inputs) override;
    std::string toString() const override;

    int numInputs() const override { return inputs.size(); }
    int numOutputs() const override { return 1; }

  private:
    vector<int> getWorkloadVector() const override;
    vector<int> getOpAttrVector() const override;
    vector<DataType> inferDataType(const TensorVec &inputs) const override;
};
} // namespace infini

View File

@ -0,0 +1,37 @@
#pragma once
#include "core/operator.h"
namespace infini {
/**
 * @brief A Function to fuse calculation for Scale, Zero Point and FP32->8Bit
 * conversion of FP32 Input data (ONNX DynamicQuantizeLinear).
 *
 * Takes a single full-precision input and produces three outputs
 * (see numOutputs() below).
 */
class DynamicQuantizeLinearObj : public OperatorObj {
public:
/**
 * @brief Construct a new DynamicQuantizeLinear object.
 *
 * @param graph The computation graph that this operator belongs to.
 * @param input The input tensor.
 * @param outputs The output tensors; pass std::nullopt to have the graph
 * allocate them from the inferred shapes/dtypes.
 */
DynamicQuantizeLinearObj(GraphObj *graph, Tensor input,
std::optional<TensorVec> outputs);
OP_CLONE(DynamicQuantizeLinearObj);
optional<vector<Shape>> inferShape(const TensorVec &inputs) override;
std::string toString() const override;
int numInputs() const override { return inputs.size(); }
// Three outputs: the quantized tensor plus its scale and zero point.
int numOutputs() const override { return 3; }
private:
vector<int> getWorkloadVector() const override;
vector<int> getOpAttrVector() const override;
vector<DataType> inferDataType(const TensorVec &inputs) const override;
};
} // namespace infini

View File

@ -857,6 +857,28 @@ class OnnxStub:
tensors[output_name] = self.handler.tensor(dims, tensor.data_type)
data[output_name] = tensor
tensors[output_name].set_weight()
elif node.op_type == "DynamicQuantizeLinear":
for name, tensor in zip(
node.output,
self.handler.dynamicQuantizeLinear(
tensors[node.input[0]], None
),
):
tensors[name] = tensor
elif node.op_type == "DequantizeLinear":
attributes = _parse_attribute(
node,
{
"axis": 1,
},
)
axis = attributes["axis"]
tensors[node.output[0]] = self.handler.dequantizeLinear(
tensor[node.input[0]],
tensor[node.input[1]],
tensor[node.input[2]] if len(node.input) > 2 else None,
axis,
)
else:
raise Exception('Unsupported operator "{}"'.format(node.op_type))
new_node_name.append(node.name)

View File

@ -6,6 +6,8 @@
#include "operators/broadcast.h"
#include "operators/concat.h"
#include "operators/conv.h"
#include "operators/dequantize_linear.h"
#include "operators/dynamic_quantize_linear.h"
#include "operators/element_wise.h"
#include "operators/expand.h"
#include "operators/gather.h"
@ -506,6 +508,35 @@ Tensor GraphHandlerObj::where(Tensor inputX, Tensor inputY, Tensor condition,
}
}
// Add a DynamicQuantizeLinear operator to the graph and return its output
// tensors. Callers may pre-supply the outputs via `outputs`; otherwise the
// graph allocates the three outputs itself.
TensorVec
GraphHandlerObj::dynamicQuantizeLinear(Tensor input,
                                       std::optional<TensorVec> outputs) {
    if (!outputs) {
        // No pre-allocated outputs: let addOp create them.
        return g->addOp<DynamicQuantizeLinearObj>(std::move(input), outputs)
            ->getOutputs();
    }
    g->addOpWithOutputs<DynamicQuantizeLinearObj>(std::move(input), outputs);
    return *outputs;
}
// Add a DequantizeLinear operator to the graph and return its output
// tensor (either the caller-supplied `output` or a freshly created one).
Tensor GraphHandlerObj::dequantizeLinear(Tensor input, Tensor scale,
                                         Tensor zero_point, Tensor output,
                                         int axis) {
    if (!output) {
        // No pre-allocated output: let addOp create it.
        return g
            ->addOp<DequantizeLinearObj>(std::move(input), std::move(scale),
                                         std::move(zero_point), output, axis)
            ->getOutput();
    }
    g->addOpWithOutputs<DequantizeLinearObj>(std::move(input), std::move(scale),
                                             std::move(zero_point), output,
                                             axis);
    return output;
}
Tensor GraphHandlerObj::depthToSpace(Tensor input, Tensor output, int blocksize,
std::string mode) {
if (output) {

View File

@ -40,8 +40,7 @@ class RecvNCCL : public CudaKernelWithoutConfig {
}
};
REGISTER_KERNEL(Device::CUDA, OpType::Recv, DataType::Float32, RecvNCCL,
"Recv_NCCL_CUDA_Float32");
REGISTER_KERNEL(Device::CUDA, OpType::Recv, RecvNCCL, "Recv_NCCL_CUDA");
} // namespace infini
#endif

View File

@ -36,8 +36,7 @@ class SendNCCL : public CudaKernelWithoutConfig {
}
};
REGISTER_KERNEL(Device::CUDA, OpType::Send, DataType::Float32, SendNCCL,
"Send_NCCL_CUDA_Float32");
REGISTER_KERNEL(Device::CUDA, OpType::Send, SendNCCL, "Send_NCCL_CUDA");
} // namespace infini
#endif

View File

@ -0,0 +1,51 @@
#include "operators/dequantize_linear.h"
#include "utils/operator_utils.h"
namespace infini {
// Construct a DequantizeLinear operator. `zero_point` is optional: when it
// is null, only {input, scale} are registered as operator inputs, which is
// why inferDataType below accepts either 2 or 3 inputs.
DequantizeLinearObj::DequantizeLinearObj(GraphObj *graph, Tensor input,
                                         Tensor scale, Tensor zero_point,
                                         Tensor output, int axis)
    : OperatorObj(OpType::DequantizeLinear,
                  zero_point ? TensorVec{input, scale, zero_point}
                             : TensorVec{input, scale},
                  {output}),
      axis(axis) {
    // Validates shapes/dtypes against the graph (and presumably fills in
    // the output tensor when it was passed as null — TODO confirm).
    IT_ASSERT(checkValid(graph));
}
// The dequantized output has exactly the shape of the quantized input;
// scale and zero point affect values only, never shape.
optional<vector<Shape>>
DequantizeLinearObj::inferShape(const TensorVec &inputs) {
    return {{inputs[0]->getDims()}};
}
// The output adopts the data type of `scale` (inputs[1]) — the
// full-precision type the quantized input is expanded to. The input count
// is 2 or 3 depending on whether a zero point was supplied (see ctor).
vector<DataType>
DequantizeLinearObj::inferDataType(const TensorVec &inputs) const {
    IT_ASSERT(inputs.size() == 2 || inputs.size() == 3);
    return {inputs[1]->getDType()};
}
// Human-readable description of the operator for logging/debugging.
std::string DequantizeLinearObj::toString() const {
    std::ostringstream os;
    os << "DequantizeLinear[" << getGuid() << "]";
    os << "(";
    os << vecToString(inputs[0]->getDims()) << ",";
    os << "input=" << inputs[0]->getGuid() << ",";
    os << "scale=" << inputs[1]->getGuid() << ",";
    os << "axis=" << axis << ",";
    os << "output=";
    for (auto output : outputs)
        os << output->getGuid() << ",";
    // Fix: the closing parenthesis was missing, leaving the description
    // unbalanced (cf. DynamicQuantizeLinearObj::toString).
    os << ")";
    return os.str();
}
// Workload signature: the operator type id followed by the input dims.
vector<int> DequantizeLinearObj::getWorkloadVector() const {
    auto workload = inputs[0]->getDims();
    workload.insert(workload.begin(), type.underlying());
    return workload;
}
// Attribute signature used to tell apart operators of the same type.
// Fix: include `axis` so two DequantizeLinear ops that differ only in
// their axis attribute are not considered identical.
vector<int> DequantizeLinearObj::getOpAttrVector() const {
    return {type.underlying(), axis};
}
} // namespace infini

View File

@ -0,0 +1,46 @@
#include "operators/dynamic_quantize_linear.h"
#include "utils/operator_utils.h"
namespace infini {
// Construct a DynamicQuantizeLinear operator. When `outputs` is empty,
// three null output slots are registered (matching numOutputs() == 3) so
// that checkValid() can presumably materialize them from the inferred
// shapes/dtypes — TODO confirm against OperatorObj::checkValid.
DynamicQuantizeLinearObj::DynamicQuantizeLinearObj(
    GraphObj *graph, Tensor input, std::optional<TensorVec> outputs)
    : OperatorObj(OpType::DynamicQuantizeLinear, TensorVec{input},
                  ((!outputs) ? TensorVec(3, nullptr) : std::move(*outputs))) {
    IT_ASSERT(checkValid(graph));
}
// Shape inference for the operator's three outputs: the quantized tensor
// `y` has the input's shape, while `y_scale` and `y_zero_point` are
// scalars (rank 0) per the ONNX DynamicQuantizeLinear specification.
// Fix: the original returned a single shape although numOutputs() == 3.
optional<vector<Shape>>
DynamicQuantizeLinearObj::inferShape(const TensorVec &inputs) {
    return {{inputs[0]->getDims(), {}, {}}};
}
// Data types of the three outputs.
// Fix: the original read inputs[1] right after asserting that exactly one
// input exists — an out-of-bounds access. Per the ONNX spec the outputs
// are y (uint8), y_scale (float32) and y_zero_point (uint8).
vector<DataType>
DynamicQuantizeLinearObj::inferDataType(const TensorVec &inputs) const {
    IT_ASSERT(inputs.size() == 1);
    return {DataType::UInt8, DataType::Float32, DataType::UInt8};
}
std::string DynamicQuantizeLinearObj::toString() const {
std::ostringstream os;
os << "DynamicQuantizeLinear[" << getGuid() << "]";
os << "(";
os << vecToString(inputs[0]->getDims()) << ",";
os << "input=" << inputs[0]->getGuid() << ",";
os << "output=";
for (auto output : outputs)
os << output->getGuid() << ",";
os << ")";
return os.str();
}
// Workload signature: the operator type id followed by the input dims.
vector<int> DynamicQuantizeLinearObj::getWorkloadVector() const {
    vector<int> workload{type.underlying()};
    const auto &dims = inputs[0]->getDims();
    workload.insert(workload.end(), dims.begin(), dims.end());
    return workload;
}
// Attribute signature; this operator has no attributes beyond its type.
vector<int> DynamicQuantizeLinearObj::getOpAttrVector() const {
    return {type.underlying()};
}
} // namespace infini