forked from jiuyuan/InfiniTensor
feat: add frontend DynamicQuantizeLinear and DequantizeLinear kernels
This commit is contained in:
parent
f51ce3231a
commit
c63ed4326d
|
@ -99,6 +99,11 @@ class GraphHandlerObj {
|
|||
int outputType, Tensor input);
|
||||
Tensor depthToSpace(Tensor input, Tensor output, int blocksize,
|
||||
std::string mode);
|
||||
TensorVec dynamicQuantizeLinear(Tensor input,
|
||||
std::optional<TensorVec> outputs);
|
||||
|
||||
Tensor dequantizeLinear(Tensor input, Tensor scale, Tensor zero_point,
|
||||
Tensor output, int axis);
|
||||
|
||||
//------ modifiers
|
||||
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
#pragma once
|
||||
#include "core/operator.h"
|
||||
|
||||
namespace infini {
|
||||
/**
|
||||
* @brief The linear dequantization operator.
|
||||
* It consumes a quantized tensor, a scale, and a zero point to compute
|
||||
* the full precision tensor.
|
||||
*/
|
||||
class DequantizeLinearObj : public OperatorObj {
|
||||
int axis;
|
||||
|
||||
public:
|
||||
/**
|
||||
* @brief Construct a new DequantizeLinear object.
|
||||
*
|
||||
* @param graph The computation graph that this operator belongs to.
|
||||
* @param input The input tensor.
|
||||
* @param scale Scale for input.
|
||||
* @param zero_point Zero point for input.
|
||||
* @param outputs The output tensors.
|
||||
* @param axis The axis of the dequantizing dimension of the input tensor.
|
||||
*/
|
||||
DequantizeLinearObj(GraphObj *graph, Tensor input, Tensor scale,
|
||||
Tensor zero_pointr, Tensor output, int axis);
|
||||
OP_CLONE(DequantizeLinearObj);
|
||||
|
||||
optional<vector<Shape>> inferShape(const TensorVec &inputs) override;
|
||||
|
||||
std::string toString() const override;
|
||||
int numInputs() const override { return inputs.size(); }
|
||||
int numOutputs() const override { return 1; }
|
||||
|
||||
private:
|
||||
vector<int> getWorkloadVector() const override;
|
||||
vector<int> getOpAttrVector() const override;
|
||||
|
||||
vector<DataType> inferDataType(const TensorVec &inputs) const override;
|
||||
};
|
||||
|
||||
} // namespace infini
|
|
@ -0,0 +1,37 @@
|
|||
#pragma once
|
||||
#include "core/operator.h"
|
||||
|
||||
namespace infini {
|
||||
/**
|
||||
* @brief A Function to fuse calculation for Scale, Zero Point and FP32->8Bit
|
||||
* conversion of FP32 Input data.
|
||||
*
|
||||
*/
|
||||
class DynamicQuantizeLinearObj : public OperatorObj {
|
||||
|
||||
public:
|
||||
/**
|
||||
* @brief Construct a new DynamicQuantizeLinear object.
|
||||
*
|
||||
* @param graph The computation graph that this operator belongs to.
|
||||
* @param input The input tensor.
|
||||
* @param outputs The output tensors.
|
||||
*/
|
||||
DynamicQuantizeLinearObj(GraphObj *graph, Tensor input,
|
||||
std::optional<TensorVec> outputs);
|
||||
OP_CLONE(DynamicQuantizeLinearObj);
|
||||
|
||||
optional<vector<Shape>> inferShape(const TensorVec &inputs) override;
|
||||
|
||||
std::string toString() const override;
|
||||
int numInputs() const override { return inputs.size(); }
|
||||
int numOutputs() const override { return 3; }
|
||||
|
||||
private:
|
||||
vector<int> getWorkloadVector() const override;
|
||||
vector<int> getOpAttrVector() const override;
|
||||
|
||||
vector<DataType> inferDataType(const TensorVec &inputs) const override;
|
||||
};
|
||||
|
||||
} // namespace infini
|
|
@ -857,6 +857,28 @@ class OnnxStub:
|
|||
tensors[output_name] = self.handler.tensor(dims, tensor.data_type)
|
||||
data[output_name] = tensor
|
||||
tensors[output_name].set_weight()
|
||||
elif node.op_type == "DynamicQuantizeLinear":
|
||||
for name, tensor in zip(
|
||||
node.output,
|
||||
self.handler.dynamicQuantizeLinear(
|
||||
tensors[node.input[0]], None
|
||||
),
|
||||
):
|
||||
tensors[name] = tensor
|
||||
elif node.op_type == "DequantizeLinear":
|
||||
attributes = _parse_attribute(
|
||||
node,
|
||||
{
|
||||
"axis": 1,
|
||||
},
|
||||
)
|
||||
axis = attributes["axis"]
|
||||
tensors[node.output[0]] = self.handler.dequantizeLinear(
|
||||
tensor[node.input[0]],
|
||||
tensor[node.input[1]],
|
||||
tensor[node.input[2]] if len(node.input) > 2 else None,
|
||||
axis,
|
||||
)
|
||||
else:
|
||||
raise Exception('Unsupported operator "{}"'.format(node.op_type))
|
||||
new_node_name.append(node.name)
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
#include "operators/broadcast.h"
|
||||
#include "operators/concat.h"
|
||||
#include "operators/conv.h"
|
||||
#include "operators/dequantize_linear.h"
|
||||
#include "operators/dynamic_quantize_linear.h"
|
||||
#include "operators/element_wise.h"
|
||||
#include "operators/expand.h"
|
||||
#include "operators/gather.h"
|
||||
|
@ -506,6 +508,35 @@ Tensor GraphHandlerObj::where(Tensor inputX, Tensor inputY, Tensor condition,
|
|||
}
|
||||
}
|
||||
|
||||
// Adds a DynamicQuantizeLinear node to the graph. When concrete output
// tensors are supplied they are wired to the op; otherwise the graph
// allocates fresh outputs and they are returned.
TensorVec
GraphHandlerObj::dynamicQuantizeLinear(Tensor input,
                                       std::optional<TensorVec> outputs) {
    if (!outputs) {
        return g->addOp<DynamicQuantizeLinearObj>(std::move(input), outputs)
            ->getOutputs();
    }
    g->addOpWithOutputs<DynamicQuantizeLinearObj>(std::move(input), outputs);
    return *outputs;
}
|
||||
|
||||
// Adds a DequantizeLinear node. A null `output` asks the graph to allocate
// the result tensor; otherwise the supplied tensor is used as-is.
Tensor GraphHandlerObj::dequantizeLinear(Tensor input, Tensor scale,
                                         Tensor zero_point, Tensor output,
                                         int axis) {
    if (!output) {
        return g
            ->addOp<DequantizeLinearObj>(std::move(input), std::move(scale),
                                         std::move(zero_point), output, axis)
            ->getOutput();
    }
    g->addOpWithOutputs<DequantizeLinearObj>(std::move(input), std::move(scale),
                                             std::move(zero_point), output,
                                             axis);
    return output;
}
|
||||
|
||||
Tensor GraphHandlerObj::depthToSpace(Tensor input, Tensor output, int blocksize,
|
||||
std::string mode) {
|
||||
if (output) {
|
||||
|
|
|
@ -40,8 +40,7 @@ class RecvNCCL : public CudaKernelWithoutConfig {
|
|||
}
|
||||
};
|
||||
|
||||
REGISTER_KERNEL(Device::CUDA, OpType::Recv, DataType::Float32, RecvNCCL,
|
||||
"Recv_NCCL_CUDA_Float32");
|
||||
REGISTER_KERNEL(Device::CUDA, OpType::Recv, RecvNCCL, "Recv_NCCL_CUDA");
|
||||
} // namespace infini
|
||||
|
||||
#endif
|
||||
|
|
|
@ -36,8 +36,7 @@ class SendNCCL : public CudaKernelWithoutConfig {
|
|||
}
|
||||
};
|
||||
|
||||
REGISTER_KERNEL(Device::CUDA, OpType::Send, DataType::Float32, SendNCCL,
|
||||
"Send_NCCL_CUDA_Float32");
|
||||
REGISTER_KERNEL(Device::CUDA, OpType::Send, SendNCCL, "Send_NCCL_CUDA");
|
||||
} // namespace infini
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,51 @@
|
|||
#include "operators/dequantize_linear.h"
|
||||
#include "utils/operator_utils.h"
|
||||
|
||||
namespace infini {
|
||||
DequantizeLinearObj::DequantizeLinearObj(GraphObj *graph, Tensor input,
|
||||
Tensor scale, Tensor zero_point,
|
||||
Tensor output, int axis)
|
||||
: OperatorObj(OpType::DequantizeLinear,
|
||||
zero_point ? TensorVec{input, scale, zero_point}
|
||||
: TensorVec{input, scale},
|
||||
{output}),
|
||||
axis(axis) {
|
||||
IT_ASSERT(checkValid(graph));
|
||||
}
|
||||
|
||||
// The dequantized output has exactly the shape of the quantized input.
optional<vector<Shape>>
DequantizeLinearObj::inferShape(const TensorVec &inputs) {
    return {{inputs.front()->getDims()}};
}
||||
|
||||
// Two inputs (input, scale) or three (plus zero_point) are legal; the
// full-precision result carries the scale tensor's dtype.
vector<DataType>
DequantizeLinearObj::inferDataType(const TensorVec &inputs) const {
    const auto numIn = inputs.size();
    IT_ASSERT(numIn >= 2 && numIn <= 3);
    return {inputs[1]->getDType()};
}
|
||||
|
||||
std::string DequantizeLinearObj::toString() const {
|
||||
std::ostringstream os;
|
||||
os << "DequantizeLinear[" << getGuid() << "]";
|
||||
os << "(";
|
||||
os << vecToString(inputs[0]->getDims()) << ",";
|
||||
os << "input=" << inputs[0]->getGuid() << ",";
|
||||
os << "scale=" << inputs[1]->getGuid() << ",";
|
||||
os << "axis=" << axis << ",";
|
||||
os << "output=";
|
||||
for (auto output : outputs)
|
||||
os << output->getGuid() << ",";
|
||||
return os.str();
|
||||
}
|
||||
|
||||
// Workload fingerprint: op type id followed by the input dimensions.
vector<int> DequantizeLinearObj::getWorkloadVector() const {
    vector<int> workload{type.underlying()};
    const auto &dims = inputs[0]->getDims();
    workload.insert(workload.end(), dims.begin(), dims.end());
    return workload;
}
||||
|
||||
// Attribute fingerprint. Fix: include `axis` — the original omitted it,
// so two DequantizeLinear ops differing only in axis compared as
// attribute-equal.
vector<int> DequantizeLinearObj::getOpAttrVector() const {
    return {type.underlying(), axis};
}
||||
|
||||
} // namespace infini
|
|
@ -0,0 +1,46 @@
|
|||
#include "operators/dynamic_quantize_linear.h"
|
||||
#include "utils/operator_utils.h"
|
||||
|
||||
namespace infini {
|
||||
DynamicQuantizeLinearObj::DynamicQuantizeLinearObj(
|
||||
GraphObj *graph, Tensor input, std::optional<TensorVec> outputs)
|
||||
: OperatorObj(OpType::DynamicQuantizeLinear, TensorVec{input},
|
||||
((!outputs) ? TensorVec(3, nullptr) : std::move(*outputs))) {
|
||||
IT_ASSERT(checkValid(graph));
|
||||
}
|
||||
|
||||
// Fix: the op declares three outputs (y, y_scale, y_zero_point) but the
// original returned only one shape, leaving the scale/zero-point outputs
// unshaped. Per ONNX, y keeps the input shape; y_scale and y_zero_point
// are scalars — represented here as {1}; TODO confirm the project's
// scalar-shape convention.
optional<vector<Shape>>
DynamicQuantizeLinearObj::inferShape(const TensorVec &inputs) {
    const auto dims = inputs[0]->getDims();
    return {{dims, {1}, {1}}};
}
|
||||
|
||||
// Fix: the original read inputs[1] while asserting exactly one input —
// an out-of-bounds access. Per ONNX DynamicQuantizeLinear, the outputs
// are y: uint8, y_scale: float32, y_zero_point: uint8.
vector<DataType>
DynamicQuantizeLinearObj::inferDataType(const TensorVec &inputs) const {
    IT_ASSERT(inputs.size() == 1);
    return {DataType::UInt8, DataType::Float32, DataType::UInt8};
}
|
||||
|
||||
std::string DynamicQuantizeLinearObj::toString() const {
|
||||
std::ostringstream os;
|
||||
os << "DynamicQuantizeLinear[" << getGuid() << "]";
|
||||
os << "(";
|
||||
os << vecToString(inputs[0]->getDims()) << ",";
|
||||
os << "input=" << inputs[0]->getGuid() << ",";
|
||||
os << "output=";
|
||||
for (auto output : outputs)
|
||||
os << output->getGuid() << ",";
|
||||
os << ")";
|
||||
return os.str();
|
||||
}
|
||||
|
||||
// Workload fingerprint: op type id followed by the input dimensions.
vector<int> DynamicQuantizeLinearObj::getWorkloadVector() const {
    vector<int> workload{type.underlying()};
    const auto &dims = inputs[0]->getDims();
    workload.insert(workload.end(), dims.begin(), dims.end());
    return workload;
}
|
||||
|
||||
// Attribute fingerprint: this op has no attributes beyond its type.
vector<int> DynamicQuantizeLinearObj::getOpAttrVector() const {
    return vector<int>{type.underlying()};
}
|
||||
|
||||
} // namespace infini
|
Loading…
Reference in New Issue