forked from jiuyuan/InfiniTensor
fix: run int8 llama but has nan output
parent 8ae5958b29
commit 8d901ba7aa
@@ -518,6 +518,8 @@ void init_graph_builder(py::module &m) {
        .def("erf", &Handler::erf, policy::move)
        .def("where", &Handler::where, policy::move)
        .def("dequantizeLinear", &Handler::dequantizeLinear, policy::move)
+       .def("dynamicQuantizeLinear", &Handler::dynamicQuantizeLinear,
+            policy::move)
        .def("matmulInteger", &Handler::matmulInteger, policy::move)
        .def("topo_sort", &Handler::topo_sort, policy::automatic)
        .def("optimize", &Handler::optimize, policy::automatic)
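Note: the two added lines register Handler::dynamicQuantizeLinear with pybind11 and hand the result back with return_value_policy::move, matching the surrounding bindings. A minimal, self-contained sketch of the same binding pattern; the module name, the DemoHandler type and its body are placeholders for illustration, not InfiniTensor's actual API:

#include <pybind11/pybind11.h>
#include <string>

namespace py = pybind11;

// Placeholder class standing in for the real graph Handler.
struct DemoHandler {
    std::string dynamicQuantizeLinear(const std::string &name) {
        return name + "_quantized"; // dummy body, only here to return something
    }
};

PYBIND11_MODULE(demo_bindings, m) {
    py::class_<DemoHandler>(m, "Handler")
        .def(py::init<>())
        // Same shape as the diff: expose the member function and move the
        // returned value into the Python-owned result.
        .def("dynamicQuantizeLinear", &DemoHandler::dynamicQuantizeLinear,
             py::return_value_policy::move);
}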
@@ -63,8 +63,9 @@ __launch_bounds__(BLOCK_DIM) __global__
    __syncthreads();
    int qmax = 255;
    int qmin = 0;
-   yScale[0] = (max___(0.f, maxTotal) - min___(0.f, minTotal)) / (qmax - qmin);
-   float intermediate_zero_point = qmin - minTotal / yScale[0];
+   float absMax = max___(abs(maxTotal), abs(minTotal));
+   yScale[0] = absMax * 2 / (254 - qmin);
+   float intermediate_zero_point = 254 - absMax / yScale[0];
    float _yZeroPoint = round(_saturate(intermediate_zero_point));
    yZeroPoint[0] = static_cast<uint8_t>(_yZeroPoint);
    if (i < size) {
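Note: the replacement switches from an asymmetric [0, 255] range to a symmetric one. With qmin = 0, yScale = 2 * absMax / 254, so the intermediate zero point is 254 - absMax / yScale = 254 - 127 = 127 whenever absMax > 0, i.e. the midpoint of [0, 254]. When absMax is 0 the same expression becomes 0 / 0, which is NaN; that corner case may be related to the NaN output named in the commit message, though the diff alone does not confirm it. A host-side C++ rendering of the same arithmetic, with max___, abs and _saturate replaced by standard-library calls (an assumption about what those helpers do):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
    // Example reduction results for one tensor.
    float maxTotal = 3.2f, minTotal = -1.7f;
    int qmin = 0;

    float absMax = std::max(std::fabs(maxTotal), std::fabs(minTotal));
    float yScale = absMax * 2 / (254 - qmin);                   // 2 * 3.2 / 254
    float zeroPoint = 254 - absMax / yScale;                    // 127 whenever absMax > 0
    float clamped = std::min(std::max(zeroPoint, 0.f), 255.f);  // stand-in for _saturate
    uint8_t yZeroPoint = static_cast<uint8_t>(std::round(clamped));

    std::printf("scale=%f zero_point=%d\n", yScale, (int)yZeroPoint); // zero_point=127
    return 0;
}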
@@ -121,8 +122,9 @@ __launch_bounds__(BLOCK_DIM) __global__
    __syncthreads();
    int qmax = 255;
    int qmin = 0;
-   yScale[0] = (max___(0.f, maxTotal) - min___(0.f, minTotal)) / (qmax - qmin);
-   float intermediate_zero_point = qmin - minTotal / yScale[0];
+   float absMax = max___(abs(maxTotal), abs(minTotal));
+   yScale[0] = absMax * 2 / (254 - qmin);
+   float intermediate_zero_point = 254 - absMax / yScale[0];
    float _yZeroPoint = round(_saturate(intermediate_zero_point));
    yZeroPoint[0] = static_cast<uint8_t>(_yZeroPoint);
    if (i < size) {
@@ -27,6 +27,9 @@ class GatherCuda : public CudaKernelWithoutConfig {
        } else if (op->getDType() == DataType::Int8) {
            gather_kernel<int8_t>((int8_t *)inputData, (int8_t *)outputData,
                                  metaData, op->getOutput()->size());
+       } else if (op->getDType() == DataType::UInt8) {
+           gather_kernel<uint8_t>((uint8_t *)inputData, (uint8_t *)outputData,
+                                  metaData, op->getOutput()->size());
        } else {
            IT_ASSERT(false);
        }
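Note: this is the usual runtime dtype-to-template dispatch: the operator's DataType tag picks which instantiation of gather_kernel gets launched, and any unhandled type hits the assert. A minimal standalone sketch of the pattern; the DType enum, gather_impl body and gather_dispatch name are illustrative stand-ins rather than the repository's real types:

#include <cstddef>
#include <cstdint>
#include <stdexcept>

enum class DType { Float32, Int8, UInt8 }; // stand-in for DataType

template <typename T>
void gather_impl(const T *in, T *out, std::size_t num) {
    // Trivial stand-in body; a real gather would follow the index metadata.
    for (std::size_t i = 0; i < num; ++i)
        out[i] = in[i];
}

void gather_dispatch(DType dt, const void *in, void *out, std::size_t num) {
    if (dt == DType::Float32) {
        gather_impl<float>((const float *)in, (float *)out, num);
    } else if (dt == DType::Int8) {
        gather_impl<int8_t>((const int8_t *)in, (int8_t *)out, num);
    } else if (dt == DType::UInt8) {
        gather_impl<uint8_t>((const uint8_t *)in, (uint8_t *)out, num);
    } else {
        throw std::runtime_error("unsupported dtype"); // mirrors IT_ASSERT(false)
    }
}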
@@ -56,4 +56,6 @@ template void gather_kernel<half>(half *in, half *out, GatherMetaData metaData,
                                  size_t num);
template void gather_kernel<int8_t>(int8_t *in, int8_t *out,
                                    GatherMetaData metaData, size_t num);
+template void gather_kernel<uint8_t>(uint8_t *in, uint8_t *out,
+                                     GatherMetaData metaData, size_t num);
} // namespace infini
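Note: these lines are explicit template instantiations. Because the gather_kernel template is defined in the .cu file rather than in a header, every element type used from other translation units has to be instantiated here by hand, otherwise the build fails at link time with an undefined symbol. A tiny self-contained illustration of the mechanism; the file names and the add_one function are invented for the example:

// lib.h  -- declaration only; callers never see the template body.
template <typename T> T add_one(T x);

// lib.cpp -- definition plus explicit instantiations for the supported types.
template <typename T> T add_one(T x) { return x + 1; }
template int add_one<int>(int);
template float add_one<float>(float);
// A call such as add_one<unsigned char>(c) in another .cpp file links only if
// "template unsigned char add_one<unsigned char>(unsigned char);" is added here.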
@@ -47,6 +47,11 @@ class CastCuda : public CudaKernelWithoutConfig {
                      op->getOutDType() == DataType::Float32);
            cast_kernel<int8_t, float>((int8_t *)inputData, (float *)outputData,
                                       num);
+       } else if (op->getType() == CastType::Int322Float) {
+           IT_ASSERT(op->getInDType() == DataType::Int32 &&
+                     op->getOutDType() == DataType::Float32);
+           cast_kernel<int32_t, float>((int32_t *)inputData,
+                                       (float *)outputData, num);
        } else {
            IT_ASSERT(false);
        }
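Note: cast_kernel<SrcT, DstT> is an elementwise conversion over num elements; its body is not part of this diff. The following CUDA sketch shows what a kernel with this call signature typically looks like, reconstructed from the call sites above; the kernel body, launch configuration and the _sketch names are assumptions, not the repository's implementation:

#include <cstddef>
#include <cstdint>

template <typename SrcT, typename DstT>
__global__ void cast_elementwise(const SrcT *input, DstT *output, size_t num) {
    size_t i = blockIdx.x * (size_t)blockDim.x + threadIdx.x;
    if (i < num) {
        output[i] = static_cast<DstT>(input[i]); // plain value conversion
    }
}

template <typename SrcT, typename DstT>
void cast_kernel_sketch(const SrcT *input, DstT *output, size_t num) {
    constexpr int block = 256;
    int grid = (int)((num + block - 1) / block);
    cast_elementwise<SrcT, DstT><<<grid, block>>>(input, output, num);
}

// Mirrors the two call sites added in the diff:
template void cast_kernel_sketch<int8_t, float>(const int8_t *, float *, size_t);
template void cast_kernel_sketch<int32_t, float>(const int32_t *, float *, size_t);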
@@ -313,5 +313,7 @@ template void cast_kernel<float, int8_t>(float *input, int8_t *output,
                                         size_t num);
template void cast_kernel<int8_t, float>(int8_t *input, float *output,
                                         size_t num);
+template void cast_kernel<int32_t, float>(int32_t *input, float *output,
+                                          size_t num);

}; // namespace infini
@@ -11,7 +11,7 @@ DynamicQuantizeLinearObj::DynamicQuantizeLinearObj(

optional<vector<Shape>>
DynamicQuantizeLinearObj::inferShape(const TensorVec &inputs) {
-    return {{inputs[0]->getDims()}};
+    return {{inputs[0]->getDims(), {}, {}}};
}

vector<DataType>
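Note: the one-line fix makes inferShape report three output shapes instead of one, matching ONNX DynamicQuantizeLinear, which produces y (same shape as the input, uint8), y_scale (a float scalar) and y_zero_point (a uint8 scalar); the two empty entries stand for the scalar outputs. A small self-contained C++ sketch of the returned structure, using std::vector<int> as a stand-in for the repository's Shape type:

#include <cstdio>
#include <vector>

using Shape = std::vector<int>; // stand-in for the repository's Shape

int main() {
    Shape inputDims = {1, 32, 128};                 // example input shape
    // Same structure the fixed inferShape returns: y, y_scale, y_zero_point.
    std::vector<Shape> outputs = {inputDims, {}, {}};
    std::printf("outputs=%zu, y rank=%zu, y_scale rank=%zu, y_zero_point rank=%zu\n",
                outputs.size(), outputs[0].size(), outputs[1].size(), outputs[2].size());
    return 0;
}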