forked from jiuyuan/InfiniTensor
modified dynamic_quantize_linear
This commit is contained in:
parent
7b48b93fb3
commit
ce23b8356f
|
@ -3,7 +3,7 @@
|
|||
#include "cuda/cuda_common.h"
|
||||
|
||||
__device__ float _saturate(float x) {
|
||||
return x < 0.f ? 0.f : (x > 255.0 ? 255.0 : x);
|
||||
return x < 0.f ? 0.f : (x > 255.0f ? 255.0f : x);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
|
@ -22,8 +22,8 @@ __launch_bounds__(BLOCK_DIM) __global__
|
|||
float *yScale, uint8_t *yZeroPoint,
|
||||
int size) {
|
||||
int i = threadIdx.x + blockIdx.x * BLOCK_DIM;
|
||||
float maxData = __FLT_MAX__;
|
||||
float minData = -__FLT_MAX__;
|
||||
float maxData = -__FLT_MAX__;
|
||||
float minData = __FLT_MAX__;
|
||||
int remain = size % BLOCK_DIM;
|
||||
int step = (size - remain) / BLOCK_DIM + 1;
|
||||
|
||||
|
@ -38,6 +38,7 @@ __launch_bounds__(BLOCK_DIM) __global__
|
|||
(threadIdx.x - remain) * (step - 1) + ind]);
|
||||
}
|
||||
}
|
||||
|
||||
if (threadIdx.x < remain) {
|
||||
for (int ind = 0; ind < step; ind++) {
|
||||
minData = min___(minData, input[threadIdx.x * step + ind]);
|
||||
|
@ -49,25 +50,27 @@ __launch_bounds__(BLOCK_DIM) __global__
|
|||
(threadIdx.x - remain) * (step - 1) + ind]);
|
||||
}
|
||||
}
|
||||
typedef cub::BlockReduce<float, BLOCK_DIM> BlockReduce;
|
||||
__shared__ typename BlockReduce::TempStorage temp_storage;
|
||||
__shared__ float maxTotal;
|
||||
float blockMax = BlockReduce(temp_storage).Reduce(maxData, cub::Max());
|
||||
|
||||
__shared__ float minTotal;
|
||||
float blockMin = BlockReduce(temp_storage).Reduce(minData, cub::Min());
|
||||
if (threadIdx.x == 0) {
|
||||
maxTotal = blockMax;
|
||||
minTotal = blockMin;
|
||||
}
|
||||
__syncthreads();
|
||||
typedef cub::BlockReduce<float, BLOCK_DIM> BlockReduce;
|
||||
__shared__ typename BlockReduce::TempStorage temp_storageMax;
|
||||
|
||||
float blockMax = BlockReduce(temp_storageMax).Reduce(maxData, cub::Max());
|
||||
|
||||
__shared__ typename BlockReduce::TempStorage temp_storageMin;
|
||||
|
||||
float blockMin = BlockReduce(temp_storageMin).Reduce(minData, cub::Min());
|
||||
//-----
|
||||
int qmax = 255;
|
||||
int qmin = 0;
|
||||
float absMax = max___(abs(maxTotal), abs(minTotal));
|
||||
yScale[0] = absMax * 2 / (254 - qmin);
|
||||
float intermediate_zero_point = 254 - absMax / yScale[0];
|
||||
float _yZeroPoint = round(_saturate(intermediate_zero_point));
|
||||
yZeroPoint[0] = static_cast<uint8_t>(_yZeroPoint);
|
||||
__shared__ float _yZeroPoint;
|
||||
if (threadIdx.x == 0) {
|
||||
yScale[0] = (max(0.0f, blockMax) - min(0.0f, blockMin)) / (qmax - qmin);
|
||||
float intermediate_zero_point = qmin - blockMin / yScale[0];
|
||||
_yZeroPoint = round(_saturate(intermediate_zero_point));
|
||||
yZeroPoint[0] = static_cast<uint8_t>(_yZeroPoint);
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
if (i < size) {
|
||||
outputY[i] = static_cast<uint8_t>(
|
||||
_saturate(round(input[i] / yScale[0]) + _yZeroPoint));
|
||||
|
@ -81,8 +84,8 @@ __launch_bounds__(BLOCK_DIM) __global__
|
|||
float *yScale, uint8_t *yZeroPoint,
|
||||
int size) {
|
||||
int i = threadIdx.x + blockIdx.x * BLOCK_DIM;
|
||||
float maxData = __FLT_MAX__;
|
||||
float minData = -__FLT_MAX__;
|
||||
float maxData = -__FLT_MAX__;
|
||||
float minData = __FLT_MAX__;
|
||||
int remain = size % BLOCK_DIM;
|
||||
int step = (size - remain) / BLOCK_DIM + 1;
|
||||
float dataPerThread[numPerThread];
|
||||
|
@ -99,6 +102,7 @@ __launch_bounds__(BLOCK_DIM) __global__
|
|||
maxData = max___(maxData, dataPerThread[ind]);
|
||||
}
|
||||
}
|
||||
|
||||
if (threadIdx.x < remain) {
|
||||
for (int ind = 0; ind < step; ind++) {
|
||||
minData = min___(minData, dataPerThread[ind]);
|
||||
|
@ -108,25 +112,27 @@ __launch_bounds__(BLOCK_DIM) __global__
|
|||
minData = min___(minData, dataPerThread[ind]);
|
||||
}
|
||||
}
|
||||
typedef cub::BlockReduce<float, BLOCK_DIM> BlockReduce;
|
||||
__shared__ typename BlockReduce::TempStorage temp_storage;
|
||||
__shared__ float maxTotal;
|
||||
float blockMax = BlockReduce(temp_storage).Reduce(maxData, cub::Max());
|
||||
|
||||
__shared__ float minTotal;
|
||||
float blockMin = BlockReduce(temp_storage).Reduce(minData, cub::Min());
|
||||
if (threadIdx.x == 0) {
|
||||
maxTotal = blockMax;
|
||||
minTotal = blockMin;
|
||||
}
|
||||
__syncthreads();
|
||||
typedef cub::BlockReduce<float, BLOCK_DIM> BlockReduce;
|
||||
__shared__ typename BlockReduce::TempStorage temp_storageMax;
|
||||
|
||||
float blockMax = BlockReduce(temp_storageMax).Reduce(maxData, cub::Max());
|
||||
|
||||
__shared__ typename BlockReduce::TempStorage temp_storageMin;
|
||||
|
||||
float blockMin = BlockReduce(temp_storageMin).Reduce(minData, cub::Min());
|
||||
//-----
|
||||
int qmax = 255;
|
||||
int qmin = 0;
|
||||
float absMax = max___(abs(maxTotal), abs(minTotal));
|
||||
yScale[0] = absMax * 2 / (254 - qmin);
|
||||
float intermediate_zero_point = 254 - absMax / yScale[0];
|
||||
float _yZeroPoint = round(_saturate(intermediate_zero_point));
|
||||
yZeroPoint[0] = static_cast<uint8_t>(_yZeroPoint);
|
||||
__shared__ float _yZeroPoint;
|
||||
if (threadIdx.x == 0) {
|
||||
yScale[0] = (max(0.0f, blockMax) - min(0.0f, blockMin)) / (qmax - qmin);
|
||||
float intermediate_zero_point = qmin - blockMin / yScale[0];
|
||||
_yZeroPoint = round(_saturate(intermediate_zero_point));
|
||||
yZeroPoint[0] = static_cast<uint8_t>(_yZeroPoint);
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
if (i < size) {
|
||||
outputY[i] = static_cast<uint8_t>(
|
||||
_saturate(round(input[i] / yScale[0]) + _yZeroPoint));
|
||||
|
|
|
@ -0,0 +1,74 @@
|
|||
#include "core/graph.h"
|
||||
#include "core/runtime.h"
|
||||
#include "cuda/cuda_runtime.h"
|
||||
#include "cuda/cuda_utility.h"
|
||||
#include "operators/dynamic_quantize_linear.h"
|
||||
|
||||
#include "test.h"
|
||||
|
||||
namespace infini {
|
||||
|
||||
void test_dynamicquantizeLinearFp32(
|
||||
const Shape &inputShape, const vector<float> &inputData,
|
||||
const vector<uint8_t> &outputYData, const vector<float> &outputYScaleData,
|
||||
const vector<uint8_t> &outputYZeroPointData) {
|
||||
|
||||
Runtime runtime = NativeCpuRuntimeObj::getInstance();
|
||||
Graph gCpu = make_ref<GraphObj>(runtime);
|
||||
|
||||
auto input = gCpu->addTensor(inputShape, DataType::Float32);
|
||||
|
||||
gCpu->dataMalloc();
|
||||
|
||||
input->copyin(inputData);
|
||||
|
||||
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
||||
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
|
||||
|
||||
auto inputGpu = gCuda->cloneTensor(input);
|
||||
|
||||
auto op = gCuda->addOp<DynamicQuantizeLinearObj>(
|
||||
inputGpu,
|
||||
std::nullopt); // DynamicQuantizeLinear
|
||||
gCuda->dataMalloc();
|
||||
|
||||
inputGpu->copyin(inputData);
|
||||
|
||||
cudaRuntime->run(gCuda);
|
||||
|
||||
EXPECT_EQ(op->getOutputs().size(), (size_t)3);
|
||||
auto o0Cpu = gCpu->cloneTensor(op->getOutput(0));
|
||||
auto o1Cpu = gCpu->cloneTensor(op->getOutput(1));
|
||||
auto o2Cpu = gCpu->cloneTensor(op->getOutput(2));
|
||||
|
||||
EXPECT_TRUE(o0Cpu->equalData(outputYData));
|
||||
EXPECT_TRUE(o1Cpu->equalData(outputYScaleData));
|
||||
EXPECT_TRUE(o2Cpu->equalData(outputYZeroPointData));
|
||||
}
|
||||
|
||||
TEST(CUDA_DynamicquantizeLinearFp32, run) {
|
||||
|
||||
test_dynamicquantizeLinearFp32(
|
||||
Shape{2, 3, 2, 3},
|
||||
vector<float>{0., 1., 2., 3., 4., 5., 6., 7., 8.,
|
||||
9., 10., 11., 12., 13., 14., 15., 16., 17.,
|
||||
18., 19., 20., 21., 22., 23., 24., 25., 26.,
|
||||
27., 28., 29., 30., 31., 32., 33., 34., 35.},
|
||||
vector<uint8_t>{0, 7, 15, 22, 29, 36, 44, 51, 58,
|
||||
66, 73, 80, 87, 95, 102, 109, 117, 124,
|
||||
131, 138, 146, 153, 160, 168, 175, 182, 189,
|
||||
197, 204, 211, 219, 226, 233, 240, 248, 255},
|
||||
vector<float>{0.1372549}, vector<uint8_t>{0});
|
||||
test_dynamicquantizeLinearFp32(
|
||||
Shape{2, 3, 2, 2},
|
||||
vector<float>{0., 1., 2., 3., 4., 5., 6., 7.,
|
||||
8., 9., 10., 11., 12., 13., 14., 15.,
|
||||
16., 17., 18., 19., 20., 21., 22., 23.},
|
||||
vector<uint8_t>{0, 11, 22, 33, 44, 55, 67, 78,
|
||||
89, 100, 111, 122, 133, 144, 155, 166,
|
||||
177, 188, 200, 211, 222, 233, 244, 255},
|
||||
vector<float>{0.0901961}, vector<uint8_t>{0});
|
||||
|
||||
} // python output
|
||||
|
||||
} // namespace infini
|
Loading…
Reference in New Issue