/******************************************************************************
 *
 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/
/**
 *******************************************************************************
 * @file
 *  ihevc_trans_macros.h
 *
 * @brief
 *  Macros used in the forward transform and inverse transform functions
 *
 * @author
 *  Ittiam
 *
 * @remarks
 *  None
 *
 *******************************************************************************
 */
#ifndef IHEVC_TRANS_MACROS_H_
#define IHEVC_TRANS_MACROS_H_

#define QUANT(out, inp, quant_coeff, qp_div, log2_trans_size, q_add) \
{ \
    LWORD64 tmp; \
    WORD32 sign; \
    WORD32 bit_depth, transform_shift; \
    WORD32 q_bits, quant_multiplier; \
 \
    /* q_bits and q_add calculation */ \
    /* To be moved outside the loop in NEON; to be computed once per transform call */ \
    bit_depth = 8; \
    transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - log2_trans_size; \
    quant_multiplier = 4; /* because quant_coeff values are multiplied by 16; instead of multiplying, we can reduce the shift q_bits by 4 */ \
    q_bits = QUANT_SHIFT + qp_div + transform_shift + SCALING_Q_SHIFT - quant_multiplier; \
 \
    sign = (inp) < 0 ? -1 : 1; \
 \
    tmp = (LWORD64)(abs(inp)); \
    tmp = tmp * (quant_coeff); \
    tmp = tmp + (((LWORD64)q_add) << (q_bits - QUANT_ROUND_FACTOR_Q)); \
    tmp = tmp >> q_bits; \
 \
    tmp = tmp * sign; \
    out = (WORD16) CLIP_S16(tmp); \
}
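
/* Illustrative usage sketch (not from the original sources): quantizing one
 * coefficient of an 8x8 forward transform (log2_trans_size = 3) at 8-bit depth.
 * The names i4_idx, pi2_trans_coeffs, pi2_quant_coeff, i4_qp_div and i4_q_add
 * are hypothetical placeholders for whatever the calling transform function
 * provides; as the shift by (q_bits - QUANT_ROUND_FACTOR_Q) implies, q_add is
 * expressed in QUANT_ROUND_FACTOR_Q-bit precision.
 *
 *     WORD16 i2_quant_out;
 *     QUANT(i2_quant_out, pi2_trans_coeffs[i4_idx], pi2_quant_coeff[i4_idx],
 *           i4_qp_div, 3, i4_q_add);
 *
 * With bit_depth = 8 and log2_trans_size = 3 the macro computes
 * transform_shift = MAX_TR_DYNAMIC_RANGE - 8 - 3, i.e. 15 - 8 - 3 = 4 if
 * MAX_TR_DYNAMIC_RANGE is 15 as in the HEVC reference model. */
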
#define QUANT_HBD(out, inp, quant_coeff, qp_div, log2_trans_size, q_add, bit_depth) \
{ \
    LWORD64 tmp; \
    WORD32 sign; \
    WORD32 transform_shift; \
    WORD32 q_bits, quant_multiplier; \
 \
    /* q_bits and q_add calculation */ \
    /* To be moved outside the loop in NEON; to be computed once per transform call */ \
 \
    transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - log2_trans_size; \
    quant_multiplier = 4; /* because quant_coeff values are multiplied by 16; instead of multiplying, we can reduce the shift q_bits by 4 */ \
    q_bits = QUANT_SHIFT + qp_div + transform_shift + SCALING_Q_SHIFT - quant_multiplier; \
 \
    sign = (inp) < 0 ? -1 : 1; \
 \
    tmp = (LWORD64)(abs(inp)); \
    tmp = tmp * (quant_coeff); \
    tmp = tmp + (((LWORD64)q_add) << (q_bits - QUANT_ROUND_FACTOR_Q)); \
    tmp = tmp >> q_bits; \
 \
    tmp = tmp * sign; \
    out = (WORD16) CLIP_S16(tmp); \
}
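
/* Illustrative note (assumption, not from the original sources): QUANT_HBD
 * differs from QUANT only in taking bit_depth as a parameter instead of fixing
 * it at 8. For example, for 10-bit content and a 4x4 transform
 * (log2_trans_size = 2) it would give
 * transform_shift = MAX_TR_DYNAMIC_RANGE - 10 - 2, i.e. 3 if
 * MAX_TR_DYNAMIC_RANGE is 15; everything else follows the QUANT sketch above. */
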
/* added by 100028 */
#define QUANT_NO_WEIGHTMAT(out, inp, quant_coeff, qp_div, log2_trans_size, q_add) \
{ \
    WORD32 tmp; \
    WORD32 sign; \
    WORD32 bit_depth, transform_shift; \
    WORD32 q_bits, quant_multiplier; \
 \
    /* q_bits and q_add calculation */ \
    /* To be moved outside the loop in NEON; to be computed once per transform call */ \
    bit_depth = 8; \
    transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - log2_trans_size; \
    quant_multiplier = 4; /* because quant_coeff values are multiplied by 16; instead of multiplying, we can reduce the shift q_bits by 4 */ \
    q_bits = QUANT_SHIFT + qp_div + transform_shift + SCALING_Q_SHIFT - quant_multiplier - FLAT_RESCALE_MAT_Q_SHIFT /* 2048 */; \
 \
    sign = (inp) < 0 ? -1 : 1; \
 \
    tmp = (WORD32)(abs(inp)); \
    tmp = tmp * (quant_coeff); \
    tmp = tmp + (((WORD32)q_add) << (q_bits - QUANT_ROUND_FACTOR_Q)); \
    tmp = tmp >> q_bits; \
 \
    tmp = tmp * sign; \
    out = (WORD16) CLIP_S16(tmp); \
}
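
/* Illustrative note (assumption, not from the original sources): in this
 * flat-scaling-matrix path the per-coefficient weight is constant, so the
 * extra scaling is folded into the shift by subtracting
 * FLAT_RESCALE_MAT_Q_SHIFT from q_bits; the inline "2048" annotation above
 * suggests the corresponding flat rescale value is 2^11. Note also that tmp is
 * only WORD32 here (vs LWORD64 in QUANT), which relies on the intermediate
 * product abs(inp) * quant_coeff staying within 32 bits for this path. */
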
#define QUANT_NO_WEIGHTMAT_HBD(out, inp, quant_coeff, qp_div, log2_trans_size, q_add, bit_depth) \
{ \
    WORD32 tmp; \
    WORD32 sign; \
    WORD32 transform_shift; \
    WORD32 q_bits, quant_multiplier; \
 \
    /* q_bits and q_add calculation */ \
    /* To be moved outside the loop in NEON; to be computed once per transform call */ \
 \
    transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - log2_trans_size; \
    quant_multiplier = 4; /* because quant_coeff values are multiplied by 16; instead of multiplying, we can reduce the shift q_bits by 4 */ \
    q_bits = QUANT_SHIFT + qp_div + transform_shift + SCALING_Q_SHIFT - quant_multiplier - FLAT_RESCALE_MAT_Q_SHIFT /* 2048 */; \
 \
    sign = (inp) < 0 ? -1 : 1; \
 \
    tmp = (WORD32)(abs(inp)); \
    tmp = tmp * (quant_coeff); \
    tmp = tmp + (((WORD32)q_add) << (q_bits - QUANT_ROUND_FACTOR_Q)); \
    tmp = tmp >> q_bits; \
 \
    tmp = tmp * sign; \
    out = (WORD16) CLIP_S16(tmp); \
}
/* Reference inverse quantization: "pi2_src" (the coefficients) will be clipped to 15 or 14 bits when (qp_div > shift_iq); the spec does not mention any such clip */

/* Inverse quantization for transform sizes other than 4x4 */
/* No clipping is needed for "pi2_src" (the coefficients) */
#define IQUANT(res, coeff /*pi2_src[index*src_strd]*/, dequant_coeff /*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */, shift_iq, qp_div) \
{ \
    WORD32 tmp, add_iq; \
 \
    add_iq = SHL_NEG(1, (shift_iq - qp_div - 1)); /* To be moved outside in NEON; to be computed once per transform call */ \
 \
    tmp = coeff * dequant_coeff; \
    tmp = tmp + add_iq; \
    tmp = SHR_NEG(tmp, (shift_iq - qp_div)); \
 \
    res = CLIP_S16(tmp); \
}
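
/* Illustrative usage sketch (not from the original sources): dequantizing one
 * coefficient with the generic path. Following the parameter comments above,
 * dequant_coeff is expected to already combine the scaling-list entry with
 * g_ihevc_iquant_scales[qp_rem]. The names i4_idx, pi2_src, pi2_dst, src_strd,
 * pi2_dequant_coeff, trans_size, i4_shift_iq and i4_qp_div are hypothetical
 * placeholders:
 *
 *     WORD16 i2_dequant_out;
 *     IQUANT(i2_dequant_out,
 *            pi2_src[i4_idx * src_strd],
 *            pi2_dequant_coeff[i4_idx * trans_size] * g_ihevc_iquant_scales[qp_rem],
 *            i4_shift_iq, i4_qp_div);
 *     pi2_dst[i4_idx] = i2_dequant_out;
 *
 * SHL_NEG / SHR_NEG presumably handle a negative shift amount (i.e. when
 * qp_div exceeds shift_iq) by shifting in the opposite direction. */
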
/* 4x4 inverse quantization */
/* Options: */
/* 1. Clip "pi2_src" (the coefficients) to 10 bits if (qp_div >= shift_iq), or to 16 bits if (qp_div < shift_iq) */
/* 2. Increase the precision of "pi2_src" (the coefficients) to 64 bits */

#define IQUANT_4x4(res, coeff /*pi2_src[index*src_strd]*/, dequant_coeff /*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */, shift_iq, qp_div) \
{ \
    WORD32 clip_coeff, tmp; \
    WORD32 coeff_min, coeff_max; \
    WORD32 coeff_bit_range; \
    WORD32 add_iq; \
 \
    add_iq = SHL_NEG(1, (shift_iq - qp_div - 1)); /* To be moved outside in NEON; to be computed once per transform call */ \
 \
    coeff_bit_range = 16; \
    if(qp_div > shift_iq) \
        coeff_bit_range = 10; \
 \
    coeff_min = -(1 << (coeff_bit_range - 1)); \
    coeff_max = (1 << (coeff_bit_range - 1)) - 1; \
 \
    clip_coeff = CLIP3(coeff, coeff_min, coeff_max); \
 \
    tmp = clip_coeff * dequant_coeff; \
    tmp = tmp + add_iq; \
    tmp = SHR_NEG(tmp, (shift_iq - qp_div)); \
 \
    res = CLIP_S16(tmp); \
}
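
/* Illustrative note (assumption, not from the original sources): this macro
 * follows option 1 above (with a strict qp_div > shift_iq test). When
 * qp_div > shift_iq the coefficient is first clipped to a 10-bit range, i.e.
 * CLIP3(coeff, -512, 511), and to the full 16-bit range [-32768, 32767]
 * otherwise, before the multiply; this presumably keeps
 * clip_coeff * dequant_coeff within WORD32 for the 4x4 case. */
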
#endif /* IHEVC_TRANS_MACROS_H_ */