11144 lines
423 KiB
C
11144 lines
423 KiB
C
/******************************************************************************
 *
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *****************************************************************************
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 */
|
|
|
|
/*!
******************************************************************************
* \file ihevce_enc_loop_utils.c
*
* \brief
*    This file contains utility functions of Encode loop
*
* \date
*    18/09/2012
*
* \author
*    Ittiam
*
*
* List of Functions
*
*
******************************************************************************
*/
|
|
|
|
/*****************************************************************************/
|
|
/* File Includes */
|
|
/*****************************************************************************/
|
|
/* System include files */
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
#include <assert.h>
|
|
#include <stdarg.h>
|
|
#include <math.h>
|
|
#include <limits.h>
|
|
|
|
/* User include files */
|
|
#include "ihevc_typedefs.h"
|
|
#include "itt_video_api.h"
|
|
#include "ihevce_api.h"
|
|
|
|
#include "rc_cntrl_param.h"
|
|
#include "rc_frame_info_collector.h"
|
|
#include "rc_look_ahead_params.h"
|
|
|
|
#include "ihevc_defs.h"
|
|
#include "ihevc_macros.h"
|
|
#include "ihevc_debug.h"
|
|
#include "ihevc_structs.h"
|
|
#include "ihevc_platform_macros.h"
|
|
#include "ihevc_deblk.h"
|
|
#include "ihevc_itrans_recon.h"
|
|
#include "ihevc_chroma_itrans_recon.h"
|
|
#include "ihevc_chroma_intra_pred.h"
|
|
#include "ihevc_intra_pred.h"
|
|
#include "ihevc_inter_pred.h"
|
|
#include "ihevc_mem_fns.h"
|
|
#include "ihevc_padding.h"
|
|
#include "ihevc_weighted_pred.h"
|
|
#include "ihevc_sao.h"
|
|
#include "ihevc_resi_trans.h"
|
|
#include "ihevc_quant_iquant_ssd.h"
|
|
#include "ihevc_cabac_tables.h"
|
|
#include "ihevc_common_tables.h"
|
|
|
|
#include "ihevce_defs.h"
|
|
#include "ihevce_hle_interface.h"
|
|
#include "ihevce_lap_enc_structs.h"
|
|
#include "ihevce_multi_thrd_structs.h"
|
|
#include "ihevce_multi_thrd_funcs.h"
|
|
#include "ihevce_me_common_defs.h"
|
|
#include "ihevce_had_satd.h"
|
|
#include "ihevce_error_codes.h"
|
|
#include "ihevce_bitstream.h"
|
|
#include "ihevce_cabac.h"
|
|
#include "ihevce_rdoq_macros.h"
|
|
#include "ihevce_function_selector.h"
|
|
#include "ihevce_enc_structs.h"
|
|
#include "ihevce_entropy_structs.h"
|
|
#include "ihevce_cmn_utils_instr_set_router.h"
|
|
#include "ihevce_ipe_instr_set_router.h"
|
|
#include "ihevce_decomp_pre_intra_structs.h"
|
|
#include "ihevce_decomp_pre_intra_pass.h"
|
|
#include "ihevce_enc_loop_structs.h"
|
|
#include "ihevce_nbr_avail.h"
|
|
#include "ihevce_enc_loop_utils.h"
|
|
#include "ihevce_sub_pic_rc.h"
|
|
#include "ihevce_global_tables.h"
|
|
#include "ihevce_bs_compute_ctb.h"
|
|
#include "ihevce_cabac_rdo.h"
|
|
#include "ihevce_deblk.h"
|
|
#include "ihevce_frame_process.h"
|
|
#include "ihevce_rc_enc_structs.h"
|
|
#include "hme_datatype.h"
|
|
#include "hme_interface.h"
|
|
#include "hme_common_defs.h"
|
|
#include "hme_defs.h"
|
|
#include "hme_common_utils.h"
|
|
#include "ihevce_me_instr_set_router.h"
|
|
#include "ihevce_enc_subpel_gen.h"
|
|
#include "ihevce_inter_pred.h"
|
|
#include "ihevce_mv_pred.h"
|
|
#include "ihevce_mv_pred_merge.h"
|
|
#include "ihevce_enc_loop_inter_mode_sifter.h"
|
|
#include "ihevce_enc_cu_recursion.h"
|
|
#include "ihevce_enc_loop_pass.h"
|
|
#include "ihevce_common_utils.h"
|
|
#include "ihevce_dep_mngr_interface.h"
|
|
#include "ihevce_sao.h"
|
|
#include "ihevce_tile_interface.h"
|
|
#include "ihevce_profile.h"
|
|
#include "ihevce_stasino_helpers.h"
|
|
#include "ihevce_tu_tree_selector.h"
|
|
|
|
/*****************************************************************************/
|
|
/* Globals */
|
|
/*****************************************************************************/
|
|
|
|
extern UWORD16 gau2_ihevce_cabac_bin_to_bits[64 * 2];
|
|
extern const UWORD8 gu1_hevce_scan4x4[3][16];
|
|
extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc[4][16];
|
|
extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc_tr4[16];
|
|
extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc_00[16];
|
|
|
|
/*****************************************************************************/
|
|
/* Constant Macros */
|
|
/*****************************************************************************/
|
|
#define ENABLE_ZERO_CBF 1
|
|
#define DISABLE_RDOQ_INTRA 0
|
|
|
|
/*****************************************************************************/
|
|
/* Function Definitions */
|
|
/*****************************************************************************/
|
|
void *ihevce_tu_tree_update(
|
|
tu_prms_t *ps_tu_prms,
|
|
WORD32 *pnum_tu_in_cu,
|
|
WORD32 depth,
|
|
WORD32 tu_split_flag,
|
|
WORD32 tu_early_cbf,
|
|
WORD32 i4_x_off,
|
|
WORD32 i4_y_off)
|
|
{
|
|
//WORD32 tu_split_flag = p_tu_split_flag[0];
|
|
WORD32 p_tu_split_flag[4];
|
|
WORD32 p_tu_early_cbf[4];
|
|
|
|
WORD32 tu_size = ps_tu_prms->u1_tu_size;
|
|
|
|
if(((tu_size >> depth) >= 16) && (tu_split_flag & 0x1))
|
|
{
|
|
if((tu_size >> depth) == 32)
|
|
{
|
|
/* Get the individual TU split flags */
|
|
p_tu_split_flag[0] = (tu_split_flag >> 16) & 0x1F;
|
|
p_tu_split_flag[1] = (tu_split_flag >> 11) & 0x1F;
|
|
p_tu_split_flag[2] = (tu_split_flag >> 6) & 0x1F;
|
|
p_tu_split_flag[3] = (tu_split_flag >> 1) & 0x1F;
|
|
|
|
/* Get the early CBF flags */
|
|
p_tu_early_cbf[0] = (tu_early_cbf >> 16) & 0x1F;
|
|
p_tu_early_cbf[1] = (tu_early_cbf >> 11) & 0x1F;
|
|
p_tu_early_cbf[2] = (tu_early_cbf >> 6) & 0x1F;
|
|
p_tu_early_cbf[3] = (tu_early_cbf >> 1) & 0x1F;
|
|
}
|
|
else
|
|
{
|
|
/* Get the individual TU split flags */
|
|
p_tu_split_flag[0] = ((tu_split_flag >> 4) & 0x1);
|
|
p_tu_split_flag[1] = ((tu_split_flag >> 3) & 0x1);
|
|
p_tu_split_flag[2] = ((tu_split_flag >> 2) & 0x1);
|
|
p_tu_split_flag[3] = ((tu_split_flag >> 1) & 0x1);
|
|
|
|
/* Get the early CBF flags */
|
|
p_tu_early_cbf[0] = ((tu_early_cbf >> 4) & 0x1);
|
|
p_tu_early_cbf[1] = ((tu_early_cbf >> 3) & 0x1);
|
|
p_tu_early_cbf[2] = ((tu_early_cbf >> 2) & 0x1);
|
|
p_tu_early_cbf[3] = ((tu_early_cbf >> 1) & 0x1);
|
|
}
|
|
|
|
ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
|
|
ps_tu_prms,
|
|
pnum_tu_in_cu,
|
|
depth + 1,
|
|
p_tu_split_flag[0],
|
|
p_tu_early_cbf[0],
|
|
i4_x_off,
|
|
i4_y_off);
|
|
|
|
ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
|
|
ps_tu_prms,
|
|
pnum_tu_in_cu,
|
|
depth + 1,
|
|
p_tu_split_flag[1],
|
|
p_tu_early_cbf[1],
|
|
(i4_x_off + (tu_size >> (depth + 1))),
|
|
i4_y_off);
|
|
|
|
ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
|
|
ps_tu_prms,
|
|
pnum_tu_in_cu,
|
|
depth + 1,
|
|
p_tu_split_flag[2],
|
|
p_tu_early_cbf[2],
|
|
i4_x_off,
|
|
(i4_y_off + (tu_size >> (depth + 1))));
|
|
|
|
ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
|
|
ps_tu_prms,
|
|
pnum_tu_in_cu,
|
|
depth + 1,
|
|
p_tu_split_flag[3],
|
|
p_tu_early_cbf[3],
|
|
(i4_x_off + (tu_size >> (depth + 1))),
|
|
(i4_y_off + (tu_size >> (depth + 1))));
|
|
}
|
|
else
|
|
{
|
|
if(tu_split_flag & 0x1)
|
|
{
|
|
/* This piece of code will be entered for the 8x8, if it is split
|
|
Update the 4 child TU's accordingly. */
|
|
|
|
(*pnum_tu_in_cu) += 4;
|
|
|
|
/* TL TU update */
|
|
ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
|
|
|
|
ps_tu_prms->u1_x_off = i4_x_off;
|
|
|
|
ps_tu_prms->u1_y_off = i4_y_off;
|
|
|
|
/* Early CBF is not done for 4x4 transforms */
|
|
ps_tu_prms->i4_early_cbf = 1;
|
|
|
|
ps_tu_prms++;
|
|
|
|
/* TR TU update */
|
|
ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
|
|
|
|
ps_tu_prms->u1_x_off = i4_x_off + (tu_size >> (depth + 1));
|
|
|
|
ps_tu_prms->u1_y_off = i4_y_off;
|
|
|
|
/* Early CBF is not done for 4x4 transforms */
|
|
ps_tu_prms->i4_early_cbf = 1;
|
|
|
|
ps_tu_prms++;
|
|
|
|
/* BL TU update */
|
|
ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
|
|
|
|
ps_tu_prms->u1_x_off = i4_x_off;
|
|
|
|
ps_tu_prms->u1_y_off = i4_y_off + (tu_size >> (depth + 1));
|
|
|
|
/* Early CBF is not done for 4x4 transforms */
|
|
ps_tu_prms->i4_early_cbf = 1;
|
|
|
|
ps_tu_prms++;
|
|
|
|
/* BR TU update */
|
|
ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
|
|
|
|
ps_tu_prms->u1_x_off = i4_x_off + (tu_size >> (depth + 1));
|
|
|
|
ps_tu_prms->u1_y_off = i4_y_off + (tu_size >> (depth + 1));
|
|
|
|
/* Early CBF is not done for 4x4 transforms */
|
|
ps_tu_prms->i4_early_cbf = 1;
|
|
}
|
|
else
|
|
{
|
|
/* Update the TU params */
|
|
ps_tu_prms->u1_tu_size = tu_size >> depth;
|
|
|
|
ps_tu_prms->u1_x_off = i4_x_off;
|
|
|
|
ps_tu_prms->u1_y_off = i4_y_off;
|
|
|
|
(*pnum_tu_in_cu)++;
|
|
|
|
/* Early CBF update for current TU */
|
|
ps_tu_prms->i4_early_cbf = tu_early_cbf & 0x1;
|
|
}
|
|
if((*pnum_tu_in_cu) < MAX_TU_IN_CTB)
|
|
{
|
|
ps_tu_prms++;
|
|
|
|
ps_tu_prms->u1_tu_size = tu_size;
|
|
}
|
|
}
|
|
|
|
return ps_tu_prms;
|
|
}
|
|
|
|
/*!
|
|
******************************************************************************
|
|
* \if Function name : ihevce_compute_quant_rel_param \endif
|
|
*
|
|
* \brief
|
|
* This function updates quantization related parameters like qp_mod_6 etc in
|
|
* context according to new qp
|
|
*
|
|
* \date
|
|
* 08/01/2013
|
|
*
|
|
* \author
|
|
* Ittiam
|
|
*
|
|
* \return
|
|
*
|
|
* List of Functions
|
|
*
|
|
*
|
|
******************************************************************************
|
|
*/
|
|
void ihevce_compute_quant_rel_param(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD8 i1_cu_qp)
|
|
{
|
|
WORD32 i4_div_factor;
|
|
|
|
ps_ctxt->i4_chrm_cu_qp =
|
|
(ps_ctxt->u1_chroma_array_type == 2)
|
|
? MIN(i1_cu_qp + ps_ctxt->i4_chroma_qp_offset, 51)
|
|
: gai1_ihevc_chroma_qp_scale[i1_cu_qp + ps_ctxt->i4_chroma_qp_offset + MAX_QP_BD_OFFSET];
|
|
ps_ctxt->i4_cu_qp_div6 = (i1_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
|
|
i4_div_factor = (i1_cu_qp + 3) / 6;
|
|
i4_div_factor = CLIP3(i4_div_factor, 3, 6);
|
|
ps_ctxt->i4_cu_qp_mod6 = (i1_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
|
|
ps_ctxt->i4_chrm_cu_qp_div6 = (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
|
|
ps_ctxt->i4_chrm_cu_qp_mod6 = (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
|
|
|
|
#define INTER_RND_QP_BY_6
|
|
#ifdef INTER_RND_QP_BY_6
|
|
/* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */
|
|
{
|
|
ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] =
|
|
(WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)6) + 0.5f);
|
|
}
|
|
#else
|
|
/* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */
|
|
ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = (1 << QUANT_ROUND_FACTOR_Q) / 3;
|
|
#endif
|
|
|
|
if(ISLICE == ps_ctxt->i1_slice_type)
|
|
{
|
|
/* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */
|
|
ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
|
|
(WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)3) + 0.5f);
|
|
}
|
|
else
|
|
{
|
|
if(0) /*TRAQO_EXT_ENABLE_ONE_THIRD_RND*/
|
|
{
|
|
/* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */
|
|
ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
|
|
(WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)3) + 0.5f);
|
|
}
|
|
else
|
|
{
|
|
/* quant factor without RDOQ is 1/6th of shift for intra in inter pic */
|
|
ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
|
|
ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER];
|
|
/* (1 << QUANT_ROUND_FACTOR_Q) / 6; */
|
|
}
|
|
}
|
|
}
|
|
|
|
/*!
|
|
******************************************************************************
|
|
* \if Function name : ihevce_populate_cl_cu_lambda_prms \endif
|
|
*
|
|
* \brief
|
|
* Function whihc calculates the Lambda params for current picture
|
|
*
|
|
* \param[in] ps_enc_ctxt : encoder ctxt pointer
|
|
* \param[in] ps_cur_pic_ctxt : current pic ctxt
|
|
* \param[in] i4_cur_frame_qp : current pic QP
|
|
* \param[in] first_field : is first field flag
|
|
* \param[in] i4_temporal_lyr_id : Current picture layer id
|
|
*
|
|
* \return
|
|
* None
|
|
*
|
|
* \author
|
|
* Ittiam
|
|
*
|
|
*****************************************************************************
|
|
*/
|
|
void ihevce_populate_cl_cu_lambda_prms(
|
|
ihevce_enc_loop_ctxt_t *ps_ctxt,
|
|
frm_lambda_ctxt_t *ps_frm_lamda,
|
|
WORD32 i4_slice_type,
|
|
WORD32 i4_temporal_lyr_id,
|
|
WORD32 i4_lambda_type)
|
|
{
|
|
WORD32 i4_curr_cu_qp, i4_curr_cu_qp_offset;
|
|
double lambda_modifier;
|
|
double lambda_uv_modifier;
|
|
double lambda;
|
|
double lambda_uv;
|
|
|
|
WORD32 i4_qp_bdoffset = 6 * (ps_ctxt->u1_bit_depth - 8);
|
|
|
|
/*Populate lamda modifier */
|
|
ps_ctxt->i4_lamda_modifier = ps_frm_lamda->lambda_modifier;
|
|
ps_ctxt->i4_uv_lamda_modifier = ps_frm_lamda->lambda_uv_modifier;
|
|
ps_ctxt->i4_temporal_layer_id = i4_temporal_lyr_id;
|
|
|
|
for(i4_curr_cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
|
|
i4_curr_cu_qp <= ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
|
|
i4_curr_cu_qp++)
|
|
{
|
|
WORD32 chroma_qp = (ps_ctxt->i4_chroma_format == IV_YUV_422SP_UV)
|
|
? MIN(i4_curr_cu_qp, 51)
|
|
: gai1_ihevc_chroma_qp_scale[i4_curr_cu_qp + MAX_QP_BD_OFFSET];
|
|
|
|
i4_curr_cu_qp_offset = i4_curr_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset;
|
|
|
|
lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
|
|
lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
|
|
|
|
if((BSLICE == i4_slice_type) && (i4_temporal_lyr_id))
|
|
{
|
|
lambda_modifier = ps_frm_lamda->lambda_modifier *
|
|
CLIP3((((double)(i4_curr_cu_qp - 12)) / 6.0), 2.00, 4.00);
|
|
lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier *
|
|
CLIP3((((double)(chroma_qp - 12)) / 6.0), 2.00, 4.00);
|
|
}
|
|
else
|
|
{
|
|
lambda_modifier = ps_frm_lamda->lambda_modifier;
|
|
lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier;
|
|
}
|
|
if(ps_ctxt->i4_use_const_lamda_modifier)
|
|
{
|
|
if(ISLICE == ps_ctxt->i1_slice_type)
|
|
{
|
|
lambda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
|
|
lambda_uv_modifier = ps_ctxt->f_i_pic_lamda_modifier;
|
|
}
|
|
else
|
|
{
|
|
lambda_modifier = CONST_LAMDA_MOD_VAL;
|
|
lambda_uv_modifier = CONST_LAMDA_MOD_VAL;
|
|
}
|
|
}
|
|
switch(i4_lambda_type)
|
|
{
|
|
case 0:
|
|
{
|
|
i4_qp_bdoffset = 0;
|
|
|
|
lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
|
|
lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
|
|
|
|
lambda *= lambda_modifier;
|
|
lambda_uv *= lambda_uv_modifier;
|
|
|
|
ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
|
|
(UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
|
|
|
|
ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
|
|
(LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
|
|
|
|
ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
|
|
(LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
|
|
if(ps_ctxt->i4_use_const_lamda_modifier)
|
|
{
|
|
ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
|
|
(WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
|
|
}
|
|
else
|
|
{
|
|
ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
|
|
(WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
|
|
}
|
|
|
|
ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
|
|
(WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
|
|
|
|
ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
|
|
ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset];
|
|
|
|
ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
|
|
ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset];
|
|
|
|
ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
|
|
ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset];
|
|
|
|
ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
|
|
ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset];
|
|
|
|
break;
|
|
}
|
|
case 1:
|
|
{
|
|
lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
|
|
lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
|
|
|
|
lambda *= lambda_modifier;
|
|
lambda_uv *= lambda_uv_modifier;
|
|
|
|
ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
|
|
(UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
|
|
|
|
ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
|
|
(LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
|
|
|
|
ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
|
|
(LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
|
|
if(ps_ctxt->i4_use_const_lamda_modifier)
|
|
{
|
|
ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
|
|
(WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
|
|
}
|
|
else
|
|
{
|
|
ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
|
|
(WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
|
|
}
|
|
ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
|
|
(WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
|
|
|
|
ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
|
|
ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset];
|
|
|
|
ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
|
|
ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset];
|
|
|
|
ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
|
|
ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset];
|
|
|
|
ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
|
|
ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset];
|
|
|
|
break;
|
|
}
|
|
case 2:
|
|
{
|
|
lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
|
|
lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
|
|
|
|
lambda *= lambda_modifier;
|
|
lambda_uv *= lambda_uv_modifier;
|
|
|
|
ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
|
|
(UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
|
|
|
|
ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
|
|
(LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
|
|
|
|
ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
|
|
(LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
|
|
|
|
if(ps_ctxt->i4_use_const_lamda_modifier)
|
|
{
|
|
ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
|
|
(WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
|
|
}
|
|
else
|
|
{
|
|
ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
|
|
(WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
|
|
}
|
|
ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
|
|
(WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
|
|
|
|
/* lambda corresponding to 8- bit, for metrics based on 8- bit ( Example 8bit SAD in encloop)*/
|
|
lambda = pow(2.0, (((double)(i4_curr_cu_qp - 12)) / 3.0));
|
|
lambda_uv = pow(2.0, (((double)(chroma_qp - 12)) / 3.0));
|
|
|
|
lambda *= lambda_modifier;
|
|
lambda_uv *= lambda_uv_modifier;
|
|
|
|
ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
|
|
(UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
|
|
|
|
ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
|
|
(LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
|
|
|
|
ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
|
|
(LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
|
|
if(ps_ctxt->i4_use_const_lamda_modifier)
|
|
{
|
|
ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
|
|
(WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
|
|
}
|
|
else
|
|
{
|
|
ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
|
|
(WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
|
|
}
|
|
|
|
ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
|
|
(WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
|
|
|
|
break;
|
|
}
|
|
default:
|
|
{
|
|
/* Intended to be a barren wasteland! */
|
|
ASSERT(0);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*!
|
|
******************************************************************************
|
|
* \if Function name : ihevce_get_cl_cu_lambda_prms \endif
|
|
*
|
|
* \brief
|
|
* Function whihc calculates the Lambda params for current picture
|
|
*
|
|
* \param[in] ps_enc_ctxt : encoder ctxt pointer
|
|
* \param[in] ps_cur_pic_ctxt : current pic ctxt
|
|
* \param[in] i4_cur_frame_qp : current pic QP
|
|
* \param[in] first_field : is first field flag
|
|
* \param[in] i4_temporal_lyr_id : Current picture layer id
|
|
*
|
|
* \return
|
|
* None
|
|
*
|
|
* \author
|
|
* Ittiam
|
|
*
|
|
*****************************************************************************
|
|
*/
|
|
void ihevce_get_cl_cu_lambda_prms(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 i4_cur_cu_qp)
|
|
{
|
|
WORD32 chroma_qp = (ps_ctxt->u1_chroma_array_type == 2)
|
|
? MIN(i4_cur_cu_qp + ps_ctxt->i4_chroma_qp_offset, 51)
|
|
: gai1_ihevc_chroma_qp_scale
|
|
[i4_cur_cu_qp + ps_ctxt->i4_chroma_qp_offset + MAX_QP_BD_OFFSET];
|
|
|
|
/* closed loop ssd lambda is same as final lambda */
|
|
ps_ctxt->i8_cl_ssd_lambda_qf =
|
|
ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
|
|
ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
|
|
ps_ctxt
|
|
->i8_cl_ssd_lambda_chroma_qf_array[chroma_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
|
|
ps_ctxt->u4_chroma_cost_weighing_factor =
|
|
ps_ctxt->au4_chroma_cost_weighing_factor_array
|
|
[chroma_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
|
|
/* --- Initialized the lambda for SATD computations --- */
|
|
/* --- 0.95 is the multiplication factor as per HM --- */
|
|
/* --- 1.9 is the multiplication factor for Hadamard Transform --- */
|
|
ps_ctxt->i4_satd_lamda =
|
|
ps_ctxt->i4_satd_lamda_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
|
|
ps_ctxt->i4_sad_lamda =
|
|
ps_ctxt->i4_sad_type2_lamda_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
|
|
}
|
|
|
|
/*!
|
|
******************************************************************************
|
|
* \if Function name : ihevce_update_pred_qp \endif
|
|
*
|
|
* \brief
|
|
* Computes pred qp for the given CU
|
|
*
|
|
* \param[in]
|
|
*
|
|
* \return
|
|
*
|
|
*
|
|
* \author
|
|
* Ittiam
|
|
*
|
|
*****************************************************************************
|
|
*/
|
|
void ihevce_update_pred_qp(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 cu_pos_x, WORD32 cu_pos_y)
|
|
{
|
|
WORD32 i4_pred_qp = 0x7FFFFFFF;
|
|
WORD32 i4_top, i4_left;
|
|
if(cu_pos_x == 0 && cu_pos_y == 0) /*CTB start*/
|
|
{
|
|
i4_pred_qp = ps_ctxt->i4_prev_QP;
|
|
}
|
|
else
|
|
{
|
|
if(cu_pos_y == 0) /*CTB boundary*/
|
|
{
|
|
i4_top = ps_ctxt->i4_prev_QP;
|
|
}
|
|
else /*within CTB*/
|
|
{
|
|
i4_top = ps_ctxt->ai4_qp_qg[(cu_pos_y - 1) * 8 + (cu_pos_x)];
|
|
}
|
|
if(cu_pos_x == 0) /*CTB boundary*/
|
|
{
|
|
i4_left = ps_ctxt->i4_prev_QP;
|
|
}
|
|
else /*within CTB*/
|
|
{
|
|
i4_left = ps_ctxt->ai4_qp_qg[(cu_pos_y)*8 + (cu_pos_x - 1)];
|
|
}
|
|
i4_pred_qp = (i4_left + i4_top + 1) >> 1;
|
|
}
|
|
ps_ctxt->i4_pred_qp = i4_pred_qp;
|
|
return;
|
|
}
|
|
/*!
|
|
******************************************************************************
|
|
* \if Function name : ihevce_compute_cu_level_QP \endif
|
|
*
|
|
* \brief
|
|
* Computes cu level QP with Traqo,Spatial Mod and In-frame RC
|
|
*
|
|
* \param[in]
|
|
*
|
|
* \return
|
|
*
|
|
*
|
|
* \author
|
|
* Ittiam
|
|
*
|
|
*****************************************************************************
|
|
*/
|
|
void ihevce_compute_cu_level_QP(
|
|
ihevce_enc_loop_ctxt_t *ps_ctxt,
|
|
WORD32 i4_activity_for_qp,
|
|
WORD32 i4_activity_for_lamda,
|
|
WORD32 i4_reduce_qp)
|
|
{
|
|
/*modify quant related param in ctxt based on current cu qp*/
|
|
WORD32 i4_input_QP = ps_ctxt->i4_frame_mod_qp;
|
|
WORD32 cu_qp = i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset;
|
|
|
|
WORD32 i4_max_qp_allowed;
|
|
WORD32 i4_min_qp_allowed;
|
|
WORD32 i4_pred_qp;
|
|
|
|
i4_pred_qp = ps_ctxt->i4_pred_qp;
|
|
|
|
if(ps_ctxt->i4_sub_pic_level_rc)
|
|
{
|
|
i4_max_qp_allowed = (i4_pred_qp + (25 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 2)));
|
|
i4_min_qp_allowed = (i4_pred_qp - (26 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 2)));
|
|
}
|
|
else
|
|
{
|
|
i4_max_qp_allowed = (i4_input_QP + (7 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 4)));
|
|
i4_min_qp_allowed = (i4_input_QP - (18 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 4)));
|
|
}
|
|
if((ps_ctxt->i1_slice_type == BSLICE) && (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6))
|
|
return;
|
|
|
|
#if LAMDA_BASED_ON_QUANT
|
|
i4_activity_for_lamda = i4_activity_for_qp;
|
|
#endif
|
|
|
|
if(i4_activity_for_qp != -1)
|
|
{
|
|
cu_qp = (ps_ctxt->ps_rc_quant_ctxt
|
|
->pi4_qp_to_qscale[i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]);
|
|
if(ps_ctxt->i4_qp_mod)
|
|
{
|
|
/*Recompute the Qp as per enc thread's frame level Qp*/
|
|
ASSERT(i4_activity_for_qp > 0);
|
|
cu_qp = ((cu_qp * i4_activity_for_qp) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
|
|
QP_LEVEL_MOD_ACT_FACTOR;
|
|
}
|
|
|
|
// To avoid access of uninitialised Qscale to qp conversion table
|
|
if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale)
|
|
cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale;
|
|
else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale)
|
|
cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale;
|
|
|
|
cu_qp = ps_ctxt->ps_rc_quant_ctxt->pi4_qscale_to_qp[cu_qp];
|
|
|
|
if((1 == i4_reduce_qp) && (cu_qp > 1))
|
|
cu_qp--;
|
|
|
|
/*CLIP the delta to obey standard allowed QP variation of (-26 + offset/2) to (25 + offset/2)*/
|
|
if(cu_qp > i4_max_qp_allowed)
|
|
cu_qp = i4_max_qp_allowed;
|
|
else if(cu_qp < i4_min_qp_allowed)
|
|
cu_qp = i4_min_qp_allowed;
|
|
|
|
/* CLIP to maintain Qp between user configured and min and max Qp values*/
|
|
if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qp)
|
|
cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
|
|
else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qp)
|
|
cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
|
|
|
|
/*cu qp must be populated in cu_analyse_t struct*/
|
|
ps_ctxt->i4_cu_qp = cu_qp;
|
|
/*recompute quant related param at every cu level*/
|
|
ihevce_compute_quant_rel_param(ps_ctxt, cu_qp);
|
|
}
|
|
|
|
/*Decoupling qp and lamda calculation */
|
|
if(i4_activity_for_lamda != -1)
|
|
{
|
|
cu_qp = (ps_ctxt->ps_rc_quant_ctxt
|
|
->pi4_qp_to_qscale[i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]);
|
|
|
|
if(ps_ctxt->i4_qp_mod)
|
|
{
|
|
#if MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON
|
|
/*Recompute the Qp as per enc thread's frame level Qp*/
|
|
ASSERT(i4_activity_for_lamda > 0);
|
|
cu_qp = ((cu_qp * i4_activity_for_lamda) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
|
|
QP_LEVEL_MOD_ACT_FACTOR;
|
|
#endif
|
|
}
|
|
if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale)
|
|
cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale;
|
|
else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale)
|
|
cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale;
|
|
|
|
cu_qp = ps_ctxt->ps_rc_quant_ctxt->pi4_qscale_to_qp[cu_qp];
|
|
|
|
/*CLIP the delta to obey standard allowed QP variation of (-26 + offset/2) to (25 + offset/2)*/
|
|
if(cu_qp > i4_max_qp_allowed)
|
|
cu_qp = i4_max_qp_allowed;
|
|
else if(cu_qp < i4_min_qp_allowed)
|
|
cu_qp = i4_min_qp_allowed;
|
|
|
|
/* CLIP to maintain Qp between user configured and min and max Qp values*/
|
|
if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qp)
|
|
cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
|
|
else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qp)
|
|
cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
|
|
/* get frame level lambda params */
|
|
ihevce_get_cl_cu_lambda_prms(
|
|
ps_ctxt, MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON ? cu_qp : ps_ctxt->i4_frame_qp);
|
|
}
|
|
}
|
|
|
|
void ihevce_update_cu_level_qp_lamda(
|
|
ihevce_enc_loop_ctxt_t *ps_ctxt, cu_analyse_t *ps_cu_analyse, WORD32 trans_size, WORD32 is_intra)
|
|
{
|
|
WORD32 i4_act_counter = 0, i4_act_counter_lamda = 0;
|
|
|
|
if(ps_cu_analyse->u1_cu_size == 64)
|
|
{
|
|
ASSERT((trans_size == 32) || (trans_size == 16) || (trans_size == 8) || (trans_size == 4));
|
|
i4_act_counter = (trans_size == 16) + 2 * ((trans_size == 8) || (trans_size == 4));
|
|
i4_act_counter_lamda = 3;
|
|
}
|
|
else if(ps_cu_analyse->u1_cu_size == 32)
|
|
{
|
|
ASSERT((trans_size == 32) || (trans_size == 16) || (trans_size == 8) || (trans_size == 4));
|
|
i4_act_counter = (trans_size == 16) + 2 * ((trans_size == 8) || (trans_size == 4));
|
|
i4_act_counter_lamda = 0;
|
|
}
|
|
else if(ps_cu_analyse->u1_cu_size == 16)
|
|
{
|
|
ASSERT((trans_size == 16) || (trans_size == 8) || (trans_size == 4));
|
|
i4_act_counter = (trans_size == 8) || (trans_size == 4);
|
|
i4_act_counter_lamda = 0;
|
|
}
|
|
else if(ps_cu_analyse->u1_cu_size == 8)
|
|
{
|
|
ASSERT((trans_size == 8) || (trans_size == 4));
|
|
i4_act_counter = 1;
|
|
i4_act_counter_lamda = 0;
|
|
}
|
|
else
|
|
{
|
|
ASSERT(0);
|
|
}
|
|
|
|
if(ps_ctxt->i4_use_ctb_level_lamda)
|
|
{
|
|
ihevce_compute_cu_level_QP(
|
|
ps_ctxt, ps_cu_analyse->i4_act_factor[i4_act_counter][is_intra], -1, 0);
|
|
}
|
|
else
|
|
{
|
|
ihevce_compute_cu_level_QP(
|
|
ps_ctxt,
|
|
ps_cu_analyse->i4_act_factor[i4_act_counter][is_intra],
|
|
ps_cu_analyse->i4_act_factor[i4_act_counter_lamda][is_intra],
|
|
0);
|
|
}
|
|
|
|
ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_cu_qp;
|
|
}
|
|
|
|
/**
|
|
*******************************************************************************
|
|
* \if Function name : ihevce_scan_coeffs \endif
|
|
*
|
|
* @brief * Computes the coeff buffer for a coded TU for entropy coding
|
|
*
|
|
* @par Description
|
|
* Computes the coeff buffer for a coded TU for entropy coding
|
|
*
|
|
* \param[in] pi2_quan_coeffs Quantized coefficient context
|
|
*
|
|
* \param[in] scan_idx Scan index specifying the scan order
|
|
*
|
|
* \param[in] trans_size Transform unit size
|
|
*
|
|
* \param[inout] pu1_out_data output coeff buffer for a coded TU for entropy coding
|
|
*
|
|
* \param[in] pu1_csbf_buf csb flag buffer
|
|
*
|
|
* @returns num_bytes
|
|
* Number of bytes written to pu1_out_data
|
|
*
|
|
* @remarks
|
|
*
|
|
* \author
|
|
* Ittiam
|
|
*
|
|
*******************************************************************************
|
|
*/
|
|
|
|
WORD32 ihevce_scan_coeffs(
|
|
WORD16 *pi2_quant_coeffs,
|
|
WORD32 *pi4_subBlock2csbfId_map,
|
|
WORD32 scan_idx,
|
|
WORD32 trans_size,
|
|
UWORD8 *pu1_out_data,
|
|
UWORD8 *pu1_csbf_buf,
|
|
WORD32 i4_csbf_stride)
|
|
{
|
|
WORD32 i, trans_unit_idx, num_gt1_flag;
|
|
UWORD16 u2_csbf0flags;
|
|
WORD32 num_bytes = 0;
|
|
UWORD8 *pu1_trans_table;
|
|
UWORD8 *pu1_csb_table;
|
|
WORD32 shift_value, mask_value;
|
|
UWORD16 u2_sig_coeff_abs_gt0_flags = 0, u2_sig_coeff_abs_gt1_flags = 0;
|
|
UWORD16 u2_sign_flags;
|
|
UWORD16 u2_abs_coeff_remaining[16];
|
|
WORD32 blk_row, blk_col;
|
|
|
|
UWORD8 *pu1_out_data_header;
|
|
UWORD16 *pu2_out_data_coeff;
|
|
|
|
WORD32 x_pos, y_pos;
|
|
WORD32 quant_coeff;
|
|
|
|
WORD32 num_gt0_flag;
|
|
(void)i4_csbf_stride;
|
|
pu1_out_data_header = pu1_out_data;
|
|
/* Need only last 3 bits, rest are reserved for debugging and making */
|
|
/* WORD alignment */
|
|
u2_csbf0flags = 0xBAD0;
|
|
|
|
/* Select proper order for your transform unit and csb based on scan_idx*/
|
|
/* and the trans_size */
|
|
|
|
/* scan order inside a csb */
|
|
pu1_csb_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
|
|
/* GETRANGE will give the log_2 of trans_size to shift_value */
|
|
GETRANGE(shift_value, trans_size);
|
|
shift_value = shift_value - 3; /* for finding. row no. from scan index */
|
|
mask_value = (trans_size / 4) - 1; /*for finding the col. no. from scan index*/
|
|
switch(trans_size)
|
|
{
|
|
case 32:
|
|
pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_8x8[scan_idx][0]);
|
|
break;
|
|
case 16:
|
|
pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
|
|
break;
|
|
case 8:
|
|
pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_2x2[scan_idx][0]);
|
|
break;
|
|
case 4:
|
|
pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_1x1[0]);
|
|
break;
|
|
default:
|
|
DBG_PRINTF("Invalid Trans Size\n");
|
|
return -1;
|
|
break;
|
|
}
|
|
|
|
/*go through each csb in the scan order for first non-zero coded sub-block*/
|
|
for(trans_unit_idx = (trans_size * trans_size / 16) - 1; trans_unit_idx >= 0; trans_unit_idx--)
|
|
{
|
|
/* check for the first csb flag in our scan order */
|
|
if(pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[trans_unit_idx]]])
|
|
{
|
|
UWORD8 u1_last_x, u1_last_y;
|
|
/* row of csb */
|
|
blk_row = pu1_trans_table[trans_unit_idx] >> shift_value;
|
|
/* col of csb */
|
|
blk_col = pu1_trans_table[trans_unit_idx] & mask_value;
|
|
|
|
/*check for the 1st non-0 values inside the csb in our scan order*/
|
|
for(i = 15; i >= 0; i--)
|
|
{
|
|
x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
|
|
y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;
|
|
|
|
quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
|
|
|
|
if(quant_coeff != 0)
|
|
break;
|
|
}
|
|
|
|
ASSERT(i >= 0);
|
|
|
|
u1_last_x = x_pos;
|
|
u1_last_y = y_pos;
|
|
|
|
/* storing last_x and last_y */
|
|
*pu1_out_data_header = u1_last_x;
|
|
pu1_out_data_header++;
|
|
num_bytes++;
|
|
*pu1_out_data_header = u1_last_y;
|
|
pu1_out_data_header++;
|
|
num_bytes++;
|
|
|
|
/* storing the scan order */
|
|
*pu1_out_data_header = scan_idx;
|
|
pu1_out_data_header++;
|
|
num_bytes++;
|
|
/* storing last_sub_block pos. in scan order count */
|
|
*pu1_out_data_header = trans_unit_idx;
|
|
pu1_out_data_header++;
|
|
num_bytes++;
|
|
|
|
/*stored the first 4 bytes, now all are word16. So word16 pointer*/
|
|
pu2_out_data_coeff = (UWORD16 *)pu1_out_data_header;
|
|
|
|
/* u2_csbf0flags word */
|
|
u2_csbf0flags = 0xBAD0 | 1; /*since right&bottom csbf is 0*/
|
|
/* storing u2_csbf0flags word */
|
|
*pu2_out_data_coeff = u2_csbf0flags;
|
|
pu2_out_data_coeff++;
|
|
num_bytes += 2;
|
|
|
|
num_gt0_flag = 1;
|
|
num_gt1_flag = 0;
|
|
u2_sign_flags = 0;
|
|
|
|
/* set the i th bit of u2_sig_coeff_abs_gt0_flags */
|
|
u2_sig_coeff_abs_gt0_flags = u2_sig_coeff_abs_gt0_flags | (1 << i);
|
|
if(abs(quant_coeff) > 1)
|
|
{
|
|
/* set the i th bit of u2_sig_coeff_abs_gt1_flags */
|
|
u2_sig_coeff_abs_gt1_flags = u2_sig_coeff_abs_gt1_flags | (1 << i);
|
|
/* update u2_abs_coeff_remaining */
|
|
u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;
|
|
|
|
num_gt1_flag++;
|
|
}
|
|
|
|
if(quant_coeff < 0)
|
|
{
|
|
/* set the i th bit of u2_sign_flags */
|
|
u2_sign_flags = u2_sign_flags | (1 << i);
|
|
}
|
|
|
|
/* Test remaining elements in our scan order */
|
|
/* Can optimize further by CLZ macro */
|
|
for(i = i - 1; i >= 0; i--)
|
|
{
|
|
x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
|
|
y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;
|
|
|
|
quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
|
|
|
|
if(quant_coeff != 0)
|
|
{
|
|
/* set the i th bit of u2_sig_coeff_abs_gt0_flags */
|
|
u2_sig_coeff_abs_gt0_flags |= (1 << i);
|
|
|
|
if((abs(quant_coeff) > 1) || (num_gt0_flag >= MAX_GT_ONE))
|
|
{
|
|
/* set the i th bit of u2_sig_coeff_abs_gt1_flags */
|
|
u2_sig_coeff_abs_gt1_flags |= (1 << i);
|
|
|
|
/* update u2_abs_coeff_remaining */
|
|
u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;
|
|
|
|
num_gt1_flag++; /*n0. of Ones in sig_coeff_abs_gt1_flag*/
|
|
}
|
|
|
|
if(quant_coeff < 0)
|
|
{
|
|
/* set the i th bit of u2_sign_flags */
|
|
u2_sign_flags |= (1 << i);
|
|
}
|
|
|
|
num_gt0_flag++;
|
|
}
|
|
}
|
|
|
|
/* storing u2_sig_coeff_abs_gt0_flags 2 bytes */
|
|
*pu2_out_data_coeff = u2_sig_coeff_abs_gt0_flags;
|
|
pu2_out_data_coeff++;
|
|
num_bytes += 2;
|
|
/* storing u2_sig_coeff_abs_gt1_flags 2 bytes */
|
|
*pu2_out_data_coeff = u2_sig_coeff_abs_gt1_flags;
|
|
pu2_out_data_coeff++;
|
|
num_bytes += 2;
|
|
/* storing u2_sign_flags 2 bytes */
|
|
*pu2_out_data_coeff = u2_sign_flags;
|
|
pu2_out_data_coeff++;
|
|
num_bytes += 2;
|
|
|
|
/* Store the u2_abs_coeff_remaining[] */
|
|
for(i = 0; i < num_gt1_flag; i++)
|
|
{
|
|
/* storing u2_abs_coeff_remaining[i] 2 bytes */
|
|
*pu2_out_data_coeff = u2_abs_coeff_remaining[i];
|
|
pu2_out_data_coeff++;
|
|
num_bytes += 2;
|
|
}
|
|
|
|
break; /*We just need this loop for finding 1st non-zero csb only*/
|
|
}
|
|
}
|
|
|
|
/* go through remaining csb in the scan order */
|
|
for(trans_unit_idx = trans_unit_idx - 1; trans_unit_idx >= 0; trans_unit_idx--)
|
|
{
|
|
blk_row = pu1_trans_table[trans_unit_idx] >> shift_value; /*row of csb*/
|
|
blk_col = pu1_trans_table[trans_unit_idx] & mask_value; /*col of csb*/
|
|
|
|
/* u2_csbf0flags word */
|
|
u2_csbf0flags = 0xBAD0 | /* assuming csbf_buf has only 0 or 1 values */
|
|
(pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[trans_unit_idx]]]);
|
|
|
|
/********************************************************************/
|
|
/* Minor hack: As per HEVC spec csbf in not signalled in stream for */
|
|
/* block0, instead sig coeff map is directly signalled. This is */
|
|
/* taken care by forcing csbf for block0 to be 1 even if it is 0 */
|
|
/********************************************************************/
|
|
if(0 == trans_unit_idx)
|
|
{
|
|
u2_csbf0flags |= 1;
|
|
}
|
|
|
|
if((blk_col + 1 < trans_size / 4)) /* checking right boundary */
|
|
{
|
|
if(pu1_csbf_buf[pi4_subBlock2csbfId_map[blk_row * trans_size / 4 + blk_col + 1]])
|
|
{
|
|
/* set the 2nd bit of u2_csbf0flags for right csbf */
|
|
u2_csbf0flags = u2_csbf0flags | (1 << 1);
|
|
}
|
|
}
|
|
if((blk_row + 1 < trans_size / 4)) /* checking bottom oundary */
|
|
{
|
|
if(pu1_csbf_buf[pi4_subBlock2csbfId_map[(blk_row + 1) * trans_size / 4 + blk_col]])
|
|
{
|
|
/* set the 3rd bit of u2_csbf0flags for bottom csbf */
|
|
u2_csbf0flags = u2_csbf0flags | (1 << 2);
|
|
}
|
|
}
|
|
|
|
/* storing u2_csbf0flags word */
|
|
*pu2_out_data_coeff = u2_csbf0flags;
|
|
pu2_out_data_coeff++;
|
|
num_bytes += 2;
|
|
|
|
/* check for the csb flag in our scan order */
|
|
if(u2_csbf0flags & 0x1)
|
|
{
|
|
u2_sig_coeff_abs_gt0_flags = 0;
|
|
u2_sig_coeff_abs_gt1_flags = 0;
|
|
u2_sign_flags = 0;
|
|
|
|
num_gt0_flag = 0;
|
|
num_gt1_flag = 0;
|
|
/* check for the non-0 values inside the csb in our scan order */
|
|
/* Can optimize further by CLZ macro */
|
|
for(i = 15; i >= 0; i--)
|
|
{
|
|
x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
|
|
y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;
|
|
|
|
quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
|
|
|
|
if(quant_coeff != 0)
|
|
{
|
|
/* set the i th bit of u2_sig_coeff_abs_gt0_flags */
|
|
u2_sig_coeff_abs_gt0_flags |= (1 << i);
|
|
|
|
if((abs(quant_coeff) > 1) || (num_gt0_flag >= MAX_GT_ONE))
|
|
{
|
|
/* set the i th bit of u2_sig_coeff_abs_gt1_flags */
|
|
u2_sig_coeff_abs_gt1_flags |= (1 << i);
|
|
|
|
/* update u2_abs_coeff_remaining */
|
|
u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;
|
|
|
|
num_gt1_flag++;
|
|
}
|
|
|
|
if(quant_coeff < 0)
|
|
{
|
|
/* set the i th bit of u2_sign_flags */
|
|
u2_sign_flags = u2_sign_flags | (1 << i);
|
|
}
|
|
|
|
num_gt0_flag++;
|
|
}
|
|
}
|
|
|
|
/* storing u2_sig_coeff_abs_gt0_flags 2 bytes */
|
|
*pu2_out_data_coeff = u2_sig_coeff_abs_gt0_flags;
|
|
pu2_out_data_coeff++;
|
|
num_bytes += 2;
|
|
|
|
/* storing u2_sig_coeff_abs_gt1_flags 2 bytes */
|
|
*pu2_out_data_coeff = u2_sig_coeff_abs_gt1_flags;
|
|
pu2_out_data_coeff++;
|
|
num_bytes += 2;
|
|
|
|
/* storing u2_sign_flags 2 bytes */
|
|
*pu2_out_data_coeff = u2_sign_flags;
|
|
pu2_out_data_coeff++;
|
|
num_bytes += 2;
|
|
|
|
/* Store the u2_abs_coeff_remaining[] */
|
|
for(i = 0; i < num_gt1_flag; i++)
|
|
{
|
|
/* storing u2_abs_coeff_remaining[i] 2 bytes */
|
|
*pu2_out_data_coeff = u2_abs_coeff_remaining[i];
|
|
pu2_out_data_coeff++;
|
|
num_bytes += 2;
|
|
}
|
|
}
|
|
}
|
|
|
|
return num_bytes; /* Return the number of bytes written to out_data */
|
|
}
|
|
|
|
/**
|
|
*******************************************************************************
|
|
* \if Function name : ihevce_populate_intra_pred_mode \endif
|
|
*
|
|
* \brief * populates intra pred modes,b2_mpm_idx,b1_prev_intra_luma_pred_flag &
|
|
* b5_rem_intra_pred_mode for a CU based on nieghbouring CUs,
|
|
*
|
|
* \par Description
|
|
* Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode
|
|
* for a CU
|
|
*
|
|
* \param[in] top_intra_mode Top intra mode
|
|
* \param[in] left_intra_mode Left intra mode
|
|
* \param[in] available_top Top availability flag
|
|
* \param[in] available_left Left availability flag
|
|
* \param[in] cu_pos_y CU 'y' position
|
|
* \param[in] ps_cand_mode_list pointer to populate candidate list
|
|
*
|
|
* \returns none
|
|
*
|
|
* \author
|
|
* Ittiam
|
|
*
|
|
*******************************************************************************
|
|
*/
|
|
|
|
void ihevce_populate_intra_pred_mode(
|
|
WORD32 top_intra_mode,
|
|
WORD32 left_intra_mode,
|
|
WORD32 available_top,
|
|
WORD32 available_left,
|
|
WORD32 cu_pos_y,
|
|
WORD32 *ps_cand_mode_list)
|
|
{
|
|
/* local variables */
|
|
WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top;
|
|
|
|
/* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */
|
|
/* N = top */
|
|
if(0 == available_top)
|
|
{
|
|
cand_intra_pred_mode_top = INTRA_DC;
|
|
}
|
|
/* for neighbour != INTRA, setting DC is done outside */
|
|
else if(0 == cu_pos_y) /* It's on the CTB boundary */
|
|
{
|
|
cand_intra_pred_mode_top = INTRA_DC;
|
|
}
|
|
else
|
|
{
|
|
cand_intra_pred_mode_top = top_intra_mode;
|
|
}
|
|
|
|
/* N = left */
|
|
if(0 == available_left)
|
|
{
|
|
cand_intra_pred_mode_left = INTRA_DC;
|
|
}
|
|
/* for neighbour != INTRA, setting DC is done outside */
|
|
else
|
|
{
|
|
cand_intra_pred_mode_left = left_intra_mode;
|
|
}
|
|
|
|
/* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */
|
|
if(cand_intra_pred_mode_left == cand_intra_pred_mode_top)
|
|
{
|
|
if(cand_intra_pred_mode_left < 2)
|
|
{
|
|
ps_cand_mode_list[0] = INTRA_PLANAR;
|
|
ps_cand_mode_list[1] = INTRA_DC;
|
|
ps_cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */
|
|
}
|
|
else
|
|
{
|
|
ps_cand_mode_list[0] = cand_intra_pred_mode_left;
|
|
ps_cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32);
|
|
ps_cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ps_cand_mode_list[0] = cand_intra_pred_mode_left;
|
|
ps_cand_mode_list[1] = cand_intra_pred_mode_top;
|
|
|
|
if((cand_intra_pred_mode_left != INTRA_PLANAR) &&
|
|
(cand_intra_pred_mode_top != INTRA_PLANAR))
|
|
{
|
|
ps_cand_mode_list[2] = INTRA_PLANAR;
|
|
}
|
|
else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC))
|
|
{
|
|
ps_cand_mode_list[2] = INTRA_DC;
|
|
}
|
|
else
|
|
{
|
|
ps_cand_mode_list[2] = INTRA_ANGULAR(26);
|
|
}
|
|
}
|
|
}
|
|
/**
|
|
*******************************************************************************
|
|
* \if Function name : ihevce_intra_pred_mode_signaling \endif
|
|
*
|
|
* \brief * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx &
|
|
* b5_rem_intra_pred_mode for a CU
|
|
*
|
|
* \par Description
|
|
* Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode
|
|
* for a CU
|
|
*
|
|
* \param[in] ps_nbr_top Top neighbour context
|
|
* \param[in] ps_nbr_left Left neighbour context
|
|
* \param[in] available_top Top availability flag
|
|
* \param[in] available_left Left availability flag
|
|
* \param[in] cu_pos_y CU 'y' position
|
|
* \param[in] luma_intra_pred_mode_current the intra_pred_mode of current block
|
|
* \param[inout] ps_intra_pred_mode_current
|
|
* Pointer to structure having b1_prev_intra_luma_pred_flag, b2_mpm_idx and
|
|
* b5_rem_intra_pred_mode
|
|
*
|
|
* \returns none
|
|
*
|
|
* \author
|
|
* Ittiam
|
|
*
|
|
*******************************************************************************
|
|
*/
|
|
|
|
void ihevce_intra_pred_mode_signaling(
|
|
WORD32 top_intra_mode,
|
|
WORD32 left_intra_mode,
|
|
WORD32 available_top,
|
|
WORD32 available_left,
|
|
WORD32 cu_pos_y,
|
|
WORD32 luma_intra_pred_mode_current,
|
|
intra_prev_rem_flags_t *ps_intra_pred_mode_current)
|
|
{
|
|
/* local variables */
|
|
WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top;
|
|
WORD32 cand_mode_list[3];
|
|
|
|
ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 0;
|
|
ps_intra_pred_mode_current->b2_mpm_idx = 0; // for safety purpose
|
|
ps_intra_pred_mode_current->b5_rem_intra_pred_mode = 0;
|
|
|
|
/* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */
|
|
/* N = top */
|
|
if(0 == available_top)
|
|
{
|
|
cand_intra_pred_mode_top = INTRA_DC;
|
|
}
|
|
/* for neighbour != INTRA, setting DC is done outside */
|
|
else if(0 == cu_pos_y) /* It's on the CTB boundary */
|
|
{
|
|
cand_intra_pred_mode_top = INTRA_DC;
|
|
}
|
|
else
|
|
{
|
|
cand_intra_pred_mode_top = top_intra_mode;
|
|
}
|
|
|
|
/* N = left */
|
|
if(0 == available_left)
|
|
{
|
|
cand_intra_pred_mode_left = INTRA_DC;
|
|
}
|
|
/* for neighbour != INTRA, setting DC is done outside */
|
|
else
|
|
{
|
|
cand_intra_pred_mode_left = left_intra_mode;
|
|
}
|
|
|
|
/* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */
|
|
if(cand_intra_pred_mode_left == cand_intra_pred_mode_top)
|
|
{
|
|
if(cand_intra_pred_mode_left < 2)
|
|
{
|
|
cand_mode_list[0] = INTRA_PLANAR;
|
|
cand_mode_list[1] = INTRA_DC;
|
|
cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */
|
|
}
|
|
else
|
|
{
|
|
cand_mode_list[0] = cand_intra_pred_mode_left;
|
|
cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32);
|
|
cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
cand_mode_list[0] = cand_intra_pred_mode_left;
|
|
cand_mode_list[1] = cand_intra_pred_mode_top;
|
|
|
|
if((cand_intra_pred_mode_left != INTRA_PLANAR) &&
|
|
(cand_intra_pred_mode_top != INTRA_PLANAR))
|
|
{
|
|
cand_mode_list[2] = INTRA_PLANAR;
|
|
}
|
|
else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC))
|
|
{
|
|
cand_mode_list[2] = INTRA_DC;
|
|
}
|
|
else
|
|
{
|
|
cand_mode_list[2] = INTRA_ANGULAR(26);
|
|
}
|
|
}
|
|
|
|
/* Signal Generation */
|
|
|
|
/* Flag & mpm_index generation */
|
|
if(cand_mode_list[0] == luma_intra_pred_mode_current)
|
|
{
|
|
ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
|
|
ps_intra_pred_mode_current->b2_mpm_idx = 0;
|
|
}
|
|
else if(cand_mode_list[1] == luma_intra_pred_mode_current)
|
|
{
|
|
ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
|
|
ps_intra_pred_mode_current->b2_mpm_idx = 1;
|
|
}
|
|
else if(cand_mode_list[2] == luma_intra_pred_mode_current)
|
|
{
|
|
ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
|
|
ps_intra_pred_mode_current->b2_mpm_idx = 2;
|
|
}
|
|
/* Flag & b5_rem_intra_pred_mode generation */
|
|
else
|
|
{
|
|
WORD32 rem_mode;
|
|
|
|
ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 0;
|
|
|
|
/* sorting cand_mode_list */
|
|
if(cand_mode_list[0] > cand_mode_list[1])
|
|
{
|
|
SWAP(cand_mode_list[0], cand_mode_list[1]);
|
|
}
|
|
if(cand_mode_list[0] > cand_mode_list[2])
|
|
{
|
|
SWAP(cand_mode_list[0], cand_mode_list[2]);
|
|
}
|
|
if(cand_mode_list[1] > cand_mode_list[2])
|
|
{
|
|
SWAP(cand_mode_list[1], cand_mode_list[2]);
|
|
}
|
|
|
|
rem_mode = luma_intra_pred_mode_current;
|
|
|
|
if((rem_mode) >= cand_mode_list[2])
|
|
{
|
|
(rem_mode)--;
|
|
}
|
|
if((rem_mode) >= cand_mode_list[1])
|
|
{
|
|
(rem_mode)--;
|
|
}
|
|
if((rem_mode) >= cand_mode_list[0])
|
|
{
|
|
(rem_mode)--;
|
|
}
|
|
ps_intra_pred_mode_current->b5_rem_intra_pred_mode = rem_mode;
|
|
}
|
|
}
|
|
|
|
void ihevce_quant_rounding_factor_gen(
|
|
WORD32 i4_trans_size,
|
|
WORD32 is_luma,
|
|
rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt,
|
|
WORD32 *pi4_quant_round_0_1,
|
|
WORD32 *pi4_quant_round_1_2,
|
|
double i4_lamda_modifier,
|
|
UWORD8 i4_is_tu_level_quant_rounding)
|
|
{
|
|
//WORD32 i4_scan_idx = ps_ctxt->i4_scan_idx;
|
|
UWORD8 *pu1_ctxt_model;
|
|
WORD32 scan_pos;
|
|
WORD32 sig_coeff_base_ctxt; /* cabac context for sig coeff flag */
|
|
WORD32 abs_gt1_base_ctxt;
|
|
WORD32 log2_tr_size, i;
|
|
UWORD16 u4_bits_estimated_r0, u4_bits_estimated_r1, u4_bits_estimated_r2;
|
|
UWORD16 u4_bits_estimated_r1_temp;
|
|
WORD32 j = 0;
|
|
WORD32 k = 0;
|
|
WORD32 temp2;
|
|
|
|
double i4_lamda_mod = i4_lamda_modifier * pow(2.0, (-8.0 / 3.0));
|
|
LWORD64 lamda_mod = (LWORD64)(i4_lamda_mod * (1 << LAMDA_Q_SHIFT_FACT));
|
|
/* transform size to log2transform size */
|
|
GETRANGE(log2_tr_size, i4_trans_size);
|
|
log2_tr_size -= 1;
|
|
|
|
if(1 == i4_is_tu_level_quant_rounding)
|
|
{
|
|
entropy_context_t *ps_cur_tu_entropy;
|
|
cab_ctxt_t *ps_cabac;
|
|
WORD32 curr_buf_idx = ps_rdopt_entropy_ctxt->i4_curr_buf_idx;
|
|
ps_cur_tu_entropy = &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[curr_buf_idx];
|
|
|
|
ps_cabac = &ps_cur_tu_entropy->s_cabac_ctxt;
|
|
|
|
pu1_ctxt_model = &ps_cabac->au1_ctxt_models[0];
|
|
}
|
|
else
|
|
{
|
|
pu1_ctxt_model = &ps_rdopt_entropy_ctxt->au1_init_cabac_ctxt_states[0];
|
|
}
|
|
/*If transform size is 4x4, then only one sub-block*/
|
|
if(is_luma)
|
|
{
|
|
sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG;
|
|
abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG;
|
|
|
|
if(3 == log2_tr_size)
|
|
{
|
|
/* 8x8 transform size */
|
|
/* Assuming diagnol scan idx for now */
|
|
sig_coeff_base_ctxt += 9;
|
|
}
|
|
else if(3 < log2_tr_size)
|
|
{
|
|
/* larger transform sizes */
|
|
sig_coeff_base_ctxt += 21;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* chroma context initializations */
|
|
sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG + 27;
|
|
abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG + 16;
|
|
|
|
if(3 == log2_tr_size)
|
|
{
|
|
/* 8x8 transform size */
|
|
sig_coeff_base_ctxt += 9;
|
|
}
|
|
else if(3 < log2_tr_size)
|
|
{
|
|
/* larger transform sizes */
|
|
sig_coeff_base_ctxt += 12;
|
|
}
|
|
}
|
|
|
|
/*Transform size of 4x4 will have only a single CSB */
|
|
/* derive the context inc as per section 9.3.3.1.4 */
|
|
|
|
if(2 == log2_tr_size)
|
|
{
|
|
UWORD8 sig_ctxinc;
|
|
WORD32 state_mps;
|
|
WORD32 gt1_ctxt = 0;
|
|
WORD32 ctxt_set = 0;
|
|
WORD32 ctxt_idx = 0;
|
|
|
|
/* context set based on luma subblock pos */
|
|
|
|
/* Encodet the abs level gt1 bins */
|
|
/* Currently calculating trade off between mps(2) and mps(1)*/
|
|
/* The estimation has to be further done for mps(11) and mps(111)*/
|
|
/*ctxt_set = 0 as transform 4x4 has only one csb with DC */
|
|
/* gt1_ctxt = 0 for the co-ef value to be 2 */
|
|
|
|
ctxt_set = gt1_ctxt = 0;
|
|
ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
|
|
|
|
state_mps = pu1_ctxt_model[ctxt_idx];
|
|
|
|
u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];
|
|
|
|
u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
|
|
|
|
QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1_temp, lamda_mod);
|
|
for(scan_pos = 0; scan_pos < 16; scan_pos++)
|
|
{
|
|
*(pi4_quant_round_1_2 + scan_pos) = temp2;
|
|
}
|
|
|
|
for(scan_pos = 0; scan_pos < 16; scan_pos++)
|
|
{
|
|
//UWORD8 nbr_csbf = 1;
|
|
/* derive the x,y pos */
|
|
UWORD8 y_pos_x_pos = scan_pos; //gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
|
|
|
|
/* 4x4 transform size increment uses lookup */
|
|
sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc_tr4[y_pos_x_pos];
|
|
|
|
/*Get the mps state based on ctxt modes */
|
|
state_mps = pu1_ctxt_model[sig_ctxinc + sig_coeff_base_ctxt];
|
|
|
|
/* Bits taken to encode sig co-ef flag as 0 */
|
|
u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
|
|
|
|
/* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
|
|
//
|
|
u4_bits_estimated_r1 =
|
|
(gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
|
|
|
|
/* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
|
|
u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;
|
|
|
|
QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
|
|
*(pi4_quant_round_0_1 + scan_pos) = temp2;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
UWORD8 *pu1_hevce_sigcoeff_ctxtinc;
|
|
WORD32 is_nbr_csb_state_mps;
|
|
|
|
WORD32 state_mps;
|
|
WORD32 gt1_ctxt = 0;
|
|
WORD32 ctxt_set = 0;
|
|
WORD32 ctxt_idx;
|
|
/*1to2 rounding factor is same for all sub blocks except for sub-block = 0*/
|
|
/*Hence will write all the sub-block with i >=1 coeff, and then overwrite for i = 0*/
|
|
|
|
/*ctxt_set = 0 DC subblock, the previous state did not have 2
|
|
ctxt_set = 1 DC subblock, the previous state did have >= 2
|
|
ctxt_set = 2 AC subblock, the previous state did not have 2
|
|
ctxt_set = 3 AC subblock, the previous state did have >= 2*/
|
|
i = 1;
|
|
ctxt_set = (i && is_luma) ? 2 : 0;
|
|
|
|
ctxt_set++;
|
|
|
|
/*0th position indicates the probability of 2 */
|
|
/*1th position indicates the probability of 1 */
|
|
/*2th position indicates the probability of 11 */
|
|
/*3th position indicates the probability of 111 */
|
|
|
|
gt1_ctxt = 0;
|
|
ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
|
|
|
|
state_mps = pu1_ctxt_model[ctxt_idx];
|
|
|
|
u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];
|
|
|
|
u4_bits_estimated_r1 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
|
|
QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1, lamda_mod);
|
|
|
|
for(scan_pos = 0; scan_pos < (16 * (i4_trans_size * i4_trans_size >> 4)); scan_pos++)
|
|
{
|
|
*(pi4_quant_round_1_2 + scan_pos) = temp2;
|
|
}
|
|
|
|
i = 0;
|
|
ctxt_set = (i && is_luma) ? 2 : 0;
|
|
ctxt_set++;
|
|
|
|
/*0th position indicates the probability of 2 */
|
|
/*1th position indicates the probability of 1 */
|
|
/*2th position indicates the probability of 11 */
|
|
/*3th position indicates the probability of 111 */
|
|
|
|
gt1_ctxt = 0;
|
|
ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
|
|
|
|
state_mps = pu1_ctxt_model[ctxt_idx];
|
|
|
|
u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];
|
|
|
|
u4_bits_estimated_r1 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
|
|
QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1, lamda_mod);
|
|
|
|
for(scan_pos = 0; scan_pos < 16; scan_pos++)
|
|
{
|
|
*(pi4_quant_round_1_2 + ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size))) = temp2;
|
|
}
|
|
|
|
{
|
|
WORD32 ctxt_idx;
|
|
|
|
WORD32 nbr_csbf_0, nbr_csbf_1;
|
|
WORD32 state_mps_0, state_mps_1;
|
|
ctxt_idx = IHEVC_CAB_CODED_SUBLK_IDX;
|
|
ctxt_idx += is_luma ? 0 : 2;
|
|
|
|
/* ctxt based on right / bottom avail csbf, section 9.3.3.1.3 */
|
|
/* if neibhor not available, ctxt idx = 0*/
|
|
nbr_csbf_0 = 0;
|
|
ctxt_idx += nbr_csbf_0 ? 1 : 0;
|
|
state_mps_0 = pu1_ctxt_model[ctxt_idx];
|
|
|
|
nbr_csbf_1 = 1;
|
|
ctxt_idx += nbr_csbf_1 ? 1 : 0;
|
|
state_mps_1 = pu1_ctxt_model[ctxt_idx];
|
|
|
|
is_nbr_csb_state_mps = ((state_mps_0 % 2) == 1) && ((state_mps_1 % 2) == 1);
|
|
}
|
|
|
|
if(1 == is_nbr_csb_state_mps)
|
|
{
|
|
for(i = 0; i < (i4_trans_size * i4_trans_size >> 4); i++)
|
|
{
|
|
UWORD8 sig_ctxinc;
|
|
WORD32 state_mps;
|
|
WORD32 gt1_ctxt = 0;
|
|
WORD32 ctxt_set = 0;
|
|
|
|
WORD32 ctxt_idx;
|
|
|
|
/*Check if the cabac states had previous nbr available */
|
|
|
|
if(i == 0)
|
|
pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[3][0];
|
|
else if(i < (i4_trans_size >> 2))
|
|
pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[1][0];
|
|
else if((i % (i4_trans_size >> 2)) == 0)
|
|
pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[2][0];
|
|
else
|
|
pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[0][0];
|
|
|
|
if(((i % (i4_trans_size >> 2)) == 0) && (i != 0))
|
|
k++;
|
|
|
|
j = ((i4_trans_size * 4) * k) + ((i % (i4_trans_size >> 2)) * 4);
|
|
/*ctxt_set = 0 DC subblock, the previous state did not have 2
|
|
ctxt_set = 1 DC subblock, the previous state did have >= 2
|
|
ctxt_set = 2 AC subblock, the previous state did not have 2
|
|
ctxt_set = 3 AC subblock, the previous state did have >= 2*/
|
|
|
|
ctxt_set = (i && is_luma) ? 2 : 0;
|
|
|
|
/* gt1_ctxt = 1 for the co-ef value to be 1 */
|
|
gt1_ctxt = 0;
|
|
ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
|
|
|
|
state_mps = pu1_ctxt_model[ctxt_idx];
|
|
|
|
/* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
|
|
u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
|
|
|
|
for(scan_pos = 0; scan_pos < 16; scan_pos++)
|
|
{
|
|
UWORD8 y_pos_x_pos;
|
|
|
|
if(scan_pos || i)
|
|
{
|
|
y_pos_x_pos = scan_pos; // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
|
|
/* ctxt for AC coeff depends on curpos and neigbour csbf */
|
|
sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];
|
|
|
|
/* based on luma subblock pos */
|
|
sig_ctxinc += (i && is_luma) ? 3 : 0;
|
|
|
|
sig_ctxinc += sig_coeff_base_ctxt;
|
|
}
|
|
else
|
|
{
|
|
/*MAM : both scan pos and i 0 impies the DC coef of 1st block only */
|
|
/* DC coeff has fixed context for luma and chroma */
|
|
sig_ctxinc = is_luma ? IHEVC_CAB_COEFF_FLAG : IHEVC_CAB_COEFF_FLAG + 27;
|
|
}
|
|
|
|
/*Get the mps state based on ctxt modes */
|
|
state_mps = pu1_ctxt_model[sig_ctxinc];
|
|
|
|
/* Bits taken to encode sig co-ef flag as 0 */
|
|
u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
|
|
|
|
u4_bits_estimated_r1 =
|
|
(gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
|
|
|
|
/* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
|
|
u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;
|
|
{
|
|
QUANT_ROUND_FACTOR(
|
|
temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
|
|
*(pi4_quant_round_0_1 +
|
|
((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size)) + j) = temp2;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/*If Both nbr csbfs are 0, then all the coef in sub-blocks will have same value except for 1st subblock,
|
|
Hence will write the same value to all sub block, and overwrite for the 1st one */
|
|
i = 1;
|
|
{
|
|
UWORD8 sig_ctxinc;
|
|
UWORD8 y_pos_x_pos;
|
|
WORD32 quant_rounding_0_1;
|
|
|
|
pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc_00[0];
|
|
|
|
scan_pos = 0;
|
|
y_pos_x_pos = scan_pos; // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
|
|
/* ctxt for AC coeff depends on curpos and neigbour csbf */
|
|
sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];
|
|
|
|
/* based on luma subblock pos */
|
|
sig_ctxinc += (is_luma) ? 3 : 0;
|
|
|
|
sig_ctxinc += sig_coeff_base_ctxt;
|
|
|
|
/*Get the mps state based on ctxt modes */
|
|
state_mps = pu1_ctxt_model[sig_ctxinc];
|
|
|
|
/* Bits taken to encode sig co-ef flag as 0 */
|
|
u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
|
|
|
|
u4_bits_estimated_r1 =
|
|
(gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
|
|
|
|
/*ctxt_set = 0 DC subblock, the previous state did not have 2
|
|
ctxt_set = 1 DC subblock, the previous state did have >= 2
|
|
ctxt_set = 2 AC subblock, the previous state did not have 2
|
|
ctxt_set = 3 AC subblock, the previous state did have >= 2*/
|
|
|
|
ctxt_set = (i && is_luma) ? 2 : 0;
|
|
|
|
/* gt1_ctxt = 1 for the co-ef value to be 1 */
|
|
gt1_ctxt = 0;
|
|
ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
|
|
|
|
state_mps = pu1_ctxt_model[ctxt_idx];
|
|
|
|
/* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
|
|
u4_bits_estimated_r1 += gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
|
|
|
|
QUANT_ROUND_FACTOR(
|
|
quant_rounding_0_1, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
|
|
|
|
for(scan_pos = 0; scan_pos < (16 * (i4_trans_size * i4_trans_size >> 4));
|
|
scan_pos++)
|
|
{
|
|
*(pi4_quant_round_0_1 + scan_pos) = quant_rounding_0_1;
|
|
}
|
|
}
|
|
|
|
/*First Subblock*/
|
|
i = 0;
|
|
|
|
{
|
|
UWORD8 sig_ctxinc;
|
|
WORD32 state_mps;
|
|
WORD32 gt1_ctxt = 0;
|
|
WORD32 ctxt_set = 0;
|
|
|
|
WORD32 ctxt_idx;
|
|
|
|
/*Check if the cabac states had previous nbr available */
|
|
|
|
{
|
|
pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[0][0];
|
|
|
|
/*ctxt_set = 0 DC subblock, the previous state did not have 2
|
|
ctxt_set = 1 DC subblock, the previous state did have >= 2
|
|
ctxt_set = 2 AC subblock, the previous state did not have 2
|
|
ctxt_set = 3 AC subblock, the previous state did have >= 2*/
|
|
ctxt_set = (i && is_luma) ? 2 : 0;
|
|
|
|
/* gt1_ctxt = 1 for the co-ef value to be 1 */
|
|
gt1_ctxt = 0;
|
|
ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
|
|
|
|
state_mps = pu1_ctxt_model[ctxt_idx];
|
|
|
|
/* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
|
|
u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
|
|
|
|
for(scan_pos = 0; scan_pos < 16; scan_pos++)
|
|
{
|
|
UWORD8 y_pos_x_pos;
|
|
|
|
if(scan_pos)
|
|
{
|
|
y_pos_x_pos = scan_pos; // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
|
|
/* ctxt for AC coeff depends on curpos and neigbour csbf */
|
|
sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];
|
|
|
|
/* based on luma subblock pos */
|
|
sig_ctxinc += (i && is_luma) ? 3 : 0;
|
|
|
|
sig_ctxinc += sig_coeff_base_ctxt;
|
|
}
|
|
else
|
|
{
|
|
/*MAM : both scan pos and i 0 impies the DC coef of 1st block only */
|
|
/* DC coeff has fixed context for luma and chroma */
|
|
sig_ctxinc = is_luma ? IHEVC_CAB_COEFF_FLAG : IHEVC_CAB_COEFF_FLAG + 27;
|
|
}
|
|
|
|
/*Get the mps state based on ctxt modes */
|
|
state_mps = pu1_ctxt_model[sig_ctxinc];
|
|
|
|
/* Bits taken to encode sig co-ef flag as 0 */
|
|
u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
|
|
|
|
u4_bits_estimated_r1 =
|
|
(gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
|
|
|
|
/* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
|
|
u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;
|
|
{
|
|
QUANT_ROUND_FACTOR(
|
|
temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
|
|
*(pi4_quant_round_0_1 +
|
|
((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size))) = temp2;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
|
|
/*!
|
|
******************************************************************************
|
|
* \if Function name : ihevce_t_q_iq_ssd_scan_fxn \endif
|
|
*
|
|
* \brief
|
|
* Transform unit level (Luma) enc_loop function
|
|
*
|
|
* \param[in] ps_ctxt enc_loop module ctxt pointer
|
|
* \param[in] pu1_pred pointer to predicted data buffer
|
|
* \param[in] pred_strd predicted buffer stride
|
|
* \param[in] pu1_src pointer to source data buffer
|
|
* \param[in] src_strd source buffer stride
|
|
* \param[in] pi2_deq_data pointer to store iq data
|
|
* \param[in] deq_data_strd iq data buffer stride
|
|
* \param[out] pu1_ecd_data pointer coeff output buffer (input to ent cod)
|
|
* \param[out] pu1_csbf_buf pointer to store the csbf for all 4x4 in a current
|
|
* block
|
|
* \param[out] csbf_strd csbf buffer stride
|
|
* \param[in] trans_size transform size (4, 8, 16,32)
|
|
* \param[in] packed_pred_mode 0:Inter 1:Intra 2:Skip
|
|
* \param[out] pi4_cost pointer to store the cost
|
|
* \param[out] pi4_coeff_off pointer to store the number of bytes produced in
|
|
* coeff buffer
|
|
* \param[out] pu4_tu_bits pointer to store the best TU bits required encode
|
|
the current TU in RDopt Mode
|
|
* \param[out] pu4_blk_sad pointer to store the block sad for RC
|
|
* \param[out] pi4_zero_col pointer to store the zero_col info for the TU
|
|
* \param[out] pi4_zero_row pointer to store the zero_row info for the TU
|
|
* \param[in] i4_perform_rdoq Indicates if RDOQ should be performed or not
|
|
* \param[in] i4_perform_sbh Indicates if SBH should be performed or not
|
|
*
|
|
* \return
|
|
* CBF of the current block
|
|
*
|
|
* \author
|
|
* Ittiam
|
|
*
|
|
*****************************************************************************
|
|
*/
|
|
|
|
WORD32 ihevce_t_q_iq_ssd_scan_fxn(
|
|
ihevce_enc_loop_ctxt_t *ps_ctxt,
|
|
UWORD8 *pu1_pred,
|
|
WORD32 pred_strd,
|
|
UWORD8 *pu1_src,
|
|
WORD32 src_strd,
|
|
WORD16 *pi2_deq_data,
|
|
WORD32 deq_data_strd,
|
|
UWORD8 *pu1_recon,
|
|
WORD32 i4_recon_stride,
|
|
UWORD8 *pu1_ecd_data,
|
|
UWORD8 *pu1_csbf_buf,
|
|
WORD32 csbf_strd,
|
|
WORD32 trans_size,
|
|
WORD32 packed_pred_mode,
|
|
LWORD64 *pi8_cost,
|
|
WORD32 *pi4_coeff_off,
|
|
WORD32 *pi4_tu_bits,
|
|
UWORD32 *pu4_blk_sad,
|
|
WORD32 *pi4_zero_col,
|
|
WORD32 *pi4_zero_row,
|
|
UWORD8 *pu1_is_recon_available,
|
|
WORD32 i4_perform_rdoq,
|
|
WORD32 i4_perform_sbh,
|
|
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
|
|
WORD32 i4_alpha_stim_multiplier,
|
|
UWORD8 u1_is_cu_noisy,
|
|
#endif
|
|
SSD_TYPE_T e_ssd_type,
|
|
WORD32 early_cbf)
|
|
{
|
|
WORD32 cbf = 0;
|
|
WORD32 trans_idx;
|
|
WORD32 quant_scale_mat_offset;
|
|
WORD32 *pi4_trans_scratch;
|
|
WORD16 *pi2_trans_values;
|
|
WORD16 *pi2_quant_coeffs;
|
|
WORD32 *pi4_subBlock2csbfId_map = NULL;
|
|
|
|
#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
|
|
WORD32 ai4_quant_rounding_factors[3][MAX_TU_SIZE * MAX_TU_SIZE], i;
|
|
#endif
|
|
|
|
rdoq_sbh_ctxt_t *ps_rdoq_sbh_ctxt = &ps_ctxt->s_rdoq_sbh_ctxt;
|
|
|
|
WORD32 i4_perform_zcbf = (ENABLE_INTER_ZCU_COST && (PRED_MODE_INTRA != packed_pred_mode)) ||
|
|
(ps_ctxt->i4_zcbf_rdo_level == ZCBF_ENABLE);
|
|
WORD32 i4_perform_coeff_level_rdoq = (ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING);
|
|
WORD8 intra_flag = 0;
|
|
ASSERT(csbf_strd == MAX_TU_IN_CTB_ROW);
|
|
|
|
*pi4_tu_bits = 0;
|
|
*pi4_coeff_off = 0;
|
|
pu1_is_recon_available[0] = 0;
|
|
|
|
if((PRED_MODE_SKIP == packed_pred_mode) || (0 == early_cbf))
|
|
{
|
|
if(e_ssd_type != NULL_TYPE)
|
|
{
|
|
/* SSD cost is stored to the pointer */
|
|
pi8_cost[0] =
|
|
|
|
ps_ctxt->s_cmn_opt_func.pf_ssd_and_sad_calculator(
|
|
pu1_pred, pred_strd, pu1_src, src_strd, trans_size, pu4_blk_sad);
|
|
|
|
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
|
|
if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
|
|
{
|
|
pi8_cost[0] = ihevce_inject_stim_into_distortion(
|
|
pu1_src,
|
|
src_strd,
|
|
pu1_pred,
|
|
pred_strd,
|
|
pi8_cost[0],
|
|
!ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
|
|
: ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
|
|
(double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
|
|
100.0,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
NULL_PLANE);
|
|
}
|
|
#endif
|
|
|
|
/* copy pred to recon for skip mode */
|
|
if(SPATIAL_DOMAIN_SSD == e_ssd_type)
|
|
{
|
|
ps_ctxt->s_cmn_opt_func.pf_copy_2d(
|
|
pu1_recon, i4_recon_stride, pu1_pred, pred_strd, trans_size, trans_size);
|
|
pu1_is_recon_available[0] = 1;
|
|
}
|
|
else
|
|
{
|
|
pu1_is_recon_available[0] = 0;
|
|
}
|
|
|
|
#if ENABLE_INTER_ZCU_COST
|
|
ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
pi8_cost[0] = UINT_MAX;
|
|
}
|
|
|
|
/* cbf is returned as 0 */
|
|
return (0);
|
|
}
|
|
|
|
/* derive context variables */
|
|
pi4_trans_scratch = (WORD32 *)&ps_ctxt->ai2_scratch[0];
|
|
pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
|
|
pi2_trans_values = &ps_ctxt->ai2_scratch[0] + (MAX_TRANS_SIZE * 2);
|
|
|
|
/* translate the transform size to index for 4x4 and 8x8 */
|
|
trans_idx = trans_size >> 2;
|
|
|
|
if(PRED_MODE_INTRA == packed_pred_mode)
|
|
{
|
|
quant_scale_mat_offset = 0;
|
|
intra_flag = 1;
|
|
#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
|
|
ai4_quant_rounding_factors[0][0] =
|
|
MAX(ps_ctxt->i4_quant_rnd_factor[intra_flag], (1 << QUANT_ROUND_FACTOR_Q) / 3);
|
|
|
|
for(i = 0; i < trans_size * trans_size; i++)
|
|
{
|
|
ai4_quant_rounding_factors[1][i] =
|
|
MAX(ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3][i],
|
|
(1 << QUANT_ROUND_FACTOR_Q) / 3);
|
|
ai4_quant_rounding_factors[2][i] =
|
|
MAX(ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3][i],
|
|
(1 << QUANT_ROUND_FACTOR_Q) / 3);
|
|
}
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
quant_scale_mat_offset = NUM_TRANS_TYPES;
|
|
}
|
|
/* for intra 4x4 DST transform should be used */
|
|
if((1 == trans_idx) && (1 == intra_flag))
|
|
{
|
|
trans_idx = 0;
|
|
}
|
|
/* for 16x16 cases */
|
|
else if(16 == trans_size)
|
|
{
|
|
trans_idx = 3;
|
|
}
|
|
/* for 32x32 cases */
|
|
else if(32 == trans_size)
|
|
{
|
|
trans_idx = 4;
|
|
}
|
|
|
|
switch(trans_size)
|
|
{
|
|
case 4:
|
|
{
|
|
pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map4x4TU;
|
|
|
|
break;
|
|
}
|
|
case 8:
|
|
{
|
|
pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map8x8TU;
|
|
|
|
break;
|
|
}
|
|
case 16:
|
|
{
|
|
pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map16x16TU;
|
|
|
|
break;
|
|
}
|
|
case 32:
|
|
{
|
|
pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map32x32TU;
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* Do not call the FT and Quant functions if early_cbf is 0 */
|
|
if(1 == early_cbf)
|
|
{
|
|
/* ---------- call residue and transform block ------- */
|
|
*pu4_blk_sad = ps_ctxt->apf_resd_trns[trans_idx](
|
|
pu1_src,
|
|
pu1_pred,
|
|
pi4_trans_scratch,
|
|
pi2_trans_values,
|
|
src_strd,
|
|
pred_strd,
|
|
trans_size,
|
|
NULL_PLANE);
|
|
|
|
cbf = ps_ctxt->apf_quant_iquant_ssd
|
|
[i4_perform_coeff_level_rdoq + (e_ssd_type != FREQUENCY_DOMAIN_SSD) * 2](
|
|
pi2_trans_values,
|
|
ps_ctxt->api2_rescal_mat[trans_idx + quant_scale_mat_offset],
|
|
pi2_quant_coeffs,
|
|
pi2_deq_data,
|
|
trans_size,
|
|
ps_ctxt->i4_cu_qp_div6,
|
|
ps_ctxt->i4_cu_qp_mod6,
|
|
#if !PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
|
|
ps_ctxt->i4_quant_rnd_factor[intra_flag],
|
|
ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
|
|
ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
|
|
#else
|
|
intra_flag ? ai4_quant_rounding_factors[0][0]
|
|
: ps_ctxt->i4_quant_rnd_factor[intra_flag],
|
|
intra_flag ? ai4_quant_rounding_factors[1]
|
|
: ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
|
|
intra_flag ? ai4_quant_rounding_factors[2]
|
|
: ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
|
|
#endif
|
|
trans_size,
|
|
trans_size,
|
|
deq_data_strd,
|
|
pu1_csbf_buf,
|
|
csbf_strd,
|
|
pi4_zero_col,
|
|
pi4_zero_row,
|
|
ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset],
|
|
pi8_cost);
|
|
|
|
if(e_ssd_type != FREQUENCY_DOMAIN_SSD)
|
|
{
|
|
pi8_cost[0] = UINT_MAX;
|
|
}
|
|
}
|
|
|
|
if(0 != cbf)
|
|
{
|
|
if(i4_perform_sbh || i4_perform_rdoq)
|
|
{
|
|
ps_rdoq_sbh_ctxt->i4_iq_data_strd = deq_data_strd;
|
|
ps_rdoq_sbh_ctxt->i4_q_data_strd = trans_size;
|
|
ps_rdoq_sbh_ctxt->pi4_subBlock2csbfId_map = pi4_subBlock2csbfId_map;
|
|
|
|
ps_rdoq_sbh_ctxt->i4_qp_div = ps_ctxt->i4_cu_qp_div6;
|
|
ps_rdoq_sbh_ctxt->i2_qp_rem = ps_ctxt->i4_cu_qp_mod6;
|
|
ps_rdoq_sbh_ctxt->i4_scan_idx = ps_ctxt->i4_scan_idx;
|
|
ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
|
|
ps_rdoq_sbh_ctxt->i4_trans_size = trans_size;
|
|
|
|
ps_rdoq_sbh_ctxt->pi2_dequant_coeff =
|
|
ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset];
|
|
ps_rdoq_sbh_ctxt->pi2_iquant_coeffs = pi2_deq_data;
|
|
ps_rdoq_sbh_ctxt->pi2_quant_coeffs = pi2_quant_coeffs;
|
|
ps_rdoq_sbh_ctxt->pi2_trans_values = pi2_trans_values;
|
|
ps_rdoq_sbh_ctxt->pu1_csbf_buf = pu1_csbf_buf;
|
|
|
|
/* ------- call coeffs scan function ------- */
|
|
if((!i4_perform_rdoq))
|
|
{
|
|
ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
|
|
|
|
pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
|
|
}
|
|
}
|
|
|
|
*pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
|
|
pi2_quant_coeffs,
|
|
pi4_subBlock2csbfId_map,
|
|
ps_ctxt->i4_scan_idx,
|
|
trans_size,
|
|
pu1_ecd_data,
|
|
pu1_csbf_buf,
|
|
csbf_strd);
|
|
}
|
|
*pi8_cost >>= ga_trans_shift[trans_idx];
|
|
|
|
#if RDOPT_ZERO_CBF_ENABLE
|
|
/* compare null cbf cost with encode tu rd-cost */
|
|
if(cbf != 0)
|
|
{
|
|
WORD32 tu_bits;
|
|
LWORD64 tu_rd_cost;
|
|
|
|
LWORD64 zero_cbf_cost = 0;
|
|
|
|
/*Populating the feilds of rdoq_ctxt structure*/
|
|
if(i4_perform_rdoq)
|
|
{
|
|
/* transform size to log2transform size */
|
|
GETRANGE(ps_rdoq_sbh_ctxt->i4_log2_trans_size, trans_size);
|
|
ps_rdoq_sbh_ctxt->i4_log2_trans_size -= 1;
|
|
ps_rdoq_sbh_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->i8_cl_ssd_lambda_qf;
|
|
ps_rdoq_sbh_ctxt->i4_is_luma = 1;
|
|
ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td = ga_trans_shift[trans_idx];
|
|
ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td =
|
|
(1 << ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td) / 2;
|
|
ps_rdoq_sbh_ctxt->i1_tu_is_coded = 0;
|
|
ps_rdoq_sbh_ctxt->pi4_zero_col = pi4_zero_col;
|
|
ps_rdoq_sbh_ctxt->pi4_zero_row = pi4_zero_row;
|
|
}
|
|
else if(i4_perform_zcbf)
|
|
{
|
|
zero_cbf_cost =
|
|
|
|
ps_ctxt->s_cmn_opt_func.pf_ssd_calculator(
|
|
pu1_src, pu1_pred, src_strd, pred_strd, trans_size, trans_size, NULL_PLANE);
|
|
}
|
|
|
|
/************************************************************************/
|
|
/* call the entropy rdo encode to get the bit estimate for current tu */
|
|
/* note that tu includes only residual coding bits and does not include */
|
|
/* tu split, cbf and qp delta encoding bits for a TU */
|
|
/************************************************************************/
|
|
if(i4_perform_rdoq)
|
|
{
|
|
tu_bits = ihevce_entropy_rdo_encode_tu_rdoq(
|
|
&ps_ctxt->s_rdopt_entropy_ctxt,
|
|
(pu1_ecd_data),
|
|
trans_size,
|
|
1,
|
|
ps_rdoq_sbh_ctxt,
|
|
pi8_cost,
|
|
&zero_cbf_cost,
|
|
0);
|
|
|
|
if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 0)
|
|
{
|
|
cbf = 0;
|
|
*pi4_coeff_off = 0;
|
|
}
|
|
|
|
if((i4_perform_sbh) && (0 != cbf))
|
|
{
|
|
ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
|
|
ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
|
|
*pi8_cost = ps_rdoq_sbh_ctxt->i8_ssd_cost;
|
|
}
|
|
|
|
/*Add round value before normalizing*/
|
|
*pi8_cost += ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td;
|
|
*pi8_cost >>= ga_trans_shift[trans_idx];
|
|
|
|
if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 1)
|
|
{
|
|
pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
|
|
*pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
|
|
pi2_quant_coeffs,
|
|
pi4_subBlock2csbfId_map,
|
|
ps_ctxt->i4_scan_idx,
|
|
trans_size,
|
|
pu1_ecd_data,
|
|
pu1_csbf_buf,
|
|
csbf_strd);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
tu_bits = ihevce_entropy_rdo_encode_tu(
|
|
&ps_ctxt->s_rdopt_entropy_ctxt, pu1_ecd_data, trans_size, 1, i4_perform_sbh);
|
|
}
|
|
|
|
*pi4_tu_bits = tu_bits;
|
|
|
|
if(e_ssd_type == SPATIAL_DOMAIN_SSD)
|
|
{
|
|
*pi8_cost = ihevce_it_recon_ssd(
|
|
ps_ctxt,
|
|
pu1_src,
|
|
src_strd,
|
|
pu1_pred,
|
|
pred_strd,
|
|
pi2_deq_data,
|
|
deq_data_strd,
|
|
pu1_recon,
|
|
i4_recon_stride,
|
|
pu1_ecd_data,
|
|
trans_size,
|
|
packed_pred_mode,
|
|
cbf,
|
|
*pi4_zero_col,
|
|
*pi4_zero_row,
|
|
NULL_PLANE);
|
|
|
|
pu1_is_recon_available[0] = 1;
|
|
}
|
|
|
|
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
|
|
if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
|
|
{
|
|
pi8_cost[0] = ihevce_inject_stim_into_distortion(
|
|
pu1_src,
|
|
src_strd,
|
|
pu1_recon,
|
|
i4_recon_stride,
|
|
pi8_cost[0],
|
|
i4_alpha_stim_multiplier,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
NULL_PLANE);
|
|
}
|
|
else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
|
|
{
|
|
pi8_cost[0] = ihevce_inject_stim_into_distortion(
|
|
pu1_src,
|
|
src_strd,
|
|
pu1_pred,
|
|
pred_strd,
|
|
pi8_cost[0],
|
|
i4_alpha_stim_multiplier,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
NULL_PLANE);
|
|
}
|
|
#endif
|
|
|
|
/* add the SSD cost to bits estimate given by ECD */
|
|
tu_rd_cost = *pi8_cost + COMPUTE_RATE_COST_CLIP30(
|
|
tu_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
|
|
|
|
if(i4_perform_zcbf)
|
|
{
|
|
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
|
|
if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
|
|
{
|
|
zero_cbf_cost = ihevce_inject_stim_into_distortion(
|
|
pu1_src,
|
|
src_strd,
|
|
pu1_pred,
|
|
pred_strd,
|
|
zero_cbf_cost,
|
|
!ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
|
|
: ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
|
|
(double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
|
|
100.0,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
NULL_PLANE);
|
|
}
|
|
#endif
|
|
|
|
/* force the tu as zero cbf if zero_cbf_cost is lower */
|
|
if(zero_cbf_cost < tu_rd_cost)
|
|
{
|
|
/* num bytes is set to 0 */
|
|
*pi4_coeff_off = 0;
|
|
|
|
/* cbf is returned as 0 */
|
|
cbf = 0;
|
|
|
|
/* cost is returned as 0 cbf cost */
|
|
*pi8_cost = zero_cbf_cost;
|
|
|
|
/* TU bits is set to 0 */
|
|
*pi4_tu_bits = 0;
|
|
pu1_is_recon_available[0] = 0;
|
|
|
|
if(SPATIAL_DOMAIN_SSD == e_ssd_type)
|
|
{
|
|
/* copy pred to recon for zcbf mode */
|
|
|
|
ps_ctxt->s_cmn_opt_func.pf_copy_2d(
|
|
pu1_recon, i4_recon_stride, pu1_pred, pred_strd, trans_size, trans_size);
|
|
|
|
pu1_is_recon_available[0] = 1;
|
|
}
|
|
}
|
|
/* accumulate cu not coded cost with zcbf cost */
|
|
#if ENABLE_INTER_ZCU_COST
|
|
ps_ctxt->i8_cu_not_coded_cost += zero_cbf_cost;
|
|
#endif
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* cbf = 0, accumulate cu not coded cost */
|
|
if(e_ssd_type == SPATIAL_DOMAIN_SSD)
|
|
{
|
|
*pi8_cost = ihevce_it_recon_ssd(
|
|
ps_ctxt,
|
|
pu1_src,
|
|
src_strd,
|
|
pu1_pred,
|
|
pred_strd,
|
|
pi2_deq_data,
|
|
deq_data_strd,
|
|
pu1_recon,
|
|
i4_recon_stride,
|
|
pu1_ecd_data,
|
|
trans_size,
|
|
packed_pred_mode,
|
|
cbf,
|
|
*pi4_zero_col,
|
|
*pi4_zero_row,
|
|
NULL_PLANE);
|
|
|
|
pu1_is_recon_available[0] = 1;
|
|
}
|
|
|
|
#if ENABLE_INTER_ZCU_COST
|
|
{
|
|
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
|
|
if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
|
|
{
|
|
pi8_cost[0] = ihevce_inject_stim_into_distortion(
|
|
pu1_src,
|
|
src_strd,
|
|
pu1_recon,
|
|
i4_recon_stride,
|
|
pi8_cost[0],
|
|
!ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
|
|
: ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
|
|
(double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
|
|
100.0,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
NULL_PLANE);
|
|
}
|
|
else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
|
|
{
|
|
pi8_cost[0] = ihevce_inject_stim_into_distortion(
|
|
pu1_src,
|
|
src_strd,
|
|
pu1_pred,
|
|
pred_strd,
|
|
pi8_cost[0],
|
|
!ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
|
|
: ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
|
|
(double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
|
|
100.0,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
NULL_PLANE);
|
|
}
|
|
#endif
|
|
|
|
ps_ctxt->i8_cu_not_coded_cost += *pi8_cost;
|
|
}
|
|
#endif /* ENABLE_INTER_ZCU_COST */
|
|
}
|
|
#endif
|
|
|
|
return (cbf);
|
|
}
|
|
|
|
/*!
|
|
******************************************************************************
|
|
* \if Function name : ihevce_it_recon_fxn \endif
|
|
*
|
|
* \brief
|
|
* Transform unit level (Luma) IT Recon function
|
|
*
|
|
* \param[in] ps_ctxt enc_loop module ctxt pointer
|
|
* \param[in] pi2_deq_data pointer to iq data
|
|
* \param[in] deq_data_strd iq data buffer stride
|
|
* \param[in] pu1_pred pointer to predicted data buffer
|
|
* \param[in] pred_strd predicted buffer stride
|
|
* \param[in] pu1_recon pointer to recon buffer
|
|
* \param[in] recon_strd recon buffer stride
|
|
* \param[out] pu1_ecd_data pointer coeff output buffer (input to ent cod)
|
|
* \param[in] trans_size transform size (4, 8, 16,32)
|
|
* \param[in] packed_pred_mode 0:Inter 1:Intra 2:Skip
|
|
* \param[in] cbf CBF of the current block
|
|
* \param[in] zero_cols zero_cols of the current block
|
|
* \param[in] zero_rows zero_rows of the current block
|
|
*
|
|
* \return
|
|
*
|
|
* \author
|
|
* Ittiam
|
|
*
|
|
*****************************************************************************
|
|
*/
|
|
|
|
void ihevce_it_recon_fxn(
|
|
ihevce_enc_loop_ctxt_t *ps_ctxt,
|
|
WORD16 *pi2_deq_data,
|
|
WORD32 deq_dat_strd,
|
|
UWORD8 *pu1_pred,
|
|
WORD32 pred_strd,
|
|
UWORD8 *pu1_recon,
|
|
WORD32 recon_strd,
|
|
UWORD8 *pu1_ecd_data,
|
|
WORD32 trans_size,
|
|
WORD32 packed_pred_mode,
|
|
WORD32 cbf,
|
|
WORD32 zero_cols,
|
|
WORD32 zero_rows)
|
|
{
|
|
WORD32 dc_add_flag = 0;
|
|
WORD32 trans_idx;
|
|
|
|
/* translate the transform size to index for 4x4 and 8x8 */
|
|
trans_idx = trans_size >> 2;
|
|
|
|
/* if SKIP mode needs to be evaluated the pred is copied to recon */
|
|
if(PRED_MODE_SKIP == packed_pred_mode)
|
|
{
|
|
UWORD8 *pu1_curr_recon, *pu1_curr_pred;
|
|
|
|
pu1_curr_pred = pu1_pred;
|
|
pu1_curr_recon = pu1_recon;
|
|
|
|
/* 2D copy of data */
|
|
|
|
ps_ctxt->s_cmn_opt_func.pf_2d_square_copy(
|
|
pu1_curr_recon, recon_strd, pu1_curr_pred, pred_strd, trans_size, sizeof(UWORD8));
|
|
|
|
return;
|
|
}
|
|
|
|
/* for intra 4x4 DST transform should be used */
|
|
if((1 == trans_idx) && (PRED_MODE_INTRA == packed_pred_mode))
|
|
{
|
|
trans_idx = 0;
|
|
}
|
|
/* for 16x16 cases */
|
|
else if(16 == trans_size)
|
|
{
|
|
trans_idx = 3;
|
|
}
|
|
/* for 32x32 cases */
|
|
else if(32 == trans_size)
|
|
{
|
|
trans_idx = 4;
|
|
}
|
|
|
|
/*if (lastx == 0 && lasty == 0) , ie only 1 coefficient */
|
|
if((0 == pu1_ecd_data[0]) && (0 == pu1_ecd_data[1]))
|
|
{
|
|
dc_add_flag = 1;
|
|
}
|
|
|
|
if(0 == cbf)
|
|
{
|
|
/* buffer copy */
|
|
ps_ctxt->s_cmn_opt_func.pf_2d_square_copy(
|
|
pu1_recon, recon_strd, pu1_pred, pred_strd, trans_size, 1);
|
|
}
|
|
else if((1 == dc_add_flag) && (0 != trans_idx))
|
|
{
|
|
/* dc add */
|
|
ps_ctxt->s_cmn_opt_func.pf_itrans_recon_dc(
|
|
pu1_pred,
|
|
pred_strd,
|
|
pu1_recon,
|
|
recon_strd,
|
|
trans_size,
|
|
pi2_deq_data[0],
|
|
NULL_PLANE /* luma */
|
|
);
|
|
}
|
|
else
|
|
{
|
|
ps_ctxt->apf_it_recon[trans_idx](
|
|
pi2_deq_data,
|
|
&ps_ctxt->ai2_scratch[0],
|
|
pu1_pred,
|
|
pu1_recon,
|
|
deq_dat_strd,
|
|
pred_strd,
|
|
recon_strd,
|
|
zero_cols,
|
|
zero_rows);
|
|
}
|
|
}
|
|
|
|
/*!
|
|
******************************************************************************
|
|
* \if Function name : ihevce_chroma_it_recon_fxn \endif
|
|
*
|
|
* \brief
|
|
* Transform unit level (Chroma) IT Recon function
|
|
*
|
|
* \param[in] ps_ctxt enc_loop module ctxt pointer
|
|
* \param[in] pi2_deq_data pointer to iq data
|
|
* \param[in] deq_data_strd iq data buffer stride
|
|
* \param[in] pu1_pred pointer to predicted data buffer
|
|
* \param[in] pred_strd predicted buffer stride
|
|
* \param[in] pu1_recon pointer to recon buffer
|
|
* \param[in] recon_strd recon buffer stride
|
|
* \param[out] pu1_ecd_data pointer coeff output buffer (input to ent cod)
|
|
* \param[in] trans_size transform size (4, 8, 16)
|
|
* \param[in] cbf CBF of the current block
|
|
* \param[in] zero_cols zero_cols of the current block
|
|
* \param[in] zero_rows zero_rows of the current block
|
|
*
|
|
* \return
|
|
*
|
|
* \author
|
|
* Ittiam
|
|
*
|
|
*****************************************************************************
|
|
*/
|
|
|
|
void ihevce_chroma_it_recon_fxn(
|
|
ihevce_enc_loop_ctxt_t *ps_ctxt,
|
|
WORD16 *pi2_deq_data,
|
|
WORD32 deq_dat_strd,
|
|
UWORD8 *pu1_pred,
|
|
WORD32 pred_strd,
|
|
UWORD8 *pu1_recon,
|
|
WORD32 recon_strd,
|
|
UWORD8 *pu1_ecd_data,
|
|
WORD32 trans_size,
|
|
WORD32 cbf,
|
|
WORD32 zero_cols,
|
|
WORD32 zero_rows,
|
|
CHROMA_PLANE_ID_T e_chroma_plane)
|
|
{
|
|
WORD32 trans_idx;
|
|
|
|
ASSERT((e_chroma_plane == U_PLANE) || (e_chroma_plane == V_PLANE));
|
|
|
|
/* since 2x2 transform is not allowed for chroma*/
|
|
if(2 == trans_size)
|
|
{
|
|
trans_size = 4;
|
|
}
|
|
|
|
/* translate the transform size to index */
|
|
trans_idx = trans_size >> 2;
|
|
|
|
/* for 16x16 cases */
|
|
if(16 == trans_size)
|
|
{
|
|
trans_idx = 3;
|
|
}
|
|
|
|
if(0 == cbf)
|
|
{
|
|
/* buffer copy */
|
|
ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
|
|
pu1_pred, pred_strd, pu1_recon, recon_strd, trans_size, trans_size, e_chroma_plane);
|
|
}
|
|
else if((0 == pu1_ecd_data[0]) && (0 == pu1_ecd_data[1]))
|
|
{
|
|
/* dc add */
|
|
ps_ctxt->s_cmn_opt_func.pf_itrans_recon_dc(
|
|
pu1_pred,
|
|
pred_strd,
|
|
pu1_recon,
|
|
recon_strd,
|
|
trans_size,
|
|
pi2_deq_data[0],
|
|
e_chroma_plane /* chroma plane */
|
|
);
|
|
}
|
|
else
|
|
{
|
|
ps_ctxt->apf_chrm_it_recon[trans_idx - 1](
|
|
pi2_deq_data,
|
|
&ps_ctxt->ai2_scratch[0],
|
|
pu1_pred + (WORD32)e_chroma_plane,
|
|
pu1_recon + (WORD32)e_chroma_plane,
|
|
deq_dat_strd,
|
|
pred_strd,
|
|
recon_strd,
|
|
zero_cols,
|
|
zero_rows);
|
|
}
|
|
}
|
|
|
|
/**
|
|
*******************************************************************************
|
|
* \if Function name : ihevce_mpm_idx_based_filter_RDOPT_cand \endif
|
|
*
|
|
* \brief * Filters the RDOPT candidates based on mpm_idx
|
|
*
|
|
* \par Description
|
|
* Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode
|
|
* for a CU
|
|
*
|
|
* \param[in] ps_ctxt : ptr to enc loop context
|
|
* \param[in] ps_cu_analyse : ptr to CU analyse structure
|
|
* \param[in] ps_top_nbr_4x4 top 4x4 neighbour pointer
|
|
* \param[in] ps_left_nbr_4x4 left 4x4 neighbour pointer
|
|
* \param[in] pu1_luma_mode luma mode
|
|
*
|
|
* \returns none
|
|
*
|
|
* \author
|
|
* Ittiam
|
|
*
|
|
*******************************************************************************
|
|
*/
|
|
|
|
void ihevce_mpm_idx_based_filter_RDOPT_cand(
|
|
ihevce_enc_loop_ctxt_t *ps_ctxt,
|
|
cu_analyse_t *ps_cu_analyse,
|
|
nbr_4x4_t *ps_left_nbr_4x4,
|
|
nbr_4x4_t *ps_top_nbr_4x4,
|
|
UWORD8 *pu1_luma_mode,
|
|
UWORD8 *pu1_eval_mark)
|
|
{
|
|
WORD32 cu_pos_x;
|
|
WORD32 cu_pos_y;
|
|
nbr_avail_flags_t s_nbr;
|
|
WORD32 trans_size;
|
|
WORD32 au4_cand_mode_list[3];
|
|
WORD32 nbr_flags;
|
|
UWORD8 *pu1_intra_luma_modes;
|
|
WORD32 rdopt_cand_ctr = 0;
|
|
UWORD8 *pu1_luma_eval_mark;
|
|
|
|
cu_pos_x = ps_cu_analyse->b3_cu_pos_x << 1;
|
|
cu_pos_y = ps_cu_analyse->b3_cu_pos_y << 1;
|
|
trans_size = ps_cu_analyse->u1_cu_size;
|
|
|
|
/* get the neighbour availability flags */
|
|
nbr_flags = ihevce_get_nbr_intra(
|
|
&s_nbr,
|
|
ps_ctxt->pu1_ctb_nbr_map,
|
|
ps_ctxt->i4_nbr_map_strd,
|
|
cu_pos_x,
|
|
cu_pos_y,
|
|
trans_size >> 2);
|
|
(void)nbr_flags;
|
|
/*Call the fun to populate luma intra pred mode fro TU=CU and use the same list fro
|
|
*TU=CU/2 also since the modes are same in both the cases.
|
|
*/
|
|
ihevce_populate_intra_pred_mode(
|
|
ps_top_nbr_4x4->b6_luma_intra_mode,
|
|
ps_left_nbr_4x4->b6_luma_intra_mode,
|
|
s_nbr.u1_top_avail,
|
|
s_nbr.u1_left_avail,
|
|
cu_pos_y,
|
|
&au4_cand_mode_list[0]);
|
|
|
|
/*Loop through all the RDOPT candidates of TU=CU and TU=CU/2 and check if the current RDOPT
|
|
*cand is present in a4_cand_mode_list, If yes set eval flag to 1 else set it to zero
|
|
*/
|
|
|
|
pu1_intra_luma_modes = pu1_luma_mode;
|
|
pu1_luma_eval_mark = pu1_eval_mark;
|
|
|
|
while(pu1_intra_luma_modes[rdopt_cand_ctr] != 255)
|
|
{
|
|
WORD32 i;
|
|
WORD32 found_flag = 0;
|
|
|
|
/*1st candidate of TU=CU list and TU=CU/2 list must go through RDOPT stage
|
|
*irrespective of whether the cand is present in the mpm idx list or not
|
|
*/
|
|
if(rdopt_cand_ctr == 0)
|
|
{
|
|
rdopt_cand_ctr++;
|
|
continue;
|
|
}
|
|
|
|
for(i = 0; i < 3; i++)
|
|
{
|
|
if(pu1_intra_luma_modes[rdopt_cand_ctr] == au4_cand_mode_list[i])
|
|
{
|
|
found_flag = 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if(found_flag == 0)
|
|
{
|
|
pu1_luma_eval_mark[rdopt_cand_ctr] = 0;
|
|
}
|
|
|
|
rdopt_cand_ctr++;
|
|
}
|
|
}
|
|
|
|
/*!
|
|
******************************************************************************
|
|
* \if Function name : ihevce_intra_rdopt_cu_ntu \endif
|
|
*
|
|
* \brief
|
|
* Intra Coding unit function for RD opt mode
|
|
*
|
|
* \param[in] ps_ctxt enc_loop module ctxt pointer
|
|
* \param[in] ps_chrm_cu_buf_prms pointer to chroma buffer pointers structure
|
|
* \param[in] pu1_luma_mode : pointer to luma mode
|
|
* \param[in] ps_cu_analyse pointer to cu analyse pointer
|
|
* \param[in] pu1_src pointer to source data buffer
|
|
* \param[in] src_strd source buffer stride
|
|
* \param[in] pu1_cu_left pointer to left recon data buffer
|
|
* \param[in] pu1_cu_top pointer to top recon data buffer
|
|
* \param[in] pu1_cu_top_left pointer to top left recon data buffer
|
|
* \param[in] ps_left_nbr_4x4 : left 4x4 neighbour pointer
|
|
* \param[in] ps_top_nbr_4x4 : top 4x4 neighbour pointer
|
|
* \param[in] nbr_4x4_left_strd left nbr4x4 stride
|
|
* \param[in] cu_left_stride left recon buffer stride
|
|
* \param[in] curr_buf_idx RD opt buffer index for current usage
|
|
* \param[in] func_proc_mode : function processing mode @sa TU_SIZE_WRT_CU_T
|
|
*
|
|
* \return
|
|
* RDopt cost
|
|
*
|
|
* \author
|
|
* Ittiam
|
|
*
|
|
*****************************************************************************
|
|
*/
|
|
LWORD64 ihevce_intra_rdopt_cu_ntu(
|
|
ihevce_enc_loop_ctxt_t *ps_ctxt,
|
|
enc_loop_cu_prms_t *ps_cu_prms,
|
|
void *pv_pred_org,
|
|
WORD32 pred_strd_org,
|
|
enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
|
|
UWORD8 *pu1_luma_mode,
|
|
cu_analyse_t *ps_cu_analyse,
|
|
void *pv_curr_src,
|
|
void *pv_cu_left,
|
|
void *pv_cu_top,
|
|
void *pv_cu_top_left,
|
|
nbr_4x4_t *ps_left_nbr_4x4,
|
|
nbr_4x4_t *ps_top_nbr_4x4,
|
|
WORD32 nbr_4x4_left_strd,
|
|
WORD32 cu_left_stride,
|
|
WORD32 curr_buf_idx,
|
|
WORD32 func_proc_mode,
|
|
WORD32 i4_alpha_stim_multiplier)
|
|
{
|
|
enc_loop_cu_final_prms_t *ps_final_prms;
|
|
nbr_avail_flags_t s_nbr;
|
|
nbr_4x4_t *ps_nbr_4x4;
|
|
nbr_4x4_t *ps_tmp_lt_4x4;
|
|
recon_datastore_t *ps_recon_datastore;
|
|
|
|
ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr;
|
|
|
|
UWORD32 *pu4_nbr_flags;
|
|
UWORD8 *pu1_intra_pred_mode;
|
|
WORD32 cu_pos_x;
|
|
WORD32 cu_pos_y;
|
|
WORD32 trans_size = 0;
|
|
UWORD8 *pu1_left;
|
|
UWORD8 *pu1_top;
|
|
UWORD8 *pu1_top_left;
|
|
UWORD8 *pu1_recon;
|
|
UWORD8 *pu1_csbf_buf;
|
|
UWORD8 *pu1_ecd_data;
|
|
WORD16 *pi2_deq_data;
|
|
WORD32 deq_data_strd;
|
|
LWORD64 total_rdopt_cost;
|
|
WORD32 ctr;
|
|
WORD32 left_strd;
|
|
WORD32 i4_recon_stride;
|
|
WORD32 csbf_strd;
|
|
WORD32 ecd_data_bytes_cons;
|
|
WORD32 num_4x4_in_tu;
|
|
WORD32 num_4x4_in_cu;
|
|
WORD32 chrm_present_flag;
|
|
WORD32 tx_size;
|
|
WORD32 cu_bits;
|
|
WORD32 num_cu_parts = 0;
|
|
WORD32 num_cands = 0;
|
|
WORD32 cu_pos_x_8pelunits;
|
|
WORD32 cu_pos_y_8pelunits;
|
|
WORD32 i4_perform_rdoq;
|
|
WORD32 i4_perform_sbh;
|
|
UWORD8 u1_compute_spatial_ssd;
|
|
UWORD8 u1_compute_recon;
|
|
UWORD8 au1_intra_nxn_rdopt_ctxt_models[2][IHEVC_CAB_CTXT_END];
|
|
|
|
UWORD16 u2_num_tus_in_cu = 0;
|
|
WORD32 is_sub_pu_in_hq = 0;
|
|
/* Get the RDOPT cost of the best CU mode for early_exit */
|
|
LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
|
|
/* cabac context of prev intra luma pred flag */
|
|
UWORD8 u1_prev_flag_cabac_ctxt =
|
|
ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_INTRA_LUMA_PRED_FLAG];
|
|
WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
|
|
|
|
UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY;
|
|
|
|
total_rdopt_cost = 0;
|
|
ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
|
|
ps_recon_datastore = &ps_final_prms->s_recon_datastore;
|
|
i4_recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
|
|
csbf_strd = ps_ctxt->i4_cu_csbf_strd;
|
|
pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
|
|
pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
|
|
pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
|
|
deq_data_strd = ps_cu_analyse->u1_cu_size; /* deq_data stride is cu size */
|
|
ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
|
|
ps_tmp_lt_4x4 = ps_left_nbr_4x4;
|
|
pu4_nbr_flags = &ps_final_prms->au4_nbr_flags[0];
|
|
pu1_intra_pred_mode = &ps_final_prms->au1_intra_pred_mode[0];
|
|
cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
|
|
cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
|
|
cu_pos_x_8pelunits = cu_pos_x;
|
|
cu_pos_y_8pelunits = cu_pos_y;
|
|
|
|
/* reset cu not coded cost */
|
|
ps_ctxt->i8_cu_not_coded_cost = 0;
|
|
|
|
/* based on the Processing mode */
|
|
if(TU_EQ_CU == func_proc_mode)
|
|
{
|
|
ps_final_prms->u1_part_mode = SIZE_2Nx2N;
|
|
trans_size = ps_cu_analyse->u1_cu_size;
|
|
num_cu_parts = 1;
|
|
num_cands = 1;
|
|
u2_num_tus_in_cu = 1;
|
|
}
|
|
else if(TU_EQ_CU_DIV2 == func_proc_mode)
|
|
{
|
|
ps_final_prms->u1_part_mode = SIZE_2Nx2N;
|
|
trans_size = ps_cu_analyse->u1_cu_size >> 1;
|
|
num_cu_parts = 4;
|
|
num_cands = 1;
|
|
u2_num_tus_in_cu = 4;
|
|
}
|
|
else if(TU_EQ_SUBCU == func_proc_mode)
|
|
{
|
|
ps_final_prms->u1_part_mode = SIZE_NxN;
|
|
trans_size = ps_cu_analyse->u1_cu_size >> 1;
|
|
num_cu_parts = 4;
|
|
/*In HQ for TU = SUBPU, all 35 modes used for RDOPT instead of 3 modes */
|
|
if(IHEVCE_QUALITY_P3 > ps_ctxt->i4_quality_preset)
|
|
{
|
|
if(ps_ctxt->i1_slice_type != BSLICE)
|
|
{
|
|
num_cands = (4 * MAX_INTRA_CU_CANDIDATES) + 2;
|
|
}
|
|
else
|
|
{
|
|
num_cands = (2 * MAX_INTRA_CU_CANDIDATES);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
num_cands = MAX_INTRA_CU_CANDIDATES;
|
|
}
|
|
u2_num_tus_in_cu = 4;
|
|
}
|
|
else
|
|
{
|
|
/* should not enter here */
|
|
ASSERT(0);
|
|
}
|
|
|
|
if(ps_ctxt->i1_cu_qp_delta_enable)
|
|
{
|
|
ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, trans_size, 1);
|
|
}
|
|
|
|
if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
|
|
{
|
|
ps_ctxt->i8_cl_ssd_lambda_qf =
|
|
((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
|
|
100.0f);
|
|
ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
|
|
((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
|
|
(100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
|
|
}
|
|
|
|
u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
|
|
(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
|
|
CONVERT_SSDS_TO_SPATIAL_DOMAIN;
|
|
|
|
if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
|
|
{
|
|
u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
|
|
CONVERT_SSDS_TO_SPATIAL_DOMAIN;
|
|
}
|
|
|
|
/* populate the neighbours */
|
|
pu1_left = (UWORD8 *)pv_cu_left;
|
|
pu1_top = (UWORD8 *)pv_cu_top;
|
|
pu1_top_left = (UWORD8 *)pv_cu_top_left;
|
|
left_strd = cu_left_stride;
|
|
num_4x4_in_tu = (trans_size >> 2);
|
|
num_4x4_in_cu = (ps_cu_analyse->u1_cu_size >> 2);
|
|
chrm_present_flag = 1;
|
|
ecd_data_bytes_cons = 0;
|
|
cu_bits = 0;
|
|
|
|
/* get the 4x4 level position of current cu */
|
|
cu_pos_x = cu_pos_x << 1;
|
|
cu_pos_y = cu_pos_y << 1;
|
|
|
|
/* populate cu level params knowing that current is intra */
|
|
ps_final_prms->u1_skip_flag = 0;
|
|
ps_final_prms->u1_intra_flag = PRED_MODE_INTRA;
|
|
ps_final_prms->u2_num_pus_in_cu = 1;
|
|
/*init the is_cu_coded flag*/
|
|
ps_final_prms->u1_is_cu_coded = 0;
|
|
ps_final_prms->u4_cu_sad = 0;
|
|
|
|
ps_final_prms->as_pu_enc_loop[0].b1_intra_flag = PRED_MODE_INTRA;
|
|
ps_final_prms->as_pu_enc_loop[0].b4_wd = (trans_size >> 1) - 1;
|
|
ps_final_prms->as_pu_enc_loop[0].b4_ht = (trans_size >> 1) - 1;
|
|
ps_final_prms->as_pu_enc_loop[0].b4_pos_x = cu_pos_x;
|
|
ps_final_prms->as_pu_enc_loop[0].b4_pos_y = cu_pos_y;
|
|
ps_final_prms->as_pu_enc_loop[0].b1_merge_flag = 0;
|
|
|
|
ps_final_prms->as_col_pu_enc_loop[0].b1_intra_flag = 1;
|
|
|
|
/*copy qp directly as intra cant be skip*/
|
|
ps_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
|
|
ps_nbr_4x4->mv.s_l0_mv.i2_mvx = 0;
|
|
ps_nbr_4x4->mv.s_l0_mv.i2_mvy = 0;
|
|
ps_nbr_4x4->mv.s_l1_mv.i2_mvx = 0;
|
|
ps_nbr_4x4->mv.s_l1_mv.i2_mvy = 0;
|
|
ps_nbr_4x4->mv.i1_l0_ref_pic_buf_id = -1;
|
|
ps_nbr_4x4->mv.i1_l1_ref_pic_buf_id = -1;
|
|
ps_nbr_4x4->mv.i1_l0_ref_idx = -1;
|
|
ps_nbr_4x4->mv.i1_l1_ref_idx = -1;
|
|
|
|
/* RDOPT copy States : TU init (best until prev TU) to current */
|
|
memcpy(
|
|
&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0],
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0],
|
|
IHEVC_CAB_COEFFX_PREFIX);
|
|
|
|
/* RDOPT copy States :update to init state if 0 cbf */
|
|
memcpy(
|
|
&au1_intra_nxn_rdopt_ctxt_models[0][0],
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0],
|
|
IHEVC_CAB_COEFFX_PREFIX);
|
|
memcpy(
|
|
&au1_intra_nxn_rdopt_ctxt_models[1][0],
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0],
|
|
IHEVC_CAB_COEFFX_PREFIX);
|
|
|
|
/* loop for all partitions in CU blocks */
|
|
for(ctr = 0; ctr < num_cu_parts; ctr++)
|
|
{
|
|
UWORD8 *pu1_curr_mode;
|
|
WORD32 cand_ctr;
|
|
WORD32 nbr_flags;
|
|
|
|
/* for NxN case to track the best mode */
|
|
/* for other cases zeroth index will be used */
|
|
intra_prev_rem_flags_t as_intra_prev_rem[2];
|
|
LWORD64 ai8_cand_rdopt_cost[2];
|
|
UWORD32 au4_tu_sad[2];
|
|
WORD32 ai4_tu_bits[2];
|
|
WORD32 ai4_cbf[2];
|
|
WORD32 ai4_curr_bytes[2];
|
|
WORD32 ai4_zero_col[2];
|
|
WORD32 ai4_zero_row[2];
|
|
/* To store the pred, coeff and dequant for TU_EQ_SUBCU case (since mul.
|
|
cand. are there) ping-pong buffer to store the best and current */
|
|
UWORD8 au1_cur_pred_data[2][MIN_TU_SIZE * MIN_TU_SIZE];
|
|
UWORD8 au1_intra_coeffs[2][MAX_SCAN_COEFFS_BYTES_4x4];
|
|
WORD16 ai2_intra_deq_coeffs[2][MIN_TU_SIZE * MIN_TU_SIZE];
|
|
/* Context models stored for RDopt store and restore purpose */
|
|
|
|
UWORD8 au1_recon_availability[2];
|
|
|
|
WORD32 best_cand_idx = 0;
|
|
LWORD64 best_cand_cost = MAX_COST_64;
|
|
/* counters to toggle b/w best and current */
|
|
WORD32 best_intra_buf_idx = 1;
|
|
WORD32 curr_intra_buf_idx = 0;
|
|
|
|
/* copy the mode pointer to be used in inner loop */
|
|
pu1_curr_mode = pu1_luma_mode;
|
|
|
|
/* get the neighbour availability flags */
|
|
nbr_flags = ihevce_get_nbr_intra(
|
|
&s_nbr,
|
|
ps_ctxt->pu1_ctb_nbr_map,
|
|
ps_ctxt->i4_nbr_map_strd,
|
|
cu_pos_x,
|
|
cu_pos_y,
|
|
num_4x4_in_tu);
|
|
|
|
/* copy the nbr flags for chroma reuse */
|
|
if(4 != trans_size)
|
|
{
|
|
*pu4_nbr_flags = nbr_flags;
|
|
}
|
|
else if(1 == chrm_present_flag)
|
|
{
|
|
/* compute the avail flags assuming luma trans is 8x8 */
|
|
/* get the neighbour availability flags */
|
|
*pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
|
|
ps_ctxt->pu1_ctb_nbr_map,
|
|
ps_ctxt->i4_nbr_map_strd,
|
|
cu_pos_x,
|
|
cu_pos_y,
|
|
(num_4x4_in_tu << 1),
|
|
(num_4x4_in_tu << 1));
|
|
}
|
|
|
|
u1_compute_recon = !u1_compute_spatial_ssd && ((num_cu_parts > 1) && (ctr < 3));
|
|
|
|
if(!ctr && (u1_compute_spatial_ssd || u1_compute_recon))
|
|
{
|
|
ps_recon_datastore->u1_is_lumaRecon_available = 1;
|
|
}
|
|
else if(!ctr)
|
|
{
|
|
ps_recon_datastore->u1_is_lumaRecon_available = 0;
|
|
}
|
|
|
|
ihevc_intra_pred_luma_ref_substitution_fptr =
|
|
ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
|
|
|
|
/* call reference array substitution */
|
|
ihevc_intra_pred_luma_ref_substitution_fptr(
|
|
pu1_top_left,
|
|
pu1_top,
|
|
pu1_left,
|
|
left_strd,
|
|
trans_size,
|
|
nbr_flags,
|
|
(UWORD8 *)ps_ctxt->pv_ref_sub_out,
|
|
1);
|
|
|
|
/* Intra Mode gating based on MPM cand list and encoder quality preset */
|
|
if((ps_ctxt->i1_slice_type != ISLICE) && (TU_EQ_SUBCU == func_proc_mode) &&
|
|
(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
|
|
{
|
|
ihevce_mpm_idx_based_filter_RDOPT_cand(
|
|
ps_ctxt,
|
|
ps_cu_analyse,
|
|
ps_left_nbr_4x4,
|
|
ps_top_nbr_4x4,
|
|
pu1_luma_mode,
|
|
&ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[ctr][0]);
|
|
}
|
|
|
|
if((TU_EQ_SUBCU == func_proc_mode) && (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
|
|
(ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr] >= MAX_INTRA_CU_CANDIDATES))
|
|
{
|
|
WORD32 ai4_mpm_mode_list[3];
|
|
WORD32 i;
|
|
|
|
WORD32 i4_curr_index = ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr];
|
|
|
|
ihevce_populate_intra_pred_mode(
|
|
ps_top_nbr_4x4->b6_luma_intra_mode,
|
|
ps_tmp_lt_4x4->b6_luma_intra_mode,
|
|
s_nbr.u1_top_avail,
|
|
s_nbr.u1_left_avail,
|
|
cu_pos_y,
|
|
&ai4_mpm_mode_list[0]);
|
|
|
|
for(i = 0; i < 3; i++)
|
|
{
|
|
if(ps_cu_analyse->s_cu_intra_cand
|
|
.au1_intra_luma_mode_nxn_hash[ctr][ai4_mpm_mode_list[i]] == 0)
|
|
{
|
|
ASSERT(ai4_mpm_mode_list[i] < 35);
|
|
|
|
ps_cu_analyse->s_cu_intra_cand
|
|
.au1_intra_luma_mode_nxn_hash[ctr][ai4_mpm_mode_list[i]] = 1;
|
|
pu1_luma_mode[i4_curr_index] = ai4_mpm_mode_list[i];
|
|
ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr]++;
|
|
i4_curr_index++;
|
|
}
|
|
}
|
|
|
|
pu1_luma_mode[i4_curr_index] = 255;
|
|
}
|
|
|
|
/* loop over candidates for each partition */
|
|
for(cand_ctr = 0; cand_ctr < num_cands; cand_ctr++)
|
|
{
|
|
WORD32 curr_pred_mode;
|
|
WORD32 bits = 0;
|
|
LWORD64 curr_cost;
|
|
WORD32 luma_pred_func_idx;
|
|
UWORD8 *pu1_curr_ecd_data;
|
|
WORD16 *pi2_curr_deq_data;
|
|
WORD32 curr_deq_data_strd;
|
|
WORD32 pred_strd;
|
|
UWORD8 *pu1_pred;
|
|
|
|
/* if NXN case the recon and ecd data is stored in temp buffers */
|
|
if(TU_EQ_SUBCU == func_proc_mode)
|
|
{
|
|
pu1_pred = &au1_cur_pred_data[curr_intra_buf_idx][0];
|
|
pred_strd = trans_size;
|
|
pu1_curr_ecd_data = &au1_intra_coeffs[curr_intra_buf_idx][0];
|
|
pi2_curr_deq_data = &ai2_intra_deq_coeffs[curr_intra_buf_idx][0];
|
|
curr_deq_data_strd = trans_size;
|
|
|
|
ASSERT(trans_size == MIN_TU_SIZE);
|
|
}
|
|
else
|
|
{
|
|
pu1_pred = (UWORD8 *)pv_pred_org;
|
|
pred_strd = pred_strd_org;
|
|
pu1_curr_ecd_data = pu1_ecd_data;
|
|
pi2_curr_deq_data = pi2_deq_data;
|
|
curr_deq_data_strd = deq_data_strd;
|
|
}
|
|
|
|
pu1_recon = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs[curr_intra_buf_idx]) +
|
|
(ctr & 1) * trans_size + (ctr > 1) * trans_size * i4_recon_stride;
|
|
|
|
if(is_sub_pu_in_hq == 1)
|
|
{
|
|
curr_pred_mode = cand_ctr;
|
|
}
|
|
else
|
|
{
|
|
curr_pred_mode = pu1_curr_mode[cand_ctr];
|
|
}
|
|
|
|
/* If the candidate mode is 255, then break */
|
|
if(255 == curr_pred_mode)
|
|
{
|
|
break;
|
|
}
|
|
else if(250 == curr_pred_mode)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
/* check if this mode needs to be evaluated or not. For 2nx2n cases, this */
|
|
/* function will be called once per candidate, so this check has been done */
|
|
/* outside this function call. For NxN case, this function will be called */
|
|
/* only once, and all the candidates will be evaluated here. */
|
|
if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3)
|
|
{
|
|
if((TU_EQ_SUBCU == func_proc_mode) &&
|
|
(0 == ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[ctr][cand_ctr]))
|
|
{
|
|
continue;
|
|
}
|
|
}
|
|
|
|
/* call reference filtering */
|
|
ps_ctxt->ps_func_selector->ihevc_intra_pred_ref_filtering_fptr(
|
|
(UWORD8 *)ps_ctxt->pv_ref_sub_out,
|
|
trans_size,
|
|
(UWORD8 *)ps_ctxt->pv_ref_filt_out,
|
|
curr_pred_mode,
|
|
ps_ctxt->i1_strong_intra_smoothing_enable_flag);
|
|
|
|
/* use the look up to get the function idx */
|
|
luma_pred_func_idx = g_i4_ip_funcs[curr_pred_mode];
|
|
|
|
/* call the intra prediction function */
|
|
ps_ctxt->apf_lum_ip[luma_pred_func_idx](
|
|
(UWORD8 *)ps_ctxt->pv_ref_filt_out,
|
|
1,
|
|
pu1_pred,
|
|
pred_strd,
|
|
trans_size,
|
|
curr_pred_mode);
|
|
|
|
/* populate the coeffs scan idx */
|
|
ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
|
|
|
|
/* for luma 4x4 and 8x8 transforms the scan is chosen based on intra pred mode */
|
|
if(trans_size < 16)
|
|
{
|
|
/* for modes from 22 upto 30 horizontal scan is used */
|
|
if((curr_pred_mode > 21) && (curr_pred_mode < 31))
|
|
{
|
|
ps_ctxt->i4_scan_idx = SCAN_HORZ;
|
|
}
|
|
/* for modes from 6 upto 14 vertical scan is used */
|
|
else if((curr_pred_mode > 5) && (curr_pred_mode < 15))
|
|
{
|
|
ps_ctxt->i4_scan_idx = SCAN_VERT;
|
|
}
|
|
}
|
|
|
|
/* RDOPT copy States : TU init (best until prev TU) to current */
|
|
COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
|
|
&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0] +
|
|
IHEVC_CAB_COEFFX_PREFIX,
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
|
|
IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
|
|
|
|
i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
|
|
i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
|
|
|
|
#if DISABLE_RDOQ_INTRA
|
|
i4_perform_rdoq = 0;
|
|
#endif
|
|
|
|
/*2 Multi- dimensinal array based on trans size of rounding factor to be added here */
|
|
/* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */
|
|
/* Currently the complete array will contain only single value*/
|
|
/*The rounding factor is calculated with the formula
|
|
Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2
|
|
rounding factor = (1 - DeadZone Val)
|
|
|
|
Assumption: Cabac states of All the sub-blocks in the TU are considered independent
|
|
*/
|
|
if((ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING))
|
|
{
|
|
if((ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) && (ctr != 0))
|
|
{
|
|
double i4_lamda_modifier;
|
|
|
|
if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id))
|
|
{
|
|
i4_lamda_modifier =
|
|
ps_ctxt->i4_lamda_modifier *
|
|
CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
|
|
}
|
|
else
|
|
{
|
|
i4_lamda_modifier = ps_ctxt->i4_lamda_modifier;
|
|
}
|
|
if(ps_ctxt->i4_use_const_lamda_modifier)
|
|
{
|
|
if(ISLICE == ps_ctxt->i1_slice_type)
|
|
{
|
|
i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
|
|
}
|
|
else
|
|
{
|
|
i4_lamda_modifier = CONST_LAMDA_MOD_VAL;
|
|
}
|
|
}
|
|
|
|
ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
|
|
&ps_ctxt->i4_quant_round_tu[0][0];
|
|
ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
|
|
&ps_ctxt->i4_quant_round_tu[1][0];
|
|
|
|
memset(
|
|
ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
|
|
0,
|
|
trans_size * trans_size * sizeof(WORD32));
|
|
memset(
|
|
ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
|
|
0,
|
|
trans_size * trans_size * sizeof(WORD32));
|
|
|
|
ihevce_quant_rounding_factor_gen(
|
|
trans_size,
|
|
1,
|
|
&ps_ctxt->s_rdopt_entropy_ctxt,
|
|
ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
|
|
ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
|
|
i4_lamda_modifier,
|
|
1);
|
|
}
|
|
else
|
|
{
|
|
ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
|
|
ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3];
|
|
ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
|
|
ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3];
|
|
}
|
|
}
|
|
|
|
/* call T Q IT IQ and recon function */
|
|
ai4_cbf[curr_intra_buf_idx] = ihevce_t_q_iq_ssd_scan_fxn(
|
|
ps_ctxt,
|
|
pu1_pred,
|
|
pred_strd,
|
|
(UWORD8 *)pv_curr_src,
|
|
src_strd,
|
|
pi2_curr_deq_data,
|
|
curr_deq_data_strd,
|
|
pu1_recon,
|
|
i4_recon_stride,
|
|
pu1_curr_ecd_data,
|
|
pu1_csbf_buf,
|
|
csbf_strd,
|
|
trans_size,
|
|
PRED_MODE_INTRA,
|
|
&ai8_cand_rdopt_cost[curr_intra_buf_idx],
|
|
&ai4_curr_bytes[curr_intra_buf_idx],
|
|
&ai4_tu_bits[curr_intra_buf_idx],
|
|
&au4_tu_sad[curr_intra_buf_idx],
|
|
&ai4_zero_col[curr_intra_buf_idx],
|
|
&ai4_zero_row[curr_intra_buf_idx],
|
|
&au1_recon_availability[curr_intra_buf_idx],
|
|
i4_perform_rdoq,
|
|
i4_perform_sbh,
|
|
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
|
|
i4_alpha_stim_multiplier,
|
|
u1_is_cu_noisy,
|
|
#endif
|
|
u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
|
|
1 /*early_cbf */
|
|
);
|
|
|
|
#if COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL && !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
|
|
if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
|
|
{
|
|
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
|
|
ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
|
|
pv_curr_src,
|
|
src_strd,
|
|
pu1_pred,
|
|
pred_strd,
|
|
ai8_cand_rdopt_cost[curr_intra_buf_idx],
|
|
i4_alpha_stim_multiplier,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
NULL_PLANE);
|
|
#else
|
|
if(u1_compute_spatial_ssd && au1_recon_availability[curr_intra_buf_idx])
|
|
{
|
|
ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
|
|
pv_curr_src,
|
|
src_strd,
|
|
pu1_recon,
|
|
i4_recon_stride,
|
|
ai8_cand_rdopt_cost[curr_intra_buf_idx],
|
|
i4_alpha_stim_multiplier,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
NULL_PLANE);
|
|
}
|
|
else
|
|
{
|
|
ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
|
|
pv_curr_src,
|
|
src_strd,
|
|
pu1_pred,
|
|
pred_strd,
|
|
ai8_cand_rdopt_cost[curr_intra_buf_idx],
|
|
i4_alpha_stim_multiplier,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
NULL_PLANE);
|
|
}
|
|
#endif
|
|
}
|
|
#endif
|
|
|
|
if(TU_EQ_SUBCU == func_proc_mode)
|
|
{
|
|
ASSERT(ai4_curr_bytes[curr_intra_buf_idx] < MAX_SCAN_COEFFS_BYTES_4x4);
|
|
}
|
|
|
|
/* based on CBF/No CBF copy the corresponding state */
|
|
if(0 == ai4_cbf[curr_intra_buf_idx])
|
|
{
|
|
/* RDOPT copy States :update to init state if 0 cbf */
|
|
COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
|
|
&au1_intra_nxn_rdopt_ctxt_models[curr_intra_buf_idx][0] +
|
|
IHEVC_CAB_COEFFX_PREFIX,
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
|
|
IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
|
|
}
|
|
else
|
|
{
|
|
/* RDOPT copy States :update to new state only if CBF is non zero */
|
|
COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
|
|
&au1_intra_nxn_rdopt_ctxt_models[curr_intra_buf_idx][0] +
|
|
IHEVC_CAB_COEFFX_PREFIX,
|
|
&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0] +
|
|
IHEVC_CAB_COEFFX_PREFIX,
|
|
IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
|
|
}
|
|
|
|
/* call the function which perform intra mode prediction */
|
|
ihevce_intra_pred_mode_signaling(
|
|
ps_top_nbr_4x4->b6_luma_intra_mode,
|
|
ps_tmp_lt_4x4->b6_luma_intra_mode,
|
|
s_nbr.u1_top_avail,
|
|
s_nbr.u1_left_avail,
|
|
cu_pos_y,
|
|
curr_pred_mode,
|
|
&as_intra_prev_rem[curr_intra_buf_idx]);
|
|
/******************************************************************/
|
|
/* PREV INTRA LUMA FLAG, MPM MODE and REM INTRA MODE bits for I_NxN
|
|
The bits for these are evaluated for every RDO mode of current subcu
|
|
as they can significantly contribute to RDO cost. Note that these
|
|
bits are not accounted for here (ai8_cand_rdopt_cost) as they
|
|
are accounted for in encode_cu call later */
|
|
|
|
/******************************************************************/
|
|
/* PREV INTRA LUMA FLAG, MPM MODE and REM INTRA MODE bits for I_NxN
|
|
The bits for these are evaluated for every RDO mode of current subcu
|
|
as they can significantly contribute to RDO cost. Note that these
|
|
bits are not accounted for here (ai8_cand_rdopt_cost) as they
|
|
are accounted for in encode_cu call later */
|
|
|
|
/* Estimate bits to encode prev rem flag for NXN mode */
|
|
{
|
|
WORD32 bits_frac = gau2_ihevce_cabac_bin_to_bits
|
|
[u1_prev_flag_cabac_ctxt ^
|
|
as_intra_prev_rem[curr_intra_buf_idx].b1_prev_intra_luma_pred_flag];
|
|
|
|
/* rounding the fractional bits to nearest integer */
|
|
bits = ((bits_frac + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q);
|
|
}
|
|
|
|
/* based on prev flag all the mpmidx bits and rem bits */
|
|
if(1 == as_intra_prev_rem[curr_intra_buf_idx].b1_prev_intra_luma_pred_flag)
|
|
{
|
|
/* mpm_idx */
|
|
bits += as_intra_prev_rem[curr_intra_buf_idx].b2_mpm_idx ? 2 : 1;
|
|
}
|
|
else
|
|
{
|
|
/* rem intra mode */
|
|
bits += 5;
|
|
}
|
|
|
|
bits += ai4_tu_bits[curr_intra_buf_idx];
|
|
|
|
/* compute the total cost for current candidate */
|
|
curr_cost = ai8_cand_rdopt_cost[curr_intra_buf_idx];
|
|
|
|
/* get the final ssd cost */
|
|
curr_cost +=
|
|
COMPUTE_RATE_COST_CLIP30(bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
|
|
|
|
/* check of the best candidate cost */
|
|
if(curr_cost < best_cand_cost)
|
|
{
|
|
best_cand_cost = curr_cost;
|
|
best_cand_idx = cand_ctr;
|
|
best_intra_buf_idx = curr_intra_buf_idx;
|
|
curr_intra_buf_idx = !curr_intra_buf_idx;
|
|
}
|
|
}
|
|
|
|
/*************** For TU_EQ_SUBCU case *****************/
|
|
/* Copy the pred for best cand. to the final pred array */
|
|
/* Copy the iq-coeff for best cand. to the final array */
|
|
/* copy the best coeffs data to final buffer */
|
|
if(TU_EQ_SUBCU == func_proc_mode)
|
|
{
|
|
/* Copy the pred for best cand. to the final pred array */
|
|
|
|
ps_ctxt->s_cmn_opt_func.pf_copy_2d(
|
|
(UWORD8 *)pv_pred_org,
|
|
pred_strd_org,
|
|
&au1_cur_pred_data[best_intra_buf_idx][0],
|
|
trans_size,
|
|
trans_size,
|
|
trans_size);
|
|
|
|
/* Copy the deq-coeff for best cand. to the final array */
|
|
|
|
ps_ctxt->s_cmn_opt_func.pf_copy_2d(
|
|
(UWORD8 *)pi2_deq_data,
|
|
deq_data_strd << 1,
|
|
(UWORD8 *)&ai2_intra_deq_coeffs[best_intra_buf_idx][0],
|
|
trans_size << 1,
|
|
trans_size << 1,
|
|
trans_size);
|
|
/* copy the coeffs to final cu ecd bytes buffer */
|
|
memcpy(
|
|
pu1_ecd_data,
|
|
&au1_intra_coeffs[best_intra_buf_idx][0],
|
|
ai4_curr_bytes[best_intra_buf_idx]);
|
|
|
|
pu1_recon = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs[best_intra_buf_idx]) +
|
|
(ctr & 1) * trans_size + (ctr > 1) * trans_size * i4_recon_stride;
|
|
}
|
|
|
|
/*---------- Calculate Recon for the best INTRA mode ---------*/
|
|
/* TU_EQ_CU case : No need for recon, otherwise recon is required */
|
|
/* Compute recon only for the best mode for TU_EQ_SUBCU case */
|
|
if(u1_compute_recon)
|
|
{
|
|
ihevce_it_recon_fxn(
|
|
ps_ctxt,
|
|
pi2_deq_data,
|
|
deq_data_strd,
|
|
(UWORD8 *)pv_pred_org,
|
|
pred_strd_org,
|
|
pu1_recon,
|
|
i4_recon_stride,
|
|
pu1_ecd_data,
|
|
trans_size,
|
|
PRED_MODE_INTRA,
|
|
ai4_cbf[best_intra_buf_idx],
|
|
ai4_zero_col[best_intra_buf_idx],
|
|
ai4_zero_row[best_intra_buf_idx]);
|
|
|
|
ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = best_intra_buf_idx;
|
|
}
|
|
else if(u1_compute_spatial_ssd && au1_recon_availability[best_intra_buf_idx])
|
|
{
|
|
ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = best_intra_buf_idx;
|
|
}
|
|
else
|
|
{
|
|
ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
|
|
}
|
|
|
|
/* RDOPT copy States :update to best modes state */
|
|
COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
|
|
&au1_intra_nxn_rdopt_ctxt_models[best_intra_buf_idx][0] + IHEVC_CAB_COEFFX_PREFIX,
|
|
IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
|
|
|
|
/* copy the prev,mpm_idx and rem modes from best cand */
|
|
ps_final_prms->as_intra_prev_rem[ctr] = as_intra_prev_rem[best_intra_buf_idx];
|
|
|
|
/* update the cabac context of prev intra pred mode flag */
|
|
u1_prev_flag_cabac_ctxt = gau1_ihevc_next_state
|
|
[(u1_prev_flag_cabac_ctxt << 1) |
|
|
as_intra_prev_rem[best_intra_buf_idx].b1_prev_intra_luma_pred_flag];
|
|
|
|
/* accumulate the TU bits into cu bits */
|
|
cu_bits += ai4_tu_bits[best_intra_buf_idx];
|
|
|
|
/* copy the intra pred mode for chroma reuse */
|
|
if(is_sub_pu_in_hq == 0)
|
|
{
|
|
*pu1_intra_pred_mode = pu1_curr_mode[best_cand_idx];
|
|
}
|
|
else
|
|
{
|
|
*pu1_intra_pred_mode = best_cand_idx;
|
|
}
|
|
|
|
/* Store luma mode as chroma mode. If chroma prcs happens, and
|
|
if a diff. mode wins, it should update this!! */
|
|
if(1 == chrm_present_flag)
|
|
{
|
|
if(is_sub_pu_in_hq == 0)
|
|
{
|
|
ps_final_prms->u1_chroma_intra_pred_actual_mode =
|
|
((ps_ctxt->u1_chroma_array_type == 2)
|
|
? gau1_chroma422_intra_angle_mapping[pu1_curr_mode[best_cand_idx]]
|
|
: pu1_curr_mode[best_cand_idx]);
|
|
}
|
|
else
|
|
{
|
|
ps_final_prms->u1_chroma_intra_pred_actual_mode =
|
|
((ps_ctxt->u1_chroma_array_type == 2)
|
|
? gau1_chroma422_intra_angle_mapping[best_cand_idx]
|
|
: best_cand_idx);
|
|
}
|
|
|
|
ps_final_prms->u1_chroma_intra_pred_mode = 4;
|
|
}
|
|
|
|
/*remember the cbf flag to replicate qp for 4x4 neighbour*/
|
|
ps_final_prms->u1_is_cu_coded |= ai4_cbf[best_intra_buf_idx];
|
|
|
|
/*accumulate ssd over all TU of intra CU*/
|
|
ps_final_prms->u4_cu_sad += au4_tu_sad[best_intra_buf_idx];
|
|
|
|
/* update the bytes */
|
|
ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
|
|
ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed =
|
|
ai4_curr_bytes[best_intra_buf_idx];
|
|
/* update the zero_row and col info for the final mode */
|
|
ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_col =
|
|
ai4_zero_col[best_intra_buf_idx];
|
|
ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_row =
|
|
ai4_zero_row[best_intra_buf_idx];
|
|
|
|
ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
|
|
|
|
/* update the total bytes cons */
|
|
ecd_data_bytes_cons += ai4_curr_bytes[best_intra_buf_idx];
|
|
pu1_ecd_data += ai4_curr_bytes[best_intra_buf_idx];
|
|
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = ai4_cbf[best_intra_buf_idx];
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_chroma_intra_mode_idx = chrm_present_flag;
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b7_qp = ps_ctxt->i4_cu_qp;
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_first_tu_in_cu = 0;
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_transquant_bypass = 0;
|
|
GETRANGE(tx_size, trans_size);
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x;
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y;
|
|
|
|
/* replicate the nbr 4x4 structure for all 4x4 blocks of the current TU */
|
|
ps_nbr_4x4->b1_skip_flag = 0;
|
|
ps_nbr_4x4->b1_intra_flag = 1;
|
|
ps_nbr_4x4->b1_pred_l0_flag = 0;
|
|
ps_nbr_4x4->b1_pred_l1_flag = 0;
|
|
|
|
if(is_sub_pu_in_hq == 0)
|
|
{
|
|
ps_nbr_4x4->b6_luma_intra_mode = pu1_curr_mode[best_cand_idx];
|
|
}
|
|
else
|
|
{
|
|
ps_nbr_4x4->b6_luma_intra_mode = best_cand_idx;
|
|
}
|
|
|
|
ps_nbr_4x4->b1_y_cbf = ai4_cbf[best_intra_buf_idx];
|
|
|
|
/* since tu size can be less than cusize, replication is done with strd */
|
|
{
|
|
WORD32 i, j;
|
|
nbr_4x4_t *ps_tmp_4x4;
|
|
|
|
ps_tmp_4x4 = ps_nbr_4x4;
|
|
|
|
for(i = 0; i < num_4x4_in_tu; i++)
|
|
{
|
|
for(j = 0; j < num_4x4_in_tu; j++)
|
|
{
|
|
ps_tmp_4x4[j] = *ps_nbr_4x4;
|
|
}
|
|
/* row level update*/
|
|
ps_tmp_4x4 += num_4x4_in_cu;
|
|
}
|
|
}
|
|
|
|
if(TU_EQ_SUBCU == func_proc_mode)
|
|
{
|
|
pu1_luma_mode += ((MAX_INTRA_CU_CANDIDATES * 4) + 2 + 1);
|
|
}
|
|
|
|
if((num_cu_parts > 1) && (ctr < 3))
|
|
{
|
|
/* set the neighbour map to 1 */
|
|
ihevce_set_nbr_map(
|
|
ps_ctxt->pu1_ctb_nbr_map,
|
|
ps_ctxt->i4_nbr_map_strd,
|
|
cu_pos_x,
|
|
cu_pos_y,
|
|
trans_size >> 2,
|
|
1);
|
|
|
|
/* block level updates block number (1 & 3 )*/
|
|
pv_curr_src = (UWORD8 *)pv_curr_src + trans_size;
|
|
pv_pred_org = (UWORD8 *)pv_pred_org + trans_size;
|
|
pi2_deq_data += trans_size;
|
|
|
|
switch(ctr)
|
|
{
|
|
case 0:
|
|
{
|
|
pu1_left = pu1_recon + trans_size - 1;
|
|
pu1_top += trans_size;
|
|
pu1_top_left = pu1_top - 1;
|
|
left_strd = i4_recon_stride;
|
|
|
|
break;
|
|
}
|
|
case 1:
|
|
{
|
|
ASSERT(
|
|
(ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] == 0) ||
|
|
(ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] == 1));
|
|
|
|
/* Since the 'lumaRefSubstitution' function expects both Top and */
|
|
/* TopRight recon pixels to be present in the same buffer */
|
|
if(ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] !=
|
|
ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1])
|
|
{
|
|
UWORD8 *pu1_src =
|
|
((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
|
|
[ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1]]) +
|
|
trans_size;
|
|
UWORD8 *pu1_dst =
|
|
((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
|
|
[ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]) +
|
|
trans_size;
|
|
|
|
ps_ctxt->s_cmn_opt_func.pf_copy_2d(
|
|
pu1_dst, i4_recon_stride, pu1_src, i4_recon_stride, trans_size, trans_size);
|
|
|
|
ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] =
|
|
ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0];
|
|
}
|
|
|
|
pu1_left = (UWORD8 *)pv_cu_left + trans_size * cu_left_stride;
|
|
pu1_top = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
|
|
[ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]) +
|
|
(trans_size - 1) * i4_recon_stride;
|
|
pu1_top_left = pu1_left - cu_left_stride;
|
|
left_strd = cu_left_stride;
|
|
|
|
break;
|
|
}
|
|
case 2:
|
|
{
|
|
ASSERT(
|
|
(ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] == 0) ||
|
|
(ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] == 1));
|
|
|
|
pu1_left = pu1_recon + trans_size - 1;
|
|
pu1_top = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
|
|
[ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1]]) +
|
|
(trans_size - 1) * i4_recon_stride + trans_size;
|
|
pu1_top_left = pu1_top - 1;
|
|
left_strd = i4_recon_stride;
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
pu1_csbf_buf += num_4x4_in_tu;
|
|
cu_pos_x += num_4x4_in_tu;
|
|
ps_nbr_4x4 += num_4x4_in_tu;
|
|
ps_top_nbr_4x4 += num_4x4_in_tu;
|
|
ps_tmp_lt_4x4 = ps_nbr_4x4 - 1;
|
|
|
|
pu1_intra_pred_mode++;
|
|
|
|
/* after 2 blocks increment the pointers to bottom blocks */
|
|
if(1 == ctr)
|
|
{
|
|
pv_curr_src = (UWORD8 *)pv_curr_src - (trans_size << 1);
|
|
pv_curr_src = (UWORD8 *)pv_curr_src + (trans_size * src_strd);
|
|
|
|
pv_pred_org = (UWORD8 *)pv_pred_org - (trans_size << 1);
|
|
pv_pred_org = (UWORD8 *)pv_pred_org + (trans_size * pred_strd_org);
|
|
pi2_deq_data -= (trans_size << 1);
|
|
pi2_deq_data += (trans_size * deq_data_strd);
|
|
|
|
pu1_csbf_buf -= (num_4x4_in_tu << 1);
|
|
pu1_csbf_buf += (num_4x4_in_tu * csbf_strd);
|
|
|
|
ps_nbr_4x4 -= (num_4x4_in_tu << 1);
|
|
ps_nbr_4x4 += (num_4x4_in_tu * num_4x4_in_cu);
|
|
ps_top_nbr_4x4 = ps_nbr_4x4 - num_4x4_in_cu;
|
|
ps_tmp_lt_4x4 = ps_left_nbr_4x4 + (num_4x4_in_tu * nbr_4x4_left_strd);
|
|
|
|
/* decrement pos x to start */
|
|
cu_pos_x -= (num_4x4_in_tu << 1);
|
|
cu_pos_y += num_4x4_in_tu;
|
|
}
|
|
}
|
|
|
|
#if RDOPT_ENABLE
|
|
/* compute the RDOPT cost for the current TU */
|
|
ai8_cand_rdopt_cost[best_intra_buf_idx] += COMPUTE_RATE_COST_CLIP30(
|
|
ai4_tu_bits[best_intra_buf_idx], ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
|
|
#endif
|
|
|
|
/* accumulate the costs */
|
|
total_rdopt_cost += ai8_cand_rdopt_cost[best_intra_buf_idx];
|
|
|
|
if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
|
|
{
|
|
/* Early exit : If the current running cost exceeds
|
|
the prev. best mode cost, break */
|
|
if(total_rdopt_cost > prev_best_rdopt_cost)
|
|
{
|
|
return (total_rdopt_cost);
|
|
}
|
|
}
|
|
|
|
/* if transfrom size is 4x4 then only first luma 4x4 will have chroma*/
|
|
chrm_present_flag = (4 != trans_size) ? 1 : INTRA_PRED_CHROMA_IDX_NONE;
|
|
|
|
pu4_nbr_flags++;
|
|
}
|
|
/* Modify the cost function for this CU. */
|
|
/* loop in for 8x8 blocks */
|
|
if(ps_ctxt->u1_enable_psyRDOPT)
|
|
{
|
|
UWORD8 *pu1_recon_cu;
|
|
WORD32 recon_stride;
|
|
WORD32 curr_pos_x;
|
|
WORD32 curr_pos_y;
|
|
WORD32 start_index;
|
|
WORD32 num_horz_cu_in_ctb;
|
|
WORD32 cu_size;
|
|
WORD32 had_block_size;
|
|
|
|
/* tODO: sreenivasa ctb size has to be used appropriately */
|
|
had_block_size = 8;
|
|
cu_size = ps_cu_analyse->u1_cu_size; /* todo */
|
|
num_horz_cu_in_ctb = 64 / had_block_size;
|
|
|
|
curr_pos_x = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
|
|
curr_pos_y = ps_cu_analyse->b3_cu_pos_y << 3; /* pel units */
|
|
recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
|
|
pu1_recon_cu =
|
|
((UWORD8 *)ps_final_prms->s_recon_datastore
|
|
.apv_luma_recon_bufs[ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]);
|
|
/* + \ curr_pos_x + curr_pos_y * recon_stride; */
|
|
|
|
/* start index to index the source satd of curr cu int he current ctb*/
|
|
start_index =
|
|
(curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
|
|
|
|
{
|
|
total_rdopt_cost += ihevce_psy_rd_cost(
|
|
ps_ctxt->ai4_source_satd_8x8,
|
|
pu1_recon_cu,
|
|
recon_stride,
|
|
1, //
|
|
cu_size,
|
|
0, // pic type
|
|
0, //layer id
|
|
ps_ctxt->i4_satd_lamda, // lambda
|
|
start_index,
|
|
ps_ctxt->u1_is_input_data_hbd,
|
|
ps_ctxt->u4_psy_strength,
|
|
&ps_ctxt->s_cmn_opt_func
|
|
|
|
); // 8 bit
|
|
}
|
|
}
|
|
|
|
#if !FORCE_INTRA_TU_DEPTH_TO_0 //RATIONALISE_NUM_RDO_MODES_IN_PQ_AND_HQ
|
|
if(TU_EQ_SUBCU == func_proc_mode)
|
|
{
|
|
UWORD8 au1_tu_eq_cu_div2_modes[4];
|
|
UWORD8 au1_freq_of_mode[4];
|
|
|
|
WORD32 i4_num_clusters = ihevce_find_num_clusters_of_identical_points_1D(
|
|
ps_final_prms->au1_intra_pred_mode, au1_tu_eq_cu_div2_modes, au1_freq_of_mode, 4);
|
|
|
|
if(1 == i4_num_clusters)
|
|
{
|
|
ps_final_prms->u2_num_pus_in_cu = 1;
|
|
ps_final_prms->u1_part_mode = SIZE_2Nx2N;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
/* store the num TUs*/
|
|
ps_final_prms->u2_num_tus_in_cu = u2_num_tus_in_cu;
|
|
|
|
/* update the bytes consumed */
|
|
ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
|
|
|
|
/* store the current cu size to final prms */
|
|
ps_final_prms->u1_cu_size = ps_cu_analyse->u1_cu_size;
|
|
|
|
/* cu bits will be having luma residual bits till this point */
|
|
/* if zero_cbf eval is disabled then cu bits will be zero */
|
|
ps_final_prms->u4_cu_luma_res_bits = cu_bits;
|
|
|
|
/* ------------- Chroma processing -------------- */
|
|
/* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEDD preset*/
|
|
if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
|
|
{
|
|
LWORD64 chrm_rdopt_cost;
|
|
WORD32 chrm_rdopt_tu_bits;
|
|
|
|
/* Store the current RDOPT cost to enable early exit in chrom_prcs */
|
|
ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
|
|
|
|
chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
|
|
ps_ctxt,
|
|
curr_buf_idx,
|
|
func_proc_mode,
|
|
ps_chrm_cu_buf_prms->pu1_curr_src,
|
|
ps_chrm_cu_buf_prms->i4_chrm_src_stride,
|
|
ps_chrm_cu_buf_prms->pu1_cu_left,
|
|
ps_chrm_cu_buf_prms->pu1_cu_top,
|
|
ps_chrm_cu_buf_prms->pu1_cu_top_left,
|
|
ps_chrm_cu_buf_prms->i4_cu_left_stride,
|
|
cu_pos_x_8pelunits,
|
|
cu_pos_y_8pelunits,
|
|
&chrm_rdopt_tu_bits,
|
|
i4_alpha_stim_multiplier,
|
|
u1_is_cu_noisy);
|
|
|
|
#if WEIGH_CHROMA_COST
|
|
chrm_rdopt_cost = (LWORD64)(
|
|
(chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
|
|
(1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
|
|
CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
|
|
#endif
|
|
|
|
#if CHROMA_RDOPT_ENABLE
|
|
total_rdopt_cost += chrm_rdopt_cost;
|
|
#endif
|
|
cu_bits += chrm_rdopt_tu_bits;
|
|
|
|
/* cu bits for chroma residual if chroma rdopt is on */
|
|
/* if zero_cbf eval is disabled then cu bits will be zero */
|
|
ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
|
|
|
|
if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
|
|
{
|
|
/* Early exit : If the current running cost exceeds
|
|
the prev. best mode cost, break */
|
|
if(total_rdopt_cost > prev_best_rdopt_cost)
|
|
{
|
|
return (total_rdopt_cost);
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{}
|
|
|
|
/* RDOPT copy States : Best after all luma TUs to current */
|
|
COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
|
|
&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0] +
|
|
IHEVC_CAB_COEFFX_PREFIX,
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
|
|
IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
|
|
|
|
/* get the neighbour availability flags for current cu */
|
|
ihevce_get_only_nbr_flag(
|
|
&s_nbr,
|
|
ps_ctxt->pu1_ctb_nbr_map,
|
|
ps_ctxt->i4_nbr_map_strd,
|
|
(cu_pos_x_8pelunits << 1),
|
|
(cu_pos_y_8pelunits << 1),
|
|
(trans_size << 1),
|
|
(trans_size << 1));
|
|
|
|
/* call the entropy rdo encode to get the bit estimate for current cu */
|
|
/*if ZERO_CBF eval is enabled then this function will return only CU header bits */
|
|
{
|
|
/*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
|
|
WORD32 cbf_bits, header_bits;
|
|
|
|
header_bits = ihevce_entropy_rdo_encode_cu(
|
|
&ps_ctxt->s_rdopt_entropy_ctxt,
|
|
ps_final_prms,
|
|
cu_pos_x_8pelunits,
|
|
cu_pos_y_8pelunits,
|
|
ps_cu_analyse->u1_cu_size,
|
|
s_nbr.u1_top_avail,
|
|
s_nbr.u1_left_avail,
|
|
&ps_final_prms->pu1_cu_coeffs[0],
|
|
&cbf_bits);
|
|
|
|
cu_bits += header_bits;
|
|
|
|
/* cbf bits are excluded from header bits, instead considered as texture bits */
|
|
/* incase if zero cbf eval is disabled then texture bits gets added here */
|
|
ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
|
|
ps_final_prms->u4_cu_cbf_bits = cbf_bits;
|
|
|
|
#if RDOPT_ENABLE
|
|
/* add the cost of coding the cu bits */
|
|
total_rdopt_cost +=
|
|
COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
|
|
#endif
|
|
}
|
|
return (total_rdopt_cost);
|
|
}
|
|
/*!
|
|
******************************************************************************
|
|
* \if Function name : ihevce_inter_rdopt_cu_ntu \endif
|
|
*
|
|
* \brief
|
|
* Inter coding unit function which performs the TQ IT IQ recon for luma
|
|
*
|
|
* \param[in] ps_ctxt enc_loop module ctxt pointer
|
|
* \param[in] ps_inter_cand pointer to inter candidate structure
|
|
* \param[in] pv_src pointer to source data buffer
|
|
* \param[in] cu_size Current CU size
|
|
* \param[in] cu_pos_x cu position x w.r.t. ctb (in 8x8 units)
|
|
* \param[in] cu_pos_y cu position y w.r.t. ctb (in 8x8 units)
|
|
* \param[in] ps_cu_prms pointer to cu params (supplies the luma source stride)
|
|
* \param[in] curr_buf_idx buffer index for current output storage
|
|
* \param[in] ps_chrm_cu_buf_prms pointer to chroma buffer pointers structure
|
|
*
|
|
* \return
|
|
* Rdopt cost
|
|
*
|
|
* \author
|
|
* Ittiam
|
|
*
|
|
*****************************************************************************
|
|
*/
|
|
LWORD64 ihevce_inter_rdopt_cu_ntu(
|
|
ihevce_enc_loop_ctxt_t *ps_ctxt,
|
|
enc_loop_cu_prms_t *ps_cu_prms,
|
|
void *pv_src,
|
|
WORD32 cu_size,
|
|
WORD32 cu_pos_x,
|
|
WORD32 cu_pos_y,
|
|
WORD32 curr_buf_idx,
|
|
enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
|
|
cu_inter_cand_t *ps_inter_cand,
|
|
cu_analyse_t *ps_cu_analyse,
|
|
WORD32 i4_alpha_stim_multiplier)
|
|
{
|
|
enc_loop_cu_final_prms_t *ps_final_prms;
|
|
nbr_4x4_t *ps_nbr_4x4;
|
|
tu_prms_t s_tu_prms[64 * 4];
|
|
tu_prms_t *ps_tu_prms;
|
|
|
|
WORD32 i4_perform_rdoq;
|
|
WORD32 i4_perform_sbh;
|
|
WORD32 ai4_tu_split_flags[4];
|
|
WORD32 ai4_tu_early_cbf[4];
|
|
WORD32 num_split_flags = 1;
|
|
WORD32 i;
|
|
UWORD8 u1_tu_size;
|
|
UWORD8 *pu1_pred;
|
|
UWORD8 *pu1_ecd_data;
|
|
WORD16 *pi2_deq_data;
|
|
UWORD8 *pu1_csbf_buf;
|
|
UWORD8 *pu1_tu_sz_sft;
|
|
UWORD8 *pu1_tu_posx;
|
|
UWORD8 *pu1_tu_posy;
|
|
LWORD64 total_rdopt_cost;
|
|
WORD32 ctr;
|
|
WORD32 chrm_ctr;
|
|
WORD32 num_tu_in_cu = 0;
|
|
WORD32 pred_stride;
|
|
WORD32 recon_stride;
|
|
WORD32 trans_size = ps_cu_analyse->u1_cu_size;
|
|
WORD32 csbf_strd;
|
|
WORD32 chrm_present_flag;
|
|
WORD32 ecd_data_bytes_cons;
|
|
WORD32 num_4x4_in_cu;
|
|
WORD32 num_4x4_in_tu;
|
|
WORD32 recon_func_mode;
|
|
WORD32 cu_bits;
|
|
UWORD8 u1_compute_spatial_ssd;
|
|
|
|
/* min_trans_size is initialized to some huge number than usual TU sizes */
|
|
WORD32 i4_min_trans_size = 256;
|
|
/* Get the RDOPT cost of the best CU mode for early_exit */
|
|
LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
|
|
WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
|
|
|
|
/* model for no residue syntax qt root cbf flag */
|
|
UWORD8 u1_qtroot_cbf_cabac_model = ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_NORES_IDX];
|
|
|
|
/* backup copy of cabac states for restoration if zero cu reside rdo wins later */
|
|
UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END];
|
|
|
|
/* for skip cases tables are not reqquired */
|
|
UWORD8 u1_skip_tu_sz_sft = 0;
|
|
UWORD8 u1_skip_tu_posx = 0;
|
|
UWORD8 u1_skip_tu_posy = 0;
|
|
UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
|
|
|
|
/* get the pointers based on curbuf idx */
|
|
ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
|
|
ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
|
|
pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
|
|
pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
|
|
csbf_strd = ps_ctxt->i4_cu_csbf_strd;
|
|
pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
|
|
|
|
pred_stride = ps_inter_cand->i4_pred_data_stride;
|
|
recon_stride = cu_size;
|
|
pu1_pred = ps_inter_cand->pu1_pred_data;
|
|
chrm_ctr = 0;
|
|
ecd_data_bytes_cons = 0;
|
|
total_rdopt_cost = 0;
|
|
num_4x4_in_cu = cu_size >> 2;
|
|
recon_func_mode = PRED_MODE_INTER;
|
|
cu_bits = 0;
|
|
|
|
/* get the 4x4 level postion of current cu */
|
|
cu_pos_x = cu_pos_x << 1;
|
|
cu_pos_y = cu_pos_y << 1;
|
|
|
|
/* default value for cu coded flag */
|
|
ps_final_prms->u1_is_cu_coded = 0;
|
|
|
|
/*init of ssd of CU accuumulated over all TU*/
|
|
ps_final_prms->u4_cu_sad = 0;
|
|
|
|
/* populate the coeffs scan idx */
|
|
ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
|
|
|
|
#if ENABLE_INTER_ZCU_COST
|
|
/* reset cu not coded cost */
|
|
ps_ctxt->i8_cu_not_coded_cost = 0;
|
|
|
|
/* backup copy of cabac states for restoration if zero cu reside rdo wins later */
|
|
memcpy(au1_rdopt_init_ctxt_models, &ps_ctxt->au1_rdopt_init_ctxt_models[0], IHEVC_CAB_CTXT_END);
|
|
#endif
|
|
|
|
if(ps_cu_analyse->u1_cu_size == 64)
|
|
{
|
|
num_split_flags = 4;
|
|
u1_tu_size = 32;
|
|
}
|
|
else
|
|
{
|
|
num_split_flags = 1;
|
|
u1_tu_size = ps_cu_analyse->u1_cu_size;
|
|
}
|
|
|
|
/* ckeck for skip mode */
|
|
if(1 == ps_final_prms->u1_skip_flag)
|
|
{
|
|
if(64 == cu_size)
|
|
{
|
|
/* TU = CU/2 is set but no trnaform is evaluated */
|
|
num_tu_in_cu = 4;
|
|
pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
|
|
pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
|
|
pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
|
|
}
|
|
else
|
|
{
|
|
/* TU = CU is set but no trnaform is evaluated */
|
|
num_tu_in_cu = 1;
|
|
pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
|
|
pu1_tu_posx = &u1_skip_tu_posx;
|
|
pu1_tu_posy = &u1_skip_tu_posy;
|
|
}
|
|
|
|
recon_func_mode = PRED_MODE_SKIP;
|
|
}
|
|
/* check for PU part mode being AMP or No AMP */
|
|
else if(ps_final_prms->u1_part_mode < SIZE_2NxnU)
|
|
{
|
|
if((SIZE_2Nx2N == ps_final_prms->u1_part_mode) && (cu_size < 64))
|
|
{
|
|
/* TU= CU is evaluated 2Nx2N inter case */
|
|
num_tu_in_cu = 1;
|
|
pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
|
|
pu1_tu_posx = &u1_skip_tu_posx;
|
|
pu1_tu_posy = &u1_skip_tu_posy;
|
|
}
|
|
else
|
|
{
|
|
/* currently TU= CU/2 is evaluated for all inter case */
|
|
num_tu_in_cu = 4;
|
|
pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
|
|
pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
|
|
pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* for AMP cases one level of TU recurssion is done */
|
|
/* based on oreintation of the partitions */
|
|
num_tu_in_cu = 10;
|
|
pu1_tu_sz_sft = &gau1_inter_tu_shft_amt_amp[ps_final_prms->u1_part_mode - 4][0];
|
|
pu1_tu_posx = &gau1_inter_tu_posx_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
|
|
pu1_tu_posy = &gau1_inter_tu_posy_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
|
|
}
|
|
|
|
ps_tu_prms = &s_tu_prms[0];
|
|
num_tu_in_cu = 0;
|
|
|
|
for(i = 0; i < num_split_flags; i++)
|
|
{
|
|
WORD32 i4_x_off = 0, i4_y_off = 0;
|
|
|
|
if(i == 1 || i == 3)
|
|
{
|
|
i4_x_off = 32;
|
|
}
|
|
|
|
if(i == 2 || i == 3)
|
|
{
|
|
i4_y_off = 32;
|
|
}
|
|
|
|
if(1 == ps_final_prms->u1_skip_flag)
|
|
{
|
|
ai4_tu_split_flags[0] = 0;
|
|
ps_inter_cand->ai4_tu_split_flag[i] = 0;
|
|
|
|
ai4_tu_early_cbf[0] = 0;
|
|
}
|
|
else
|
|
{
|
|
ai4_tu_split_flags[0] = ps_inter_cand->ai4_tu_split_flag[i];
|
|
ai4_tu_early_cbf[0] = ps_inter_cand->ai4_tu_early_cbf[i];
|
|
}
|
|
|
|
ps_tu_prms->u1_tu_size = u1_tu_size;
|
|
|
|
ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
|
|
ps_tu_prms,
|
|
&num_tu_in_cu,
|
|
0,
|
|
ai4_tu_split_flags[0],
|
|
ai4_tu_early_cbf[0],
|
|
i4_x_off,
|
|
i4_y_off);
|
|
}
|
|
|
|
/* loop for all tu blocks in current cu */
|
|
ps_tu_prms = &s_tu_prms[0];
|
|
for(ctr = 0; ctr < num_tu_in_cu; ctr++)
|
|
{
|
|
trans_size = ps_tu_prms->u1_tu_size;
|
|
|
|
if(i4_min_trans_size > trans_size)
|
|
{
|
|
i4_min_trans_size = trans_size;
|
|
}
|
|
ps_tu_prms++;
|
|
}
|
|
|
|
if(ps_ctxt->i1_cu_qp_delta_enable)
|
|
{
|
|
ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, i4_min_trans_size, 0);
|
|
}
|
|
|
|
if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
|
|
{
|
|
ps_ctxt->i8_cl_ssd_lambda_qf =
|
|
((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
|
|
100.0f);
|
|
ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
|
|
((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
|
|
(100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
|
|
}
|
|
|
|
u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
|
|
(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
|
|
CONVERT_SSDS_TO_SPATIAL_DOMAIN;
|
|
|
|
if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
|
|
{
|
|
u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
|
|
CONVERT_SSDS_TO_SPATIAL_DOMAIN;
|
|
}
|
|
|
|
if(!u1_compute_spatial_ssd)
|
|
{
|
|
ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
|
|
ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
|
|
}
|
|
else
|
|
{
|
|
ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 1;
|
|
}
|
|
|
|
ps_tu_prms = &s_tu_prms[0];
|
|
|
|
ASSERT(num_tu_in_cu <= 256);
|
|
|
|
/* RDOPT copy States : TU init (best until prev TU) to current */
|
|
memcpy(
|
|
&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0],
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0],
|
|
IHEVC_CAB_COEFFX_PREFIX);
|
|
|
|
for(ctr = 0; ctr < num_tu_in_cu; ctr++)
|
|
{
|
|
WORD32 curr_bytes;
|
|
WORD32 tx_size;
|
|
WORD32 cbf, zero_col, zero_row;
|
|
LWORD64 rdopt_cost;
|
|
UWORD8 u1_is_recon_available;
|
|
|
|
WORD32 curr_pos_x;
|
|
WORD32 curr_pos_y;
|
|
nbr_4x4_t *ps_cur_nbr_4x4;
|
|
UWORD8 *pu1_cur_pred;
|
|
UWORD8 *pu1_cur_src;
|
|
UWORD8 *pu1_cur_recon;
|
|
WORD16 *pi2_cur_deq_data;
|
|
UWORD32 u4_tu_sad;
|
|
WORD32 tu_bits;
|
|
|
|
WORD32 i4_recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
|
|
|
|
trans_size = ps_tu_prms->u1_tu_size;
|
|
/* get the current pos x and pos y in pixels */
|
|
curr_pos_x = ps_tu_prms->u1_x_off; //((cu_size >> 2) * pu1_tu_posx[ctr]);
|
|
curr_pos_y = ps_tu_prms->u1_y_off; //((cu_size >> 2) * pu1_tu_posy[ctr]);
|
|
|
|
num_4x4_in_tu = trans_size >> 2;
|
|
|
|
#if FORCE_8x8_TFR
|
|
if(cu_size == 64)
|
|
{
|
|
curr_pos_x = ((cu_size >> 3) * pu1_tu_posx[ctr]);
|
|
curr_pos_y = ((cu_size >> 3) * pu1_tu_posy[ctr]);
|
|
}
|
|
#endif
|
|
|
|
/* increment the pointers to start of current TU */
|
|
pu1_cur_src = ((UWORD8 *)pv_src + curr_pos_x);
|
|
pu1_cur_src += (curr_pos_y * src_strd);
|
|
pu1_cur_pred = (pu1_pred + curr_pos_x);
|
|
pu1_cur_pred += (curr_pos_y * pred_stride);
|
|
pi2_cur_deq_data = pi2_deq_data + curr_pos_x;
|
|
pi2_cur_deq_data += (curr_pos_y * cu_size);
|
|
pu1_cur_recon = ((UWORD8 *)ps_final_prms->s_recon_datastore.apv_luma_recon_bufs[0]) +
|
|
curr_pos_x + curr_pos_y * i4_recon_stride;
|
|
|
|
ps_cur_nbr_4x4 = (ps_nbr_4x4 + (curr_pos_x >> 2));
|
|
ps_cur_nbr_4x4 += ((curr_pos_y >> 2) * num_4x4_in_cu);
|
|
|
|
/* RDOPT copy States : TU init (best until prev TU) to current */
|
|
COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
|
|
&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0] +
|
|
IHEVC_CAB_COEFFX_PREFIX,
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
|
|
IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
|
|
|
|
i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
|
|
i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
|
|
|
|
/*2 Multi- dimensinal array based on trans size of rounding factor to be added here */
|
|
/* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */
|
|
/* Currently the complete array will contain only single value*/
|
|
/*The rounding factor is calculated with the formula
|
|
Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2
|
|
rounding factor = (1 - DeadZone Val)
|
|
|
|
Assumption: Cabac states of All the sub-blocks in the TU are considered independent
|
|
*/
|
|
if((ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) && (ctr != 0))
|
|
{
|
|
double i4_lamda_modifier;
|
|
|
|
if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id))
|
|
{
|
|
i4_lamda_modifier = ps_ctxt->i4_lamda_modifier *
|
|
CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
|
|
}
|
|
else
|
|
{
|
|
i4_lamda_modifier = ps_ctxt->i4_lamda_modifier;
|
|
}
|
|
if(ps_ctxt->i4_use_const_lamda_modifier)
|
|
{
|
|
if(ISLICE == ps_ctxt->i1_slice_type)
|
|
{
|
|
i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
|
|
}
|
|
else
|
|
{
|
|
i4_lamda_modifier = CONST_LAMDA_MOD_VAL;
|
|
}
|
|
}
|
|
ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
|
|
&ps_ctxt->i4_quant_round_tu[0][0];
|
|
ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
|
|
&ps_ctxt->i4_quant_round_tu[1][0];
|
|
|
|
memset(
|
|
ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
|
|
0,
|
|
trans_size * trans_size * sizeof(WORD32));
|
|
memset(
|
|
ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
|
|
0,
|
|
trans_size * trans_size * sizeof(WORD32));
|
|
|
|
ihevce_quant_rounding_factor_gen(
|
|
trans_size,
|
|
1,
|
|
&ps_ctxt->s_rdopt_entropy_ctxt,
|
|
ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
|
|
ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
|
|
i4_lamda_modifier,
|
|
1);
|
|
}
|
|
else
|
|
{
|
|
ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
|
|
ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3];
|
|
ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
|
|
ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3];
|
|
}
|
|
|
|
/* call T Q IT IQ and recon function */
|
|
cbf = ihevce_t_q_iq_ssd_scan_fxn(
|
|
ps_ctxt,
|
|
pu1_cur_pred,
|
|
pred_stride,
|
|
pu1_cur_src,
|
|
src_strd,
|
|
pi2_cur_deq_data,
|
|
cu_size,
|
|
pu1_cur_recon,
|
|
i4_recon_stride,
|
|
pu1_ecd_data,
|
|
pu1_csbf_buf,
|
|
csbf_strd,
|
|
trans_size,
|
|
recon_func_mode,
|
|
&rdopt_cost,
|
|
&curr_bytes,
|
|
&tu_bits,
|
|
&u4_tu_sad,
|
|
&zero_col,
|
|
&zero_row,
|
|
&u1_is_recon_available,
|
|
i4_perform_rdoq,
|
|
i4_perform_sbh,
|
|
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
|
|
i4_alpha_stim_multiplier,
|
|
u1_is_cu_noisy,
|
|
#endif
|
|
u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
|
|
ps_ctxt->u1_use_early_cbf_data ? ps_tu_prms->i4_early_cbf : 1);
|
|
|
|
#if COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL && !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
|
|
if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
|
|
{
|
|
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
|
|
rdopt_cost = ihevce_inject_stim_into_distortion(
|
|
pu1_cur_src,
|
|
src_strd,
|
|
pu1_cur_pred,
|
|
pred_stride,
|
|
rdopt_cost,
|
|
i4_alpha_stim_multiplier,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
NULL_PLANE);
|
|
#else
|
|
if(u1_compute_spatial_ssd && u1_is_recon_available)
|
|
{
|
|
rdopt_cost = ihevce_inject_stim_into_distortion(
|
|
pu1_cur_src,
|
|
src_strd,
|
|
pu1_cur_recon,
|
|
i4_recon_stride,
|
|
rdopt_cost,
|
|
i4_alpha_stim_multiplier,
|
|
trans_size,
|
|
0,
|
|
NULL_PLANE);
|
|
}
|
|
else
|
|
{
|
|
rdopt_cost = ihevce_inject_stim_into_distortion(
|
|
pu1_cur_src,
|
|
src_strd,
|
|
pu1_cur_pred,
|
|
pred_stride,
|
|
rdopt_cost,
|
|
i4_alpha_stim_multiplier,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
NULL_PLANE);
|
|
}
|
|
#endif
|
|
}
|
|
#endif
|
|
|
|
if(u1_compute_spatial_ssd && u1_is_recon_available)
|
|
{
|
|
ps_final_prms->s_recon_datastore.au1_bufId_with_winning_LumaRecon[ctr] = 0;
|
|
}
|
|
else
|
|
{
|
|
ps_final_prms->s_recon_datastore.au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
|
|
}
|
|
|
|
/* accumulate the TU sad into cu sad */
|
|
ps_final_prms->u4_cu_sad += u4_tu_sad;
|
|
|
|
/* accumulate the TU bits into cu bits */
|
|
cu_bits += tu_bits;
|
|
|
|
/* inter cu is coded if any of the tu is coded in it */
|
|
ps_final_prms->u1_is_cu_coded |= cbf;
|
|
|
|
/* call the entropy function to get the bits */
|
|
/* add that to rd opt cost(SSD) */
|
|
|
|
/* update the bytes */
|
|
ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
|
|
ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = curr_bytes;
|
|
/* update the zero_row and col info for the final mode */
|
|
ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_col = zero_col;
|
|
ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_row = zero_row;
|
|
|
|
/* update the bytes */
|
|
ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
|
|
|
|
/* update the total bytes cons */
|
|
ecd_data_bytes_cons += curr_bytes;
|
|
pu1_ecd_data += curr_bytes;
|
|
|
|
/* RDOPT copy States : New updated after curr TU to TU init */
|
|
if(0 != cbf)
|
|
{
|
|
/* update to new state only if CBF is non zero */
|
|
COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
|
|
&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0] +
|
|
IHEVC_CAB_COEFFX_PREFIX,
|
|
IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
|
|
}
|
|
|
|
/* by default chroma present is set to 1*/
|
|
chrm_present_flag = 1;
|
|
if(4 == trans_size)
|
|
{
|
|
/* if tusize is 4x4 then only first luma 4x4 will have chroma*/
|
|
if(0 != chrm_ctr)
|
|
{
|
|
chrm_present_flag = INTRA_PRED_CHROMA_IDX_NONE;
|
|
}
|
|
|
|
/* increment the chrm ctr unconditionally */
|
|
chrm_ctr++;
|
|
|
|
/* after ctr reached 4 reset it */
|
|
if(4 == chrm_ctr)
|
|
{
|
|
chrm_ctr = 0;
|
|
}
|
|
}
|
|
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = cbf;
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_chroma_intra_mode_idx = chrm_present_flag;
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b7_qp = ps_ctxt->i4_cu_qp;
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_first_tu_in_cu = 0;
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_transquant_bypass = 0;
|
|
GETRANGE(tx_size, trans_size);
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + (curr_pos_x >> 2);
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + (curr_pos_y >> 2);
|
|
|
|
/* repiclate the nbr 4x4 structure for all 4x4 blocks current TU */
|
|
ps_cur_nbr_4x4->b1_y_cbf = cbf;
|
|
/*copy the cu qp. This will be overwritten by qp calculated based on skip flag at final stage of cu mode decide*/
|
|
ps_cur_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
|
|
|
|
/* Qp and cbf are stored for the all 4x4 in TU */
|
|
{
|
|
WORD32 i, j;
|
|
nbr_4x4_t *ps_tmp_4x4;
|
|
ps_tmp_4x4 = ps_cur_nbr_4x4;
|
|
|
|
for(i = 0; i < num_4x4_in_tu; i++)
|
|
{
|
|
for(j = 0; j < num_4x4_in_tu; j++)
|
|
{
|
|
ps_tmp_4x4[j].b8_qp = ps_ctxt->i4_cu_qp;
|
|
ps_tmp_4x4[j].b1_y_cbf = cbf;
|
|
}
|
|
/* row level update*/
|
|
ps_tmp_4x4 += num_4x4_in_cu;
|
|
}
|
|
}
|
|
|
|
#if RDOPT_ENABLE
|
|
/* compute the rdopt cost */
|
|
rdopt_cost +=
|
|
COMPUTE_RATE_COST_CLIP30(tu_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
|
|
#endif
|
|
/* accumulate the costs */
|
|
total_rdopt_cost += rdopt_cost;
|
|
|
|
ps_tu_prms++;
|
|
|
|
if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
|
|
{
|
|
/* Early exit : If the current running cost exceeds
|
|
the prev. best mode cost, break */
|
|
if(total_rdopt_cost > prev_best_rdopt_cost)
|
|
{
|
|
return (total_rdopt_cost);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Modify the cost function for this CU. */
|
|
/* loop in for 8x8 blocks */
|
|
if(ps_ctxt->u1_enable_psyRDOPT)
|
|
{
|
|
UWORD8 *pu1_recon_cu;
|
|
WORD32 recon_stride;
|
|
WORD32 curr_pos_x;
|
|
WORD32 curr_pos_y;
|
|
WORD32 start_index;
|
|
WORD32 num_horz_cu_in_ctb;
|
|
WORD32 had_block_size;
|
|
|
|
/* tODO: sreenivasa ctb size has to be used appropriately */
|
|
had_block_size = 8;
|
|
num_horz_cu_in_ctb = 64 / had_block_size;
|
|
|
|
curr_pos_x = cu_pos_x << 2; /* pel units */
|
|
curr_pos_y = cu_pos_y << 2; /* pel units */
|
|
recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
|
|
pu1_recon_cu = ((UWORD8 *)ps_final_prms->s_recon_datastore
|
|
.apv_luma_recon_bufs[0]); // already pointing to the current CU recon
|
|
//+ \curr_pos_x + curr_pos_y * recon_stride;
|
|
|
|
/* start index to index the source satd of curr cu int he current ctb*/
|
|
start_index =
|
|
(curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
|
|
|
|
{
|
|
total_rdopt_cost += ihevce_psy_rd_cost(
|
|
ps_ctxt->ai4_source_satd_8x8,
|
|
pu1_recon_cu,
|
|
recon_stride,
|
|
1, //howz stride
|
|
cu_size,
|
|
0, // pic type
|
|
0, //layer id
|
|
ps_ctxt->i4_satd_lamda, // lambda
|
|
start_index,
|
|
ps_ctxt->u1_is_input_data_hbd,
|
|
ps_ctxt->u4_psy_strength,
|
|
&ps_ctxt->s_cmn_opt_func); // 8 bit
|
|
}
|
|
}
|
|
|
|
/* store the num TUs*/
|
|
ps_final_prms->u2_num_tus_in_cu = num_tu_in_cu;
|
|
|
|
/* update the bytes consumed */
|
|
ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
|
|
|
|
/* store the current cu size to final prms */
|
|
ps_final_prms->u1_cu_size = cu_size;
|
|
|
|
/* cu bits will be having luma residual bits till this point */
|
|
/* if zero_cbf eval is disabled then cu bits will be zero */
|
|
ps_final_prms->u4_cu_luma_res_bits = cu_bits;
|
|
|
|
/* ------------- Chroma processing -------------- */
|
|
/* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEDD preset*/
|
|
if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
|
|
{
|
|
LWORD64 chrm_rdopt_cost;
|
|
WORD32 chrm_rdopt_tu_bits;
|
|
|
|
/* Store the current RDOPT cost to enable early exit in chrom_prcs */
|
|
ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
|
|
|
|
chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
|
|
ps_ctxt,
|
|
curr_buf_idx,
|
|
0, /* TU mode : Don't care in Inter patrh */
|
|
ps_chrm_cu_buf_prms->pu1_curr_src,
|
|
ps_chrm_cu_buf_prms->i4_chrm_src_stride,
|
|
ps_chrm_cu_buf_prms->pu1_cu_left,
|
|
ps_chrm_cu_buf_prms->pu1_cu_top,
|
|
ps_chrm_cu_buf_prms->pu1_cu_top_left,
|
|
ps_chrm_cu_buf_prms->i4_cu_left_stride,
|
|
(cu_pos_x >> 1),
|
|
(cu_pos_y >> 1),
|
|
&chrm_rdopt_tu_bits,
|
|
i4_alpha_stim_multiplier,
|
|
u1_is_cu_noisy);
|
|
|
|
#if WEIGH_CHROMA_COST
|
|
chrm_rdopt_cost = (LWORD64)(
|
|
(chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
|
|
(1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
|
|
CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
|
|
#endif
|
|
|
|
#if CHROMA_RDOPT_ENABLE
|
|
total_rdopt_cost += chrm_rdopt_cost;
|
|
#endif
|
|
cu_bits += chrm_rdopt_tu_bits;
|
|
|
|
/* during chroma evaluation if skip decision was over written */
|
|
/* then the current skip candidate is set to a non skip candidate */
|
|
ps_inter_cand->b1_skip_flag = ps_final_prms->u1_skip_flag;
|
|
|
|
/* cu bits for chroma residual if chroma rdopt is on */
|
|
/* if zero_cbf eval is disabled then cu bits will be zero */
|
|
ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
|
|
|
|
if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
|
|
{
|
|
/* Early exit : If the current running cost exceeds
|
|
the prev. best mode cost, break */
|
|
if(total_rdopt_cost > prev_best_rdopt_cost)
|
|
{
|
|
return (total_rdopt_cost);
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{}
|
|
|
|
#if SHRINK_INTER_TUTREE
|
|
/* ------------- Quadtree TU split optimization ------------ */
|
|
if(ps_final_prms->u1_is_cu_coded)
|
|
{
|
|
ps_final_prms->u2_num_tus_in_cu = ihevce_shrink_inter_tu_tree(
|
|
&ps_final_prms->as_tu_enc_loop[0],
|
|
&ps_final_prms->as_tu_enc_loop_temp_prms[0],
|
|
&ps_final_prms->s_recon_datastore,
|
|
num_tu_in_cu,
|
|
(ps_ctxt->u1_chroma_array_type == 2));
|
|
}
|
|
#endif
|
|
|
|
/* RDOPT copy States : Best after all luma TUs (and chroma,if enabled)to current */
|
|
COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
|
|
&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0] +
|
|
IHEVC_CAB_COEFFX_PREFIX,
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
|
|
IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
|
|
|
|
/* -------- Bit estimate for RD opt -------------- */
|
|
{
|
|
nbr_avail_flags_t s_nbr;
|
|
/*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
|
|
WORD32 cbf_bits, header_bits;
|
|
|
|
/* get the neighbour availability flags for current cu */
|
|
ihevce_get_only_nbr_flag(
|
|
&s_nbr,
|
|
ps_ctxt->pu1_ctb_nbr_map,
|
|
ps_ctxt->i4_nbr_map_strd,
|
|
cu_pos_x,
|
|
cu_pos_y,
|
|
(cu_size >> 2),
|
|
(cu_size >> 2));
|
|
|
|
/* call the entropy rdo encode to get the bit estimate for current cu */
|
|
header_bits = ihevce_entropy_rdo_encode_cu(
|
|
&ps_ctxt->s_rdopt_entropy_ctxt,
|
|
ps_final_prms,
|
|
(cu_pos_x >> 1), /* back to 8x8 pel units */
|
|
(cu_pos_y >> 1), /* back to 8x8 pel units */
|
|
cu_size,
|
|
ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
|
|
: s_nbr.u1_top_avail,
|
|
s_nbr.u1_left_avail,
|
|
&ps_final_prms->pu1_cu_coeffs[0],
|
|
&cbf_bits);
|
|
|
|
cu_bits += header_bits;
|
|
|
|
/* cbf bits are excluded from header bits, instead considered as texture bits */
|
|
/* incase if zero cbf eval is disabled then texture bits gets added here */
|
|
ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
|
|
ps_final_prms->u4_cu_cbf_bits = cbf_bits;
|
|
|
|
#if RDOPT_ENABLE
|
|
/* add the cost of coding the header bits */
|
|
total_rdopt_cost +=
|
|
COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
|
|
|
|
#if ENABLE_INTER_ZCU_COST
|
|
/* If cu is coded, Evaluate not coded cost and check if it improves over coded cost */
|
|
if(ps_final_prms->u1_is_cu_coded && (ZCBF_ENABLE == ps_ctxt->i4_zcbf_rdo_level))
|
|
{
|
|
LWORD64 i8_cu_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost;
|
|
|
|
WORD32 is_2nx2n_mergecu = (SIZE_2Nx2N == ps_final_prms->u1_part_mode) &&
|
|
(1 == ps_final_prms->as_pu_enc_loop[0].b1_merge_flag);
|
|
|
|
cab_ctxt_t *ps_cab_ctxt =
|
|
&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx].s_cabac_ctxt;
|
|
|
|
/* Read header bits generatated after ihevce_entropy_rdo_encode_cu() call */
|
|
UWORD32 u4_cu_hdr_bits_q12 = ps_cab_ctxt->u4_header_bits_estimated_q12;
|
|
|
|
/* account for coding qt_root_cbf = 0 */
|
|
/* First subtract cost for coding as 1 (part of header bits) and then add cost for coding as 0 */
|
|
u4_cu_hdr_bits_q12 += gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 0];
|
|
if(u4_cu_hdr_bits_q12 < gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1])
|
|
u4_cu_hdr_bits_q12 = 0;
|
|
else
|
|
u4_cu_hdr_bits_q12 -= gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1];
|
|
|
|
/* add the cost of coding the header bits */
|
|
i8_cu_not_coded_cost += COMPUTE_RATE_COST_CLIP30(
|
|
u4_cu_hdr_bits_q12 /* ps_final_prms->u4_cu_hdr_bits */,
|
|
ps_ctxt->i8_cl_ssd_lambda_qf,
|
|
(LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
|
|
|
|
if(ps_ctxt->u1_enable_psyRDOPT)
|
|
{
|
|
i8_cu_not_coded_cost = total_rdopt_cost + 1;
|
|
}
|
|
|
|
/* Evaluate qtroot cbf rdo; exclude 2Nx2N Merge as skip cu is explicitly evaluated */
|
|
if((i8_cu_not_coded_cost <= total_rdopt_cost) && (!is_2nx2n_mergecu))
|
|
{
|
|
WORD32 tx_size;
|
|
|
|
/* force cu as not coded and update the cost */
|
|
ps_final_prms->u1_is_cu_coded = 0;
|
|
ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
|
|
ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
|
|
|
|
total_rdopt_cost = i8_cu_not_coded_cost;
|
|
|
|
/* reset num TUs to 1 unless cu size id 64 */
|
|
ps_final_prms->u2_num_tus_in_cu = (64 == cu_size) ? 4 : 1;
|
|
trans_size = (64 == cu_size) ? 32 : cu_size;
|
|
GETRANGE(tx_size, trans_size);
|
|
|
|
/* reset the bytes consumed */
|
|
ps_final_prms->i4_num_bytes_ecd_data = 0;
|
|
|
|
/* reset texture related bits and roll back header bits*/
|
|
ps_final_prms->u4_cu_cbf_bits = 0;
|
|
ps_final_prms->u4_cu_luma_res_bits = 0;
|
|
ps_final_prms->u4_cu_chroma_res_bits = 0;
|
|
ps_final_prms->u4_cu_hdr_bits =
|
|
(u4_cu_hdr_bits_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q;
|
|
|
|
/* update cabac model with qtroot cbf = 0 decision */
|
|
ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_NORES_IDX] =
|
|
gau1_ihevc_next_state[u1_qtroot_cbf_cabac_model << 1];
|
|
|
|
/* restore untouched cabac models for, tusplit, cbfs, texture etc */
|
|
memcpy(
|
|
&ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_SPLIT_TFM],
|
|
&au1_rdopt_init_ctxt_models[IHEVC_CAB_SPLIT_TFM],
|
|
(IHEVC_CAB_CTXT_END - IHEVC_CAB_SPLIT_TFM));
|
|
|
|
/* mark all tus as not coded for final eval */
|
|
for(ctr = 0; ctr < ps_final_prms->u2_num_tus_in_cu; ctr++)
|
|
{
|
|
WORD32 curr_pos_x = (ctr & 0x1) ? (trans_size >> 2) : 0;
|
|
WORD32 curr_pos_y = (ctr & 0x2) ? (trans_size >> 2) : 0;
|
|
|
|
nbr_4x4_t *ps_cur_nbr_4x4 =
|
|
ps_nbr_4x4 + curr_pos_x + (curr_pos_y * num_4x4_in_cu);
|
|
|
|
num_4x4_in_tu = trans_size >> 2;
|
|
|
|
ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = 0;
|
|
ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cb_bytes_consumed[0] = 0;
|
|
ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cr_bytes_consumed[0] = 0;
|
|
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = 0;
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
|
|
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
|
|
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + curr_pos_x;
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + curr_pos_y;
|
|
|
|
/* reset cbf for the all 4x4 in TU */
|
|
{
|
|
WORD32 i, j;
|
|
nbr_4x4_t *ps_tmp_4x4;
|
|
ps_tmp_4x4 = ps_cur_nbr_4x4;
|
|
|
|
for(i = 0; i < num_4x4_in_tu; i++)
|
|
{
|
|
for(j = 0; j < num_4x4_in_tu; j++)
|
|
{
|
|
ps_tmp_4x4[j].b1_y_cbf = 0;
|
|
}
|
|
/* row level update*/
|
|
ps_tmp_4x4 += num_4x4_in_cu;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
#endif /* ENABLE_INTER_ZCU_COST */
|
|
|
|
#endif /* RDOPT_ENABLE */
|
|
}
|
|
|
|
return (total_rdopt_cost);
|
|
}
|
|
|
|
#if ENABLE_RDO_BASED_TU_RECURSION
|
|
LWORD64 ihevce_inter_tu_tree_selector_and_rdopt_cost_computer(
|
|
ihevce_enc_loop_ctxt_t *ps_ctxt,
|
|
enc_loop_cu_prms_t *ps_cu_prms,
|
|
void *pv_src,
|
|
WORD32 cu_size,
|
|
WORD32 cu_pos_x,
|
|
WORD32 cu_pos_y,
|
|
WORD32 curr_buf_idx,
|
|
enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
|
|
cu_inter_cand_t *ps_inter_cand,
|
|
cu_analyse_t *ps_cu_analyse,
|
|
WORD32 i4_alpha_stim_multiplier)
|
|
{
|
|
tu_tree_node_t as_tu_nodes[256 + 64 + 16 + 4 + 1];
|
|
buffer_data_for_tu_t s_buffer_data_for_tu;
|
|
enc_loop_cu_final_prms_t *ps_final_prms;
|
|
nbr_4x4_t *ps_nbr_4x4;
|
|
|
|
WORD32 num_split_flags = 1;
|
|
UWORD8 u1_tu_size;
|
|
UWORD8 *pu1_pred;
|
|
UWORD8 *pu1_ecd_data;
|
|
WORD16 *pi2_deq_data;
|
|
UWORD8 *pu1_csbf_buf;
|
|
UWORD8 *pu1_tu_sz_sft;
|
|
UWORD8 *pu1_tu_posx;
|
|
UWORD8 *pu1_tu_posy;
|
|
LWORD64 total_rdopt_cost;
|
|
WORD32 ctr;
|
|
WORD32 chrm_ctr;
|
|
WORD32 pred_stride;
|
|
WORD32 recon_stride;
|
|
WORD32 trans_size = ps_cu_analyse->u1_cu_size;
|
|
WORD32 csbf_strd;
|
|
WORD32 ecd_data_bytes_cons;
|
|
WORD32 num_4x4_in_cu;
|
|
WORD32 num_4x4_in_tu;
|
|
WORD32 recon_func_mode;
|
|
WORD32 cu_bits;
|
|
UWORD8 u1_compute_spatial_ssd;
|
|
/* backup copy of cabac states for restoration if zero cu reside rdo wins later */
|
|
UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END];
|
|
|
|
WORD32 i4_min_trans_size = 256;
|
|
LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
|
|
WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
|
|
/* model for no residue syntax qt root cbf flag */
|
|
UWORD8 u1_qtroot_cbf_cabac_model = ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_NORES_IDX];
|
|
UWORD8 u1_skip_tu_sz_sft = 0;
|
|
UWORD8 u1_skip_tu_posx = 0;
|
|
UWORD8 u1_skip_tu_posy = 0;
|
|
UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
|
|
|
|
ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
|
|
ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
|
|
pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
|
|
pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
|
|
csbf_strd = ps_ctxt->i4_cu_csbf_strd;
|
|
pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
|
|
pred_stride = ps_inter_cand->i4_pred_data_stride;
|
|
recon_stride = cu_size;
|
|
pu1_pred = ps_inter_cand->pu1_pred_data;
|
|
chrm_ctr = 0;
|
|
ecd_data_bytes_cons = 0;
|
|
total_rdopt_cost = 0;
|
|
num_4x4_in_cu = cu_size >> 2;
|
|
recon_func_mode = PRED_MODE_INTER;
|
|
cu_bits = 0;
|
|
|
|
/* get the 4x4 level postion of current cu */
|
|
cu_pos_x = cu_pos_x << 1;
|
|
cu_pos_y = cu_pos_y << 1;
|
|
|
|
ps_final_prms->u1_is_cu_coded = 0;
|
|
ps_final_prms->u4_cu_sad = 0;
|
|
|
|
/* populate the coeffs scan idx */
|
|
ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
|
|
|
|
#if ENABLE_INTER_ZCU_COST
|
|
/* reset cu not coded cost */
|
|
ps_ctxt->i8_cu_not_coded_cost = 0;
|
|
|
|
/* backup copy of cabac states for restoration if zero cu reside rdo wins later */
|
|
memcpy(au1_rdopt_init_ctxt_models, &ps_ctxt->au1_rdopt_init_ctxt_models[0], IHEVC_CAB_CTXT_END);
|
|
#endif
|
|
|
|
if(ps_cu_analyse->u1_cu_size == 64)
|
|
{
|
|
num_split_flags = 4;
|
|
u1_tu_size = 32;
|
|
}
|
|
else
|
|
{
|
|
num_split_flags = 1;
|
|
u1_tu_size = ps_cu_analyse->u1_cu_size;
|
|
}
|
|
|
|
if(1 == ps_final_prms->u1_skip_flag)
|
|
{
|
|
if(64 == cu_size)
|
|
{
|
|
/* TU = CU/2 is set but no trnaform is evaluated */
|
|
pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
|
|
pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
|
|
pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
|
|
}
|
|
else
|
|
{
|
|
/* TU = CU is set but no trnaform is evaluated */
|
|
pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
|
|
pu1_tu_posx = &u1_skip_tu_posx;
|
|
pu1_tu_posy = &u1_skip_tu_posy;
|
|
}
|
|
|
|
recon_func_mode = PRED_MODE_SKIP;
|
|
}
|
|
/* check for PU part mode being AMP or No AMP */
|
|
else if(ps_final_prms->u1_part_mode < SIZE_2NxnU)
|
|
{
|
|
if((SIZE_2Nx2N == ps_final_prms->u1_part_mode) && (cu_size < 64))
|
|
{
|
|
/* TU= CU is evaluated 2Nx2N inter case */
|
|
pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
|
|
pu1_tu_posx = &u1_skip_tu_posx;
|
|
pu1_tu_posy = &u1_skip_tu_posy;
|
|
}
|
|
else
|
|
{
|
|
/* currently TU= CU/2 is evaluated for all inter case */
|
|
pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
|
|
pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
|
|
pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* for AMP cases one level of TU recurssion is done */
|
|
/* based on oreintation of the partitions */
|
|
pu1_tu_sz_sft = &gau1_inter_tu_shft_amt_amp[ps_final_prms->u1_part_mode - 4][0];
|
|
pu1_tu_posx = &gau1_inter_tu_posx_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
|
|
pu1_tu_posy = &gau1_inter_tu_posy_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
|
|
}
|
|
|
|
i4_min_trans_size = 4;
|
|
|
|
if(ps_ctxt->i1_cu_qp_delta_enable)
|
|
{
|
|
ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, i4_min_trans_size, 0);
|
|
}
|
|
|
|
if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
|
|
{
|
|
ps_ctxt->i8_cl_ssd_lambda_qf =
|
|
((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
|
|
100.0f);
|
|
ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
|
|
((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
|
|
(100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
|
|
}
|
|
|
|
u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
|
|
(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
|
|
CONVERT_SSDS_TO_SPATIAL_DOMAIN;
|
|
|
|
if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
|
|
{
|
|
u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
|
|
CONVERT_SSDS_TO_SPATIAL_DOMAIN;
|
|
}
|
|
|
|
if(!u1_compute_spatial_ssd)
|
|
{
|
|
ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
|
|
ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
|
|
}
|
|
else
|
|
{
|
|
ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 1;
|
|
|
|
if(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0))
|
|
{
|
|
ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 1;
|
|
}
|
|
}
|
|
|
|
/* RDOPT copy States : TU init (best until prev TU) to current */
|
|
memcpy(
|
|
&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0],
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0],
|
|
IHEVC_CAB_COEFFX_PREFIX);
|
|
|
|
ihevce_tu_tree_init(
|
|
as_tu_nodes,
|
|
cu_size,
|
|
(cu_size == 64) ? !ps_inter_cand->b1_skip_flag : 0,
|
|
ps_inter_cand->b1_skip_flag ? 0 : ps_ctxt->u1_max_inter_tr_depth,
|
|
INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
|
|
ps_ctxt->u1_chroma_array_type == 2);
|
|
|
|
if(!ps_inter_cand->b1_skip_flag && (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
|
|
{
|
|
ihevce_tuSplitArray_to_tuTree_mapper(
|
|
as_tu_nodes,
|
|
ps_inter_cand->ai4_tu_split_flag,
|
|
cu_size,
|
|
cu_size,
|
|
MAX(MIN_TU_SIZE, (cu_size >> ps_ctxt->u1_max_inter_tr_depth)),
|
|
MIN(MAX_TU_SIZE, cu_size),
|
|
ps_inter_cand->b1_skip_flag);
|
|
}
|
|
|
|
ASSERT(ihevce_tu_tree_coverage_in_cu(as_tu_nodes) == cu_size * cu_size);
|
|
|
|
#if ENABLE_INTER_ZCU_COST
|
|
ps_ctxt->i8_cu_not_coded_cost = 0;
|
|
#endif
|
|
|
|
s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_src = pv_src;
|
|
s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_pred = pu1_pred;
|
|
s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_recon =
|
|
ps_final_prms->s_recon_datastore.apv_luma_recon_bufs[0];
|
|
s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_src_stride = src_strd;
|
|
s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_pred_stride = pred_stride;
|
|
s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_recon_stride =
|
|
ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
|
|
s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_src = ps_chrm_cu_buf_prms->pu1_curr_src;
|
|
s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_pred =
|
|
ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
|
|
curr_buf_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) + ((ps_ctxt->u1_chroma_array_type == 2) *
|
|
(MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
|
|
s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_recon =
|
|
ps_final_prms->s_recon_datastore.apv_chroma_recon_bufs[0];
|
|
s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_src_stride =
|
|
ps_chrm_cu_buf_prms->i4_chrm_src_stride;
|
|
s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride =
|
|
ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
|
|
s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_recon_stride =
|
|
ps_final_prms->s_recon_datastore.i4_chromaRecon_stride;
|
|
s_buffer_data_for_tu.ps_nbr_data_buf = ps_nbr_4x4;
|
|
s_buffer_data_for_tu.pi2_deq_data = pi2_deq_data;
|
|
s_buffer_data_for_tu.pi2_deq_data_chroma =
|
|
pi2_deq_data + ps_final_prms->i4_chrm_deq_coeff_strt_idx;
|
|
s_buffer_data_for_tu.i4_nbr_data_buf_stride = num_4x4_in_cu;
|
|
s_buffer_data_for_tu.i4_deq_data_stride = cu_size;
|
|
s_buffer_data_for_tu.i4_deq_data_stride_chroma = cu_size;
|
|
s_buffer_data_for_tu.ppu1_ecd = &pu1_ecd_data;
|
|
|
|
if(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0))
|
|
{
|
|
UWORD8 i;
|
|
|
|
UWORD8 *pu1_pred = (UWORD8 *)s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_pred;
|
|
|
|
for(i = 0; i < (!!ps_inter_cand->b3_part_size) + 1; i++)
|
|
{
|
|
pu_t *ps_pu;
|
|
|
|
WORD32 inter_pu_wd;
|
|
WORD32 inter_pu_ht;
|
|
|
|
ps_pu = ps_inter_cand->as_inter_pu + i;
|
|
|
|
inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
|
|
inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
|
|
inter_pu_ht <<= (ps_ctxt->u1_chroma_array_type == 2);
|
|
ihevce_chroma_inter_pred_pu(
|
|
&ps_ctxt->s_mc_ctxt,
|
|
ps_pu,
|
|
pu1_pred,
|
|
s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride);
|
|
if(!!ps_inter_cand->b3_part_size)
|
|
{
|
|
/* 2Nx__ partion case */
|
|
if(inter_pu_wd == cu_size)
|
|
{
|
|
pu1_pred +=
|
|
(inter_pu_ht *
|
|
s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride);
|
|
}
|
|
|
|
/* __x2N partion case */
|
|
if(inter_pu_ht == (cu_size >> !(ps_ctxt->u1_chroma_array_type == 2)))
|
|
{
|
|
pu1_pred += inter_pu_wd;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#if !ENABLE_TOP_DOWN_TU_RECURSION
|
|
total_rdopt_cost = ihevce_tu_tree_selector(
|
|
ps_ctxt,
|
|
as_tu_nodes,
|
|
&s_buffer_data_for_tu,
|
|
&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0],
|
|
recon_func_mode,
|
|
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
|
|
i4_alpha_stim_multiplier,
|
|
u1_is_cu_noisy,
|
|
#endif
|
|
0,
|
|
ps_ctxt->u1_max_inter_tr_depth,
|
|
ps_inter_cand->b3_part_size,
|
|
u1_compute_spatial_ssd);
|
|
#else
|
|
total_rdopt_cost = ihevce_topDown_tu_tree_selector(
|
|
ps_ctxt,
|
|
as_tu_nodes,
|
|
&s_buffer_data_for_tu,
|
|
&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0],
|
|
recon_func_mode,
|
|
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
|
|
i4_alpha_stim_multiplier,
|
|
u1_is_cu_noisy,
|
|
#endif
|
|
0,
|
|
ps_ctxt->u1_max_inter_tr_depth,
|
|
ps_inter_cand->b3_part_size,
|
|
INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
|
|
u1_compute_spatial_ssd);
|
|
#endif
|
|
|
|
ps_final_prms->u2_num_tus_in_cu = 0;
|
|
ps_final_prms->u4_cu_luma_res_bits = 0;
|
|
ps_final_prms->u4_cu_sad = 0;
|
|
total_rdopt_cost = 0;
|
|
ecd_data_bytes_cons = 0;
|
|
cu_bits = 0;
|
|
#if ENABLE_INTER_ZCU_COST
|
|
ps_ctxt->i8_cu_not_coded_cost = 0;
|
|
#endif
|
|
ps_final_prms->u1_is_cu_coded = 0;
|
|
ps_final_prms->u1_cu_size = cu_size;
|
|
|
|
ihevce_tu_selector_debriefer(
|
|
as_tu_nodes,
|
|
ps_final_prms,
|
|
&total_rdopt_cost,
|
|
#if ENABLE_INTER_ZCU_COST
|
|
&ps_ctxt->i8_cu_not_coded_cost,
|
|
#endif
|
|
&ecd_data_bytes_cons,
|
|
&cu_bits,
|
|
&ps_final_prms->u2_num_tus_in_cu,
|
|
ps_ctxt->i4_cu_qp,
|
|
cu_pos_x * 4,
|
|
cu_pos_y * 4,
|
|
INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
|
|
(ps_ctxt->u1_chroma_array_type == 2),
|
|
POS_TL);
|
|
|
|
if(!(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0)))
|
|
{
|
|
ps_final_prms->i4_chrm_cu_coeff_strt_idx = ecd_data_bytes_cons;
|
|
}
|
|
|
|
/* Modify the cost function for this CU. */
|
|
/* loop in for 8x8 blocks */
|
|
if(ps_ctxt->u1_enable_psyRDOPT)
|
|
{
|
|
UWORD8 *pu1_recon_cu;
|
|
WORD32 recon_stride;
|
|
WORD32 curr_pos_x;
|
|
WORD32 curr_pos_y;
|
|
WORD32 start_index;
|
|
WORD32 num_horz_cu_in_ctb;
|
|
WORD32 had_block_size;
|
|
|
|
/* tODO: sreenivasa ctb size has to be used appropriately */
|
|
had_block_size = 8;
|
|
num_horz_cu_in_ctb = 64 / had_block_size;
|
|
|
|
curr_pos_x = cu_pos_x << 2; /* pel units */
|
|
curr_pos_y = cu_pos_y << 2; /* pel units */
|
|
recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
|
|
pu1_recon_cu = ((UWORD8 *)ps_final_prms->s_recon_datastore
|
|
.apv_luma_recon_bufs[0]); // already pointing to the current CU recon
|
|
//+ \curr_pos_x + curr_pos_y * recon_stride;
|
|
|
|
/* start index to index the source satd of curr cu int he current ctb*/
|
|
start_index =
|
|
(curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
|
|
|
|
{
|
|
total_rdopt_cost += ihevce_psy_rd_cost(
|
|
ps_ctxt->ai4_source_satd_8x8,
|
|
pu1_recon_cu,
|
|
recon_stride,
|
|
1, //howz stride
|
|
cu_size,
|
|
0, // pic type
|
|
0, //layer id
|
|
ps_ctxt->i4_satd_lamda, // lambda
|
|
start_index,
|
|
ps_ctxt->u1_is_input_data_hbd,
|
|
ps_ctxt->u4_psy_strength,
|
|
&ps_ctxt->s_cmn_opt_func); // 8 bit
|
|
}
|
|
}
|
|
|
|
ps_final_prms->u1_chroma_intra_pred_mode = 4;
|
|
|
|
/* update the bytes consumed */
|
|
ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
|
|
|
|
/* store the current cu size to final prms */
|
|
ps_final_prms->u1_cu_size = cu_size;
|
|
/* ------------- Chroma processing -------------- */
|
|
/* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEDD preset*/
|
|
if(ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt &&
|
|
!(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0)))
|
|
{
|
|
LWORD64 chrm_rdopt_cost;
|
|
WORD32 chrm_rdopt_tu_bits;
|
|
|
|
/* Store the current RDOPT cost to enable early exit in chrom_prcs */
|
|
ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
|
|
|
|
chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
|
|
ps_ctxt,
|
|
curr_buf_idx,
|
|
0, /* TU mode : Don't care in Inter patrh */
|
|
ps_chrm_cu_buf_prms->pu1_curr_src,
|
|
ps_chrm_cu_buf_prms->i4_chrm_src_stride,
|
|
ps_chrm_cu_buf_prms->pu1_cu_left,
|
|
ps_chrm_cu_buf_prms->pu1_cu_top,
|
|
ps_chrm_cu_buf_prms->pu1_cu_top_left,
|
|
ps_chrm_cu_buf_prms->i4_cu_left_stride,
|
|
(cu_pos_x >> 1),
|
|
(cu_pos_y >> 1),
|
|
&chrm_rdopt_tu_bits,
|
|
i4_alpha_stim_multiplier,
|
|
u1_is_cu_noisy);
|
|
|
|
#if WEIGH_CHROMA_COST
|
|
chrm_rdopt_cost = (LWORD64)(
|
|
(chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
|
|
(1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
|
|
CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
|
|
#endif
|
|
|
|
#if CHROMA_RDOPT_ENABLE
|
|
total_rdopt_cost += chrm_rdopt_cost;
|
|
#endif
|
|
cu_bits += chrm_rdopt_tu_bits;
|
|
|
|
/* during chroma evaluation if skip decision was over written */
|
|
/* then the current skip candidate is set to a non skip candidate */
|
|
ps_inter_cand->b1_skip_flag = ps_final_prms->u1_skip_flag;
|
|
|
|
/* cu bits for chroma residual if chroma rdopt is on */
|
|
/* if zero_cbf eval is disabled then cu bits will be zero */
|
|
ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
|
|
|
|
if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
|
|
{
|
|
/* Early exit : If the current running cost exceeds
|
|
the prev. best mode cost, break */
|
|
if(total_rdopt_cost > prev_best_rdopt_cost)
|
|
{
|
|
return (total_rdopt_cost);
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{}
|
|
|
|
#if SHRINK_INTER_TUTREE
|
|
/* ------------- Quadtree TU split optimization ------------ */
|
|
if(ps_final_prms->u1_is_cu_coded)
|
|
{
|
|
ps_final_prms->u2_num_tus_in_cu = ihevce_shrink_inter_tu_tree(
|
|
&ps_final_prms->as_tu_enc_loop[0],
|
|
&ps_final_prms->as_tu_enc_loop_temp_prms[0],
|
|
&ps_final_prms->s_recon_datastore,
|
|
ps_final_prms->u2_num_tus_in_cu,
|
|
(ps_ctxt->u1_chroma_array_type == 2));
|
|
}
|
|
#endif
|
|
|
|
/* RDOPT copy States : Best after all luma TUs (and chroma,if enabled)to current */
|
|
COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
|
|
&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0] +
|
|
IHEVC_CAB_COEFFX_PREFIX,
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
|
|
IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
|
|
|
|
/* -------- Bit estimate for RD opt -------------- */
|
|
{
|
|
nbr_avail_flags_t s_nbr;
|
|
/*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
|
|
WORD32 cbf_bits, header_bits;
|
|
|
|
/* get the neighbour availability flags for current cu */
|
|
ihevce_get_only_nbr_flag(
|
|
&s_nbr,
|
|
ps_ctxt->pu1_ctb_nbr_map,
|
|
ps_ctxt->i4_nbr_map_strd,
|
|
cu_pos_x,
|
|
cu_pos_y,
|
|
(cu_size >> 2),
|
|
(cu_size >> 2));
|
|
|
|
/* call the entropy rdo encode to get the bit estimate for current cu */
|
|
header_bits = ihevce_entropy_rdo_encode_cu(
|
|
&ps_ctxt->s_rdopt_entropy_ctxt,
|
|
ps_final_prms,
|
|
(cu_pos_x >> 1), /* back to 8x8 pel units */
|
|
(cu_pos_y >> 1), /* back to 8x8 pel units */
|
|
cu_size,
|
|
ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
|
|
: s_nbr.u1_top_avail,
|
|
s_nbr.u1_left_avail,
|
|
&ps_final_prms->pu1_cu_coeffs[0],
|
|
&cbf_bits);
|
|
|
|
cu_bits += header_bits;
|
|
|
|
/* cbf bits are excluded from header bits, instead considered as texture bits */
|
|
/* incase if zero cbf eval is disabled then texture bits gets added here */
|
|
ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
|
|
ps_final_prms->u4_cu_cbf_bits = cbf_bits;
|
|
|
|
#if RDOPT_ENABLE
|
|
/* add the cost of coding the header bits */
|
|
total_rdopt_cost +=
|
|
COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
|
|
|
|
#if ENABLE_INTER_ZCU_COST
|
|
/* If cu is coded, Evaluate not coded cost and check if it improves over coded cost */
|
|
if(ps_final_prms->u1_is_cu_coded && (ZCBF_ENABLE == ps_ctxt->i4_zcbf_rdo_level))
|
|
{
|
|
LWORD64 i8_cu_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost;
|
|
|
|
WORD32 is_2nx2n_mergecu = (SIZE_2Nx2N == ps_final_prms->u1_part_mode) &&
|
|
(1 == ps_final_prms->as_pu_enc_loop[0].b1_merge_flag);
|
|
|
|
cab_ctxt_t *ps_cab_ctxt =
|
|
&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx].s_cabac_ctxt;
|
|
|
|
/* Read header bits generatated after ihevce_entropy_rdo_encode_cu() call */
|
|
UWORD32 u4_cu_hdr_bits_q12 = ps_cab_ctxt->u4_header_bits_estimated_q12;
|
|
|
|
/* account for coding qt_root_cbf = 0 */
|
|
/* First subtract cost for coding as 1 (part of header bits) and then add cost for coding as 0 */
|
|
u4_cu_hdr_bits_q12 += gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 0];
|
|
if(u4_cu_hdr_bits_q12 < gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1])
|
|
u4_cu_hdr_bits_q12 = 0;
|
|
else
|
|
u4_cu_hdr_bits_q12 -= gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1];
|
|
|
|
/* add the cost of coding the header bits */
|
|
i8_cu_not_coded_cost += COMPUTE_RATE_COST_CLIP30(
|
|
u4_cu_hdr_bits_q12 /* ps_final_prms->u4_cu_hdr_bits */,
|
|
ps_ctxt->i8_cl_ssd_lambda_qf,
|
|
(LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
|
|
|
|
if(ps_ctxt->u1_enable_psyRDOPT)
|
|
{
|
|
i8_cu_not_coded_cost = total_rdopt_cost + 1;
|
|
}
|
|
|
|
/* Evaluate qtroot cbf rdo; exclude 2Nx2N Merge as skip cu is explicitly evaluated */
|
|
if((i8_cu_not_coded_cost <= total_rdopt_cost) && (!is_2nx2n_mergecu))
|
|
{
|
|
WORD32 tx_size;
|
|
|
|
/* force cu as not coded and update the cost */
|
|
ps_final_prms->u1_is_cu_coded = 0;
|
|
ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
|
|
ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
|
|
|
|
total_rdopt_cost = i8_cu_not_coded_cost;
|
|
|
|
/* reset num TUs to 1 unless cu size id 64 */
|
|
ps_final_prms->u2_num_tus_in_cu = (64 == cu_size) ? 4 : 1;
|
|
trans_size = (64 == cu_size) ? 32 : cu_size;
|
|
GETRANGE(tx_size, trans_size);
|
|
|
|
/* reset the bytes consumed */
|
|
ps_final_prms->i4_num_bytes_ecd_data = 0;
|
|
|
|
/* reset texture related bits and roll back header bits*/
|
|
ps_final_prms->u4_cu_cbf_bits = 0;
|
|
ps_final_prms->u4_cu_luma_res_bits = 0;
|
|
ps_final_prms->u4_cu_chroma_res_bits = 0;
|
|
ps_final_prms->u4_cu_hdr_bits =
|
|
(u4_cu_hdr_bits_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q;
|
|
|
|
/* update cabac model with qtroot cbf = 0 decision */
|
|
ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_NORES_IDX] =
|
|
gau1_ihevc_next_state[u1_qtroot_cbf_cabac_model << 1];
|
|
|
|
/* restore untouched cabac models for, tusplit, cbfs, texture etc */
|
|
memcpy(
|
|
&ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_SPLIT_TFM],
|
|
&au1_rdopt_init_ctxt_models[IHEVC_CAB_SPLIT_TFM],
|
|
(IHEVC_CAB_CTXT_END - IHEVC_CAB_SPLIT_TFM));
|
|
|
|
/* mark all tus as not coded for final eval */
|
|
for(ctr = 0; ctr < ps_final_prms->u2_num_tus_in_cu; ctr++)
|
|
{
|
|
WORD32 curr_pos_x = (ctr & 0x1) ? (trans_size >> 2) : 0;
|
|
WORD32 curr_pos_y = (ctr & 0x2) ? (trans_size >> 2) : 0;
|
|
|
|
nbr_4x4_t *ps_cur_nbr_4x4 =
|
|
ps_nbr_4x4 + curr_pos_x + (curr_pos_y * num_4x4_in_cu);
|
|
|
|
num_4x4_in_tu = trans_size >> 2;
|
|
|
|
ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = 0;
|
|
ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cb_bytes_consumed[0] = 0;
|
|
ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cr_bytes_consumed[0] = 0;
|
|
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = 0;
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
|
|
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
|
|
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + curr_pos_x;
|
|
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + curr_pos_y;
|
|
|
|
/* reset cbf for the all 4x4 in TU */
|
|
{
|
|
WORD32 i, j;
|
|
nbr_4x4_t *ps_tmp_4x4;
|
|
ps_tmp_4x4 = ps_cur_nbr_4x4;
|
|
|
|
for(i = 0; i < num_4x4_in_tu; i++)
|
|
{
|
|
for(j = 0; j < num_4x4_in_tu; j++)
|
|
{
|
|
ps_tmp_4x4[j].b1_y_cbf = 0;
|
|
}
|
|
/* row level update*/
|
|
ps_tmp_4x4 += num_4x4_in_cu;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
#endif /* ENABLE_INTER_ZCU_COST */
|
|
|
|
#endif /* RDOPT_ENABLE */
|
|
}
|
|
|
|
return (total_rdopt_cost);
|
|
}
|
|
#endif
|
|
|
|
/*!
|
|
******************************************************************************
|
|
* \if Function name : ihevce_inter_rdopt_cu_mc_mvp \endif
|
|
*
|
|
* \brief
|
|
* Inter Coding unit funtion which performs MC and MVP calc for RD opt mode
|
|
*
|
|
* \param[in] ps_ctxt enc_loop module ctxt pointer
|
|
* \param[in] ps_inter_cand pointer to inter candidate structure
|
|
* \param[in] cu_size Current CU size
|
|
* \param[in] cu_pos_x cu position x w.r.t to ctb
|
|
* \param[in] cu_pos_y cu position y w.r.t to ctb
|
|
* \param[in] ps_left_nbr_4x4 Left neighbour 4x4 structure pointer
|
|
* \param[in] ps_top_nbr_4x4 top neighbour 4x4 structure pointer
|
|
* \param[in] ps_topleft_nbr_4x4 top left neighbour 4x4 structure pointer
|
|
* \param[in] nbr_4x4_left_strd left neighbour 4x4 buffer stride
|
|
* \param[in] curr_buf_idx Current Buffer index
|
|
*
|
|
* \return
|
|
* Rdopt cost
|
|
*
|
|
* \author
|
|
* Ittiam
|
|
*
|
|
*****************************************************************************
|
|
*/
|
|
LWORD64 ihevce_inter_rdopt_cu_mc_mvp(
|
|
ihevce_enc_loop_ctxt_t *ps_ctxt,
|
|
cu_inter_cand_t *ps_inter_cand,
|
|
WORD32 cu_size,
|
|
WORD32 cu_pos_x,
|
|
WORD32 cu_pos_y,
|
|
nbr_4x4_t *ps_left_nbr_4x4,
|
|
nbr_4x4_t *ps_top_nbr_4x4,
|
|
nbr_4x4_t *ps_topleft_nbr_4x4,
|
|
WORD32 nbr_4x4_left_strd,
|
|
WORD32 curr_buf_idx)
|
|
{
|
|
/* local variables */
|
|
enc_loop_cu_final_prms_t *ps_final_prms;
|
|
nbr_avail_flags_t s_nbr;
|
|
nbr_4x4_t *ps_nbr_4x4;
|
|
|
|
UWORD8 au1_is_top_used[2][MAX_MVP_LIST_CAND];
|
|
UWORD8 *pu1_pred;
|
|
WORD32 rdopt_cost;
|
|
WORD32 ctr;
|
|
WORD32 num_cu_part;
|
|
WORD32 inter_pu_wd;
|
|
WORD32 inter_pu_ht;
|
|
WORD32 pred_stride;
|
|
|
|
/* get the pointers based on curbuf idx */
|
|
ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
|
|
ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
|
|
pu1_pred = ps_inter_cand->pu1_pred_data;
|
|
|
|
pred_stride = ps_inter_cand->i4_pred_data_stride;
|
|
|
|
/* store the partition mode in final prms */
|
|
ps_final_prms->u1_part_mode = ps_inter_cand->b3_part_size;
|
|
|
|
/* since encoder does not support NXN part type */
|
|
/* num parts can be either 1 or 2 only */
|
|
ASSERT(SIZE_NxN != ps_inter_cand->b3_part_size);
|
|
|
|
num_cu_part = (SIZE_2Nx2N != ps_inter_cand->b3_part_size) + 1;
|
|
|
|
/* get the 4x4 level position of current cu */
|
|
cu_pos_x = cu_pos_x << 1;
|
|
cu_pos_y = cu_pos_y << 1;
|
|
|
|
/* populate cu level params */
|
|
ps_final_prms->u1_intra_flag = PRED_MODE_INTER;
|
|
ps_final_prms->u2_num_pus_in_cu = num_cu_part;
|
|
|
|
/* run a loop over all the partitons in cu */
|
|
for(ctr = 0; ctr < num_cu_part; ctr++)
|
|
{
|
|
pu_mv_t as_pred_mv[MAX_MVP_LIST_CAND];
|
|
pu_t *ps_pu;
|
|
WORD32 skip_or_merge_flag;
|
|
UWORD8 u1_use_mvp_from_top_row;
|
|
|
|
ps_pu = &ps_inter_cand->as_inter_pu[ctr];
|
|
|
|
/* IF AMP then each partitions can have diff wd ht */
|
|
inter_pu_wd = (ps_pu->b4_wd + 1) << 2;
|
|
inter_pu_ht = (ps_pu->b4_ht + 1) << 2;
|
|
|
|
/* populate reference pic buf id for bs compute */
|
|
|
|
/* L0 */
|
|
if(-1 != ps_pu->mv.i1_l0_ref_idx)
|
|
{
|
|
ps_pu->mv.i1_l0_ref_pic_buf_id =
|
|
ps_ctxt->s_mv_pred_ctxt.ps_ref_list[0][ps_pu->mv.i1_l0_ref_idx]->i4_buf_id;
|
|
}
|
|
|
|
/* L1 */
|
|
if(-1 != ps_pu->mv.i1_l1_ref_idx)
|
|
{
|
|
ps_pu->mv.i1_l1_ref_pic_buf_id =
|
|
ps_ctxt->s_mv_pred_ctxt.ps_ref_list[1][ps_pu->mv.i1_l1_ref_idx]->i4_buf_id;
|
|
}
|
|
|
|
/* SKIP or merge check for every part */
|
|
skip_or_merge_flag = ps_inter_cand->b1_skip_flag | ps_pu->b1_merge_flag;
|
|
|
|
/* ----------- MV Prediction ----------------- */
|
|
if(0 == skip_or_merge_flag)
|
|
{
|
|
/* get the neighbour availability flags */
|
|
ihevce_get_only_nbr_flag(
|
|
&s_nbr,
|
|
ps_ctxt->pu1_ctb_nbr_map,
|
|
ps_ctxt->i4_nbr_map_strd,
|
|
cu_pos_x,
|
|
cu_pos_y,
|
|
inter_pu_wd >> 2,
|
|
inter_pu_ht >> 2);
|
|
|
|
if(ps_ctxt->u1_disable_intra_eval && DISABLE_TOP_SYNC && (ps_pu->b4_pos_y == 0))
|
|
{
|
|
u1_use_mvp_from_top_row = 0;
|
|
}
|
|
else
|
|
{
|
|
u1_use_mvp_from_top_row = 1;
|
|
}
|
|
|
|
if(!u1_use_mvp_from_top_row)
|
|
{
|
|
if(s_nbr.u1_top_avail || s_nbr.u1_top_lt_avail || s_nbr.u1_top_rt_avail)
|
|
{
|
|
if(!s_nbr.u1_left_avail && !s_nbr.u1_bot_lt_avail)
|
|
{
|
|
WORD32 curr_cu_pos_in_row, cu_top_right_offset, cu_top_right_dep_pos;
|
|
|
|
/* Ensure Top Right Sync */
|
|
if(!ps_ctxt->u1_use_top_at_ctb_boundary)
|
|
{
|
|
curr_cu_pos_in_row =
|
|
ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x + (cu_pos_x << 2);
|
|
|
|
if(ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y == 0)
|
|
{
|
|
/* No wait for 1st row */
|
|
cu_top_right_offset = -(MAX_CTB_SIZE);
|
|
{
|
|
ihevce_tile_params_t *ps_col_tile_params =
|
|
((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
|
|
ps_ctxt->i4_tile_col_idx);
|
|
|
|
/* No wait for 1st row */
|
|
cu_top_right_offset =
|
|
-(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
|
|
}
|
|
cu_top_right_dep_pos = 0;
|
|
}
|
|
else
|
|
{
|
|
cu_top_right_offset = (cu_size) + 4;
|
|
cu_top_right_dep_pos =
|
|
(ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y >> 6) - 1;
|
|
}
|
|
|
|
ihevce_dmgr_chk_row_row_sync(
|
|
ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
|
|
curr_cu_pos_in_row,
|
|
cu_top_right_offset,
|
|
cu_top_right_dep_pos,
|
|
ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
|
|
ps_ctxt->thrd_id);
|
|
}
|
|
|
|
u1_use_mvp_from_top_row = 1;
|
|
}
|
|
else
|
|
{
|
|
s_nbr.u1_top_avail = 0;
|
|
s_nbr.u1_top_lt_avail = 0;
|
|
s_nbr.u1_top_rt_avail = 0;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
u1_use_mvp_from_top_row = 1;
|
|
}
|
|
}
|
|
/* Call the MV prediction module to get MVP */
|
|
ihevce_mv_pred(
|
|
&ps_ctxt->s_mv_pred_ctxt,
|
|
ps_top_nbr_4x4,
|
|
ps_left_nbr_4x4,
|
|
ps_topleft_nbr_4x4,
|
|
nbr_4x4_left_strd,
|
|
&s_nbr,
|
|
NULL, /* colocated MV */
|
|
ps_pu,
|
|
&as_pred_mv[0],
|
|
au1_is_top_used);
|
|
}
|
|
|
|
/* store the nbr 4x4 structure */
|
|
ps_nbr_4x4->b1_skip_flag = ps_inter_cand->b1_skip_flag;
|
|
ps_nbr_4x4->b1_intra_flag = 0;
|
|
ps_nbr_4x4->b1_pred_l0_flag = 0;
|
|
ps_nbr_4x4->b1_pred_l1_flag = 0;
|
|
|
|
/* DC is default mode for inter cu, required for intra mode signalling */
|
|
ps_nbr_4x4->b6_luma_intra_mode = 1;
|
|
|
|
/* copy the motion vectors to neighbour structure */
|
|
ps_nbr_4x4->mv = ps_pu->mv;
|
|
|
|
/* copy the PU to final out pu */
|
|
ps_final_prms->as_pu_enc_loop[ctr] = *ps_pu;
|
|
|
|
/* copy the PU to chroma */
|
|
ps_final_prms->as_pu_chrm_proc[ctr] = *ps_pu;
|
|
|
|
/* store the skip flag to final prms */
|
|
ps_final_prms->u1_skip_flag = ps_inter_cand->b1_skip_flag;
|
|
|
|
/* MVP index & MVD calc is gated on skip/merge flag */
|
|
if(0 == skip_or_merge_flag)
|
|
{
|
|
/* calculate the MVDs and popluate the MVP idx for L0 */
|
|
if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L0 == ps_pu->b2_pred_mode))
|
|
{
|
|
WORD32 idx0_cost, idx1_cost;
|
|
|
|
/* calculate the ABS mvd for cand 0 */
|
|
idx0_cost = abs(ps_pu->mv.s_l0_mv.i2_mvx - as_pred_mv[0].s_l0_mv.i2_mvx);
|
|
idx0_cost += abs(ps_pu->mv.s_l0_mv.i2_mvy - as_pred_mv[0].s_l0_mv.i2_mvy);
|
|
|
|
/* calculate the ABS mvd for cand 1 */
|
|
if(u1_use_mvp_from_top_row)
|
|
{
|
|
idx1_cost = abs(ps_pu->mv.s_l0_mv.i2_mvx - as_pred_mv[1].s_l0_mv.i2_mvx);
|
|
idx1_cost += abs(ps_pu->mv.s_l0_mv.i2_mvy - as_pred_mv[1].s_l0_mv.i2_mvy);
|
|
}
|
|
else
|
|
{
|
|
idx1_cost = INT_MAX;
|
|
}
|
|
|
|
/* based on the least cost choose the mvp idx */
|
|
if(idx0_cost <= idx1_cost)
|
|
{
|
|
ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvx -=
|
|
as_pred_mv[0].s_l0_mv.i2_mvx;
|
|
ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvy -=
|
|
as_pred_mv[0].s_l0_mv.i2_mvy;
|
|
|
|
ps_final_prms->as_pu_enc_loop[ctr].b1_l0_mvp_idx = 0;
|
|
}
|
|
else
|
|
{
|
|
ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvx -=
|
|
as_pred_mv[1].s_l0_mv.i2_mvx;
|
|
ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvy -=
|
|
as_pred_mv[1].s_l0_mv.i2_mvy;
|
|
|
|
ps_final_prms->as_pu_enc_loop[ctr].b1_l0_mvp_idx = 1;
|
|
}
|
|
|
|
/* set the pred l0 flag for neighbour storage */
|
|
ps_nbr_4x4->b1_pred_l0_flag = 1;
|
|
}
|
|
/* calculate the MVDs and popluate the MVP idx for L1 */
|
|
if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L1 == ps_pu->b2_pred_mode))
|
|
{
|
|
WORD32 idx0_cost, idx1_cost;
|
|
|
|
/* calculate the ABS mvd for cand 0 */
|
|
idx0_cost = abs(ps_pu->mv.s_l1_mv.i2_mvx - as_pred_mv[0].s_l1_mv.i2_mvx);
|
|
idx0_cost += abs(ps_pu->mv.s_l1_mv.i2_mvy - as_pred_mv[0].s_l1_mv.i2_mvy);
|
|
|
|
/* calculate the ABS mvd for cand 1 */
|
|
if(u1_use_mvp_from_top_row)
|
|
{
|
|
idx1_cost = abs(ps_pu->mv.s_l1_mv.i2_mvx - as_pred_mv[1].s_l1_mv.i2_mvx);
|
|
idx1_cost += abs(ps_pu->mv.s_l1_mv.i2_mvy - as_pred_mv[1].s_l1_mv.i2_mvy);
|
|
}
|
|
else
|
|
{
|
|
idx1_cost = INT_MAX;
|
|
}
|
|
|
|
/* based on the least cost choose the mvp idx */
|
|
if(idx0_cost <= idx1_cost)
|
|
{
|
|
ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvx -=
|
|
as_pred_mv[0].s_l1_mv.i2_mvx;
|
|
ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvy -=
|
|
as_pred_mv[0].s_l1_mv.i2_mvy;
|
|
|
|
ps_final_prms->as_pu_enc_loop[ctr].b1_l1_mvp_idx = 0;
|
|
}
|
|
else
|
|
{
|
|
ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvx -=
|
|
as_pred_mv[1].s_l1_mv.i2_mvx;
|
|
ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvy -=
|
|
as_pred_mv[1].s_l1_mv.i2_mvy;
|
|
|
|
ps_final_prms->as_pu_enc_loop[ctr].b1_l1_mvp_idx = 1;
|
|
}
|
|
|
|
/* set the pred l1 flag for neighbour storage */
|
|
ps_nbr_4x4->b1_pred_l1_flag = 1;
|
|
}
|
|
|
|
/* set the merge flag to 0 */
|
|
ps_final_prms->as_pu_enc_loop[ctr].b1_merge_flag = 0;
|
|
ps_final_prms->as_pu_enc_loop[ctr].b3_merge_idx = 0;
|
|
}
|
|
else
|
|
{
|
|
/* copy the merge index from candidate */
|
|
ps_final_prms->as_pu_enc_loop[ctr].b1_merge_flag = ps_pu->b1_merge_flag;
|
|
|
|
ps_final_prms->as_pu_enc_loop[ctr].b3_merge_idx = ps_pu->b3_merge_idx;
|
|
|
|
if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L0 == ps_pu->b2_pred_mode))
|
|
{
|
|
/* set the pred l0 flag for neighbour storage */
|
|
ps_nbr_4x4->b1_pred_l0_flag = 1;
|
|
}
|
|
|
|
/* calculate the MVDs and popluate the MVP idx for L1 */
|
|
if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L1 == ps_pu->b2_pred_mode))
|
|
{
|
|
/* set the pred l1 flag for neighbour storage */
|
|
ps_nbr_4x4->b1_pred_l1_flag = 1;
|
|
}
|
|
}
|
|
|
|
/* RD opt cost computation is part of cu_ntu func hence here it is set to 0 */
|
|
rdopt_cost = 0;
|
|
|
|
/* copy the MV to colocated Mv structure */
|
|
ps_final_prms->as_col_pu_enc_loop[ctr].s_l0_mv = ps_pu->mv.s_l0_mv;
|
|
ps_final_prms->as_col_pu_enc_loop[ctr].s_l1_mv = ps_pu->mv.s_l1_mv;
|
|
ps_final_prms->as_col_pu_enc_loop[ctr].i1_l0_ref_idx = ps_pu->mv.i1_l0_ref_idx;
|
|
ps_final_prms->as_col_pu_enc_loop[ctr].i1_l1_ref_idx = ps_pu->mv.i1_l1_ref_idx;
|
|
ps_final_prms->as_col_pu_enc_loop[ctr].b2_pred_mode = ps_pu->b2_pred_mode;
|
|
ps_final_prms->as_col_pu_enc_loop[ctr].b1_intra_flag = 0;
|
|
|
|
/* replicate neighbour 4x4 strcuture for entire partition */
|
|
{
|
|
WORD32 i, j;
|
|
nbr_4x4_t *ps_tmp_4x4;
|
|
|
|
ps_tmp_4x4 = ps_nbr_4x4;
|
|
|
|
for(i = 0; i < (inter_pu_ht >> 2); i++)
|
|
{
|
|
for(j = 0; j < (inter_pu_wd >> 2); j++)
|
|
{
|
|
ps_tmp_4x4[j] = *ps_nbr_4x4;
|
|
}
|
|
/* row level update*/
|
|
ps_tmp_4x4 += (cu_size >> 2);
|
|
}
|
|
}
|
|
/* set the neighbour map to 1 */
|
|
ihevce_set_inter_nbr_map(
|
|
ps_ctxt->pu1_ctb_nbr_map,
|
|
ps_ctxt->i4_nbr_map_strd,
|
|
cu_pos_x,
|
|
cu_pos_y,
|
|
(inter_pu_wd >> 2),
|
|
(inter_pu_ht >> 2),
|
|
1);
|
|
/* ----------- Motion Compensation for Luma ----------- */
|
|
#if !ENABLE_MIXED_INTER_MODE_EVAL
|
|
{
|
|
IV_API_CALL_STATUS_T valid_mv_cand;
|
|
|
|
/*If the inter candidate is neither merge cand nor skip cand
|
|
then calculate the mc.*/
|
|
if(0 == skip_or_merge_flag || (ps_ctxt->u1_high_speed_cu_dec_on))
|
|
{
|
|
valid_mv_cand =
|
|
ihevce_luma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_stride, 0);
|
|
|
|
/* assert if the MC is given a valid mv candidate */
|
|
ASSERT(valid_mv_cand == IV_SUCCESS);
|
|
}
|
|
}
|
|
#endif
|
|
if((2 == num_cu_part) && (0 == ctr))
|
|
{
|
|
/* 2Nx__ partion case */
|
|
if(inter_pu_wd == cu_size)
|
|
{
|
|
cu_pos_y += (inter_pu_ht >> 2);
|
|
pu1_pred += (inter_pu_ht * pred_stride);
|
|
ps_nbr_4x4 += (inter_pu_ht >> 2) * (cu_size >> 2);
|
|
ps_left_nbr_4x4 += (inter_pu_ht >> 2) * nbr_4x4_left_strd;
|
|
ps_top_nbr_4x4 = ps_nbr_4x4 - (cu_size >> 2);
|
|
ps_topleft_nbr_4x4 = ps_left_nbr_4x4 - nbr_4x4_left_strd;
|
|
}
|
|
|
|
/* __x2N partion case */
|
|
if(inter_pu_ht == cu_size)
|
|
{
|
|
cu_pos_x += (inter_pu_wd >> 2);
|
|
pu1_pred += inter_pu_wd;
|
|
ps_nbr_4x4 += (inter_pu_wd >> 2);
|
|
ps_left_nbr_4x4 = ps_nbr_4x4 - 1;
|
|
ps_top_nbr_4x4 += (inter_pu_wd >> 2);
|
|
ps_topleft_nbr_4x4 = ps_top_nbr_4x4 - 1;
|
|
nbr_4x4_left_strd = (cu_size >> 2);
|
|
}
|
|
}
|
|
}
|
|
|
|
return (rdopt_cost);
|
|
}
|
|
|
|
/*!
|
|
******************************************************************************
|
|
* \if Function name : ihevce_intra_chroma_pred_mode_selector \endif
|
|
*
|
|
* \brief
|
|
* Coding unit processing function for chroma special modes (Non-Luma modes)
|
|
*
|
|
* \param[in] ps_ctxt enc_loop module ctxt pointer
|
|
* \param[in] ps_chrm_cu_buf_prms ctxt having chroma related prms
|
|
* \param[in] ps_cu_analyse pointer to cu analyse
|
|
* \param[in] rd_opt_curr_idx index in the array of RDopt params
|
|
* \param[in] tu_mode TU_EQ_CU or other case
|
|
*
|
|
* \return
|
|
* Stores the best SATD mode, its RDOPT cost, CABAC state, TU bits
|
|
*
|
|
* \author
|
|
* Ittiam
|
|
*
|
|
*****************************************************************************
|
|
*/
|
|
UWORD8 ihevce_distortion_based_intra_chroma_mode_selector(
|
|
cu_analyse_t *ps_cu_analyse,
|
|
ihevc_intra_pred_chroma_ref_substitution_ft *pf_ref_substitution,
|
|
pf_intra_pred *ppf_chroma_ip,
|
|
pf_res_trans_luma_had_chroma *ppf_resd_trns_had,
|
|
UWORD8 *pu1_src,
|
|
WORD32 i4_src_stride,
|
|
UWORD8 *pu1_pred,
|
|
WORD32 i4_pred_stride,
|
|
UWORD8 *pu1_ctb_nbr_map,
|
|
WORD32 i4_nbr_map_strd,
|
|
UWORD8 *pu1_ref_sub_out,
|
|
WORD32 i4_alpha_stim_multiplier,
|
|
UWORD8 u1_is_cu_noisy,
|
|
UWORD8 u1_trans_size,
|
|
UWORD8 u1_trans_idx,
|
|
UWORD8 u1_num_tus_in_cu,
|
|
UWORD8 u1_num_4x4_luma_blks_in_tu,
|
|
UWORD8 u1_enable_psyRDOPT,
|
|
UWORD8 u1_is_422)
|
|
{
|
|
UWORD8 u1_chrm_mode;
|
|
UWORD8 ctr;
|
|
WORD32 i4_subtu_idx;
|
|
|
|
WORD32 i = 0;
|
|
UWORD8 u1_chrm_modes[4] = { 0, 1, 10, 26 };
|
|
WORD32 i4_satd_had[4] = { 0 };
|
|
WORD32 i4_best_satd_had = INT_MAX;
|
|
UWORD8 u1_cu_pos_x = (ps_cu_analyse->b3_cu_pos_x << 1);
|
|
UWORD8 u1_cu_pos_y = (ps_cu_analyse->b3_cu_pos_y << 1);
|
|
WORD32 i4_num_sub_tus = u1_is_422 + 1;
|
|
UWORD8 u1_best_chrm_mode = 0;
|
|
|
|
/* Get the best satd among all possible modes */
|
|
for(i = 0; i < 4; i++)
|
|
{
|
|
WORD32 left_strd = i4_src_stride;
|
|
|
|
u1_chrm_mode = (u1_is_422 == 1) ? gau1_chroma422_intra_angle_mapping[u1_chrm_modes[i]]
|
|
: u1_chrm_modes[i];
|
|
|
|
/* loop based on num tus in a cu */
|
|
for(ctr = 0; ctr < u1_num_tus_in_cu; ctr++)
|
|
{
|
|
WORD32 luma_nbr_flags;
|
|
WORD32 chrm_pred_func_idx;
|
|
|
|
WORD32 i4_trans_size_m2 = u1_trans_size << 1;
|
|
UWORD8 *pu1_tu_src = pu1_src + ((ctr & 1) * i4_trans_size_m2) +
|
|
(((ctr > 1) * u1_trans_size * i4_src_stride) << u1_is_422);
|
|
UWORD8 *pu1_tu_pred = pu1_pred + ((ctr & 1) * i4_trans_size_m2) +
|
|
(((ctr > 1) * u1_trans_size * i4_pred_stride) << u1_is_422);
|
|
WORD32 i4_curr_tu_pos_x = u1_cu_pos_x + ((ctr & 1) * u1_num_4x4_luma_blks_in_tu);
|
|
WORD32 i4_curr_tu_pos_y = u1_cu_pos_y + ((ctr > 1) * u1_num_4x4_luma_blks_in_tu);
|
|
|
|
luma_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
|
|
pu1_ctb_nbr_map,
|
|
i4_nbr_map_strd,
|
|
i4_curr_tu_pos_x,
|
|
i4_curr_tu_pos_y,
|
|
u1_num_4x4_luma_blks_in_tu,
|
|
u1_num_4x4_luma_blks_in_tu);
|
|
|
|
for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_sub_tus; i4_subtu_idx++)
|
|
{
|
|
WORD32 nbr_flags;
|
|
|
|
UWORD8 *pu1_cur_src =
|
|
pu1_tu_src + ((i4_subtu_idx == 1) * u1_trans_size * i4_src_stride);
|
|
UWORD8 *pu1_cur_pred =
|
|
pu1_tu_pred + ((i4_subtu_idx == 1) * u1_trans_size * i4_pred_stride);
|
|
UWORD8 *pu1_left = pu1_cur_src - 2;
|
|
UWORD8 *pu1_top = pu1_cur_src - i4_src_stride;
|
|
UWORD8 *pu1_top_left = pu1_top - 2;
|
|
|
|
nbr_flags = ihevce_get_intra_chroma_tu_nbr(
|
|
luma_nbr_flags, i4_subtu_idx, u1_trans_size, u1_is_422);
|
|
|
|
/* call the chroma reference array substitution */
|
|
pf_ref_substitution(
|
|
pu1_top_left,
|
|
pu1_top,
|
|
pu1_left,
|
|
left_strd,
|
|
u1_trans_size,
|
|
nbr_flags,
|
|
pu1_ref_sub_out,
|
|
1);
|
|
|
|
/* use the look up to get the function idx */
|
|
chrm_pred_func_idx = g_i4_ip_funcs[u1_chrm_mode];
|
|
|
|
/* call the intra prediction function */
|
|
ppf_chroma_ip[chrm_pred_func_idx](
|
|
pu1_ref_sub_out, 1, pu1_cur_pred, i4_pred_stride, u1_trans_size, u1_chrm_mode);
|
|
|
|
if(!u1_is_cu_noisy || !i4_alpha_stim_multiplier)
|
|
{
|
|
/* compute Hadamard-transform satd : Cb */
|
|
i4_satd_had[i] += ppf_resd_trns_had[u1_trans_idx - 1](
|
|
pu1_cur_src, i4_src_stride, pu1_cur_pred, i4_pred_stride, NULL, 0);
|
|
|
|
/* compute Hadamard-transform satd : Cr */
|
|
i4_satd_had[i] += ppf_resd_trns_had[u1_trans_idx - 1](
|
|
pu1_cur_src + 1, i4_src_stride, pu1_cur_pred + 1, i4_pred_stride, NULL, 0);
|
|
}
|
|
else
|
|
{
|
|
WORD32 i4_satd;
|
|
|
|
/* compute Hadamard-transform satd : Cb */
|
|
i4_satd = ppf_resd_trns_had[u1_trans_idx - 1](
|
|
pu1_cur_src, i4_src_stride, pu1_cur_pred, i4_pred_stride, NULL, 0);
|
|
|
|
i4_satd = ihevce_inject_stim_into_distortion(
|
|
pu1_cur_src,
|
|
i4_src_stride,
|
|
pu1_cur_pred,
|
|
i4_pred_stride,
|
|
i4_satd,
|
|
i4_alpha_stim_multiplier,
|
|
u1_trans_size,
|
|
0,
|
|
u1_enable_psyRDOPT,
|
|
U_PLANE);
|
|
|
|
i4_satd_had[i] += i4_satd;
|
|
|
|
/* compute Hadamard-transform satd : Cr */
|
|
i4_satd = ppf_resd_trns_had[u1_trans_idx - 1](
|
|
pu1_cur_src + 1, i4_src_stride, pu1_cur_pred + 1, i4_pred_stride, NULL, 0);
|
|
|
|
i4_satd = ihevce_inject_stim_into_distortion(
|
|
pu1_cur_src,
|
|
i4_src_stride,
|
|
pu1_cur_pred,
|
|
i4_pred_stride,
|
|
i4_satd,
|
|
i4_alpha_stim_multiplier,
|
|
u1_trans_size,
|
|
0,
|
|
u1_enable_psyRDOPT,
|
|
V_PLANE);
|
|
|
|
i4_satd_had[i] += i4_satd;
|
|
}
|
|
}
|
|
|
|
/* set the neighbour map to 1 */
|
|
ihevce_set_nbr_map(
|
|
pu1_ctb_nbr_map,
|
|
i4_nbr_map_strd,
|
|
i4_curr_tu_pos_x,
|
|
i4_curr_tu_pos_y,
|
|
u1_num_4x4_luma_blks_in_tu,
|
|
1);
|
|
}
|
|
|
|
/* set the neighbour map to 0 */
|
|
ihevce_set_nbr_map(
|
|
pu1_ctb_nbr_map,
|
|
i4_nbr_map_strd,
|
|
(ps_cu_analyse->b3_cu_pos_x << 1),
|
|
(ps_cu_analyse->b3_cu_pos_y << 1),
|
|
(ps_cu_analyse->u1_cu_size >> 2),
|
|
0);
|
|
|
|
/* Get the least SATD and corresponding mode */
|
|
if(i4_best_satd_had > i4_satd_had[i])
|
|
{
|
|
i4_best_satd_had = i4_satd_had[i];
|
|
u1_best_chrm_mode = u1_chrm_mode;
|
|
}
|
|
}
|
|
|
|
return u1_best_chrm_mode;
|
|
}
|
|
|
|
void ihevce_intra_chroma_pred_mode_selector(
|
|
ihevce_enc_loop_ctxt_t *ps_ctxt,
|
|
enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
|
|
cu_analyse_t *ps_cu_analyse,
|
|
WORD32 rd_opt_curr_idx,
|
|
WORD32 tu_mode,
|
|
WORD32 i4_alpha_stim_multiplier,
|
|
UWORD8 u1_is_cu_noisy)
|
|
{
|
|
chroma_intra_satd_ctxt_t *ps_chr_intra_satd_ctxt;
|
|
|
|
ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
|
|
|
|
UWORD8 *pu1_pred;
|
|
WORD32 trans_size;
|
|
WORD32 num_tus_in_cu;
|
|
WORD32 pred_strd;
|
|
WORD32 ctr;
|
|
WORD32 i4_subtu_idx;
|
|
WORD32 i4_num_sub_tus;
|
|
WORD32 trans_idx;
|
|
WORD32 scan_idx;
|
|
WORD32 num_4x4_luma_in_tu;
|
|
WORD32 cu_pos_x;
|
|
WORD32 cu_pos_y;
|
|
|
|
recon_datastore_t *aps_recon_datastore[2] = { &ps_ctxt->as_cu_prms[0].s_recon_datastore,
|
|
&ps_ctxt->as_cu_prms[1].s_recon_datastore };
|
|
|
|
LWORD64 chrm_cod_cost = 0;
|
|
WORD32 chrm_tu_bits = 0;
|
|
WORD32 best_chrm_mode = DM_CHROMA_IDX;
|
|
UWORD8 *pu1_chrm_src = ps_chrm_cu_buf_prms->pu1_curr_src;
|
|
WORD32 chrm_src_stride = ps_chrm_cu_buf_prms->i4_chrm_src_stride;
|
|
UWORD8 *pu1_cu_left = ps_chrm_cu_buf_prms->pu1_cu_left;
|
|
UWORD8 *pu1_cu_top = ps_chrm_cu_buf_prms->pu1_cu_top;
|
|
UWORD8 *pu1_cu_top_left = ps_chrm_cu_buf_prms->pu1_cu_top_left;
|
|
WORD32 cu_left_stride = ps_chrm_cu_buf_prms->i4_cu_left_stride;
|
|
WORD32 cu_size = ps_cu_analyse->u1_cu_size;
|
|
WORD32 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
|
|
WORD32 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
|
|
UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
|
|
|
|
ihevc_intra_pred_chroma_ref_substitution_fptr =
|
|
ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
|
|
i4_num_sub_tus = (u1_is_422 == 1) + 1;
|
|
|
|
#if DISABLE_RDOQ_INTRA
|
|
i4_perform_rdoq = 0;
|
|
#endif
|
|
|
|
if(TU_EQ_CU == tu_mode)
|
|
{
|
|
num_tus_in_cu = 1;
|
|
trans_size = cu_size >> 1;
|
|
num_4x4_luma_in_tu = trans_size >> 1; /*at luma level*/
|
|
ps_chr_intra_satd_ctxt = &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[tu_mode];
|
|
}
|
|
else
|
|
{
|
|
num_tus_in_cu = 4;
|
|
trans_size = cu_size >> 2;
|
|
num_4x4_luma_in_tu = trans_size >> 1; /*at luma level*/
|
|
|
|
/* For 8x8 CU only one TU */
|
|
if(MIN_TU_SIZE > trans_size)
|
|
{
|
|
trans_size = MIN_TU_SIZE;
|
|
num_tus_in_cu = 1;
|
|
/* chroma nbr avail. is derived based on luma.
|
|
for 4x4 chrm use 8x8 luma's size */
|
|
num_4x4_luma_in_tu = num_4x4_luma_in_tu << 1;
|
|
}
|
|
|
|
ps_chr_intra_satd_ctxt = &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[tu_mode];
|
|
}
|
|
|
|
/* Can't be TU_EQ_SUBCU case */
|
|
ASSERT(TU_EQ_SUBCU != tu_mode);
|
|
|
|
/* translate the transform size to index */
|
|
trans_idx = trans_size >> 2;
|
|
|
|
pu1_pred = (UWORD8 *)ps_chr_intra_satd_ctxt->pv_pred_data;
|
|
|
|
pred_strd = ps_chr_intra_satd_ctxt->i4_pred_stride;
|
|
|
|
/* for 16x16 cases */
|
|
if(16 == trans_size)
|
|
{
|
|
trans_idx = 3;
|
|
}
|
|
|
|
best_chrm_mode = ihevce_distortion_based_intra_chroma_mode_selector(
|
|
ps_cu_analyse,
|
|
ihevc_intra_pred_chroma_ref_substitution_fptr,
|
|
ps_ctxt->apf_chrm_ip,
|
|
ps_ctxt->apf_chrm_resd_trns_had,
|
|
pu1_chrm_src,
|
|
chrm_src_stride,
|
|
pu1_pred,
|
|
pred_strd,
|
|
ps_ctxt->pu1_ctb_nbr_map,
|
|
ps_ctxt->i4_nbr_map_strd,
|
|
(UWORD8 *)ps_ctxt->pv_ref_sub_out,
|
|
i4_alpha_stim_multiplier,
|
|
u1_is_cu_noisy,
|
|
trans_size,
|
|
trans_idx,
|
|
num_tus_in_cu,
|
|
num_4x4_luma_in_tu,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
u1_is_422);
|
|
|
|
/* Store the best chroma mode */
|
|
ps_chr_intra_satd_ctxt->u1_best_cr_mode = best_chrm_mode;
|
|
|
|
/* evaluate RDOPT cost for the Best mode */
|
|
{
|
|
WORD32 i4_subtu_pos_x;
|
|
WORD32 i4_subtu_pos_y;
|
|
UWORD8 u1_compute_spatial_ssd;
|
|
|
|
WORD32 ai4_total_bytes_offset_cb[2] = { 0, 0 };
|
|
WORD32 ai4_total_bytes_offset_cr[2] = { 0, 0 };
|
|
/* State for prefix bin of chroma intra pred mode before CU encode */
|
|
UWORD8 u1_chroma_intra_mode_prefix_state =
|
|
ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_CHROMA_PRED_MODE];
|
|
WORD32 luma_trans_size = trans_size << 1;
|
|
WORD32 calc_recon = 0;
|
|
UWORD8 *pu1_left = pu1_cu_left;
|
|
UWORD8 *pu1_top = pu1_cu_top;
|
|
UWORD8 *pu1_top_left = pu1_cu_top_left;
|
|
WORD32 left_strd = cu_left_stride;
|
|
|
|
if(ps_ctxt->i1_cu_qp_delta_enable)
|
|
{
|
|
ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, luma_trans_size, 1);
|
|
}
|
|
|
|
u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
|
|
(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
|
|
CONVERT_SSDS_TO_SPATIAL_DOMAIN;
|
|
|
|
if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
|
|
{
|
|
u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
|
|
CONVERT_SSDS_TO_SPATIAL_DOMAIN;
|
|
}
|
|
|
|
/* get the 4x4 level postion of current cu */
|
|
cu_pos_x = (ps_cu_analyse->b3_cu_pos_x << 1);
|
|
cu_pos_y = (ps_cu_analyse->b3_cu_pos_y << 1);
|
|
|
|
calc_recon = !u1_compute_spatial_ssd && ((4 == num_tus_in_cu) || (u1_is_422 == 1));
|
|
|
|
if(calc_recon || u1_compute_spatial_ssd)
|
|
{
|
|
aps_recon_datastore[0]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 1;
|
|
aps_recon_datastore[1]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 1;
|
|
}
|
|
else
|
|
{
|
|
aps_recon_datastore[0]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 0;
|
|
aps_recon_datastore[1]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 0;
|
|
}
|
|
|
|
/* loop based on num tus in a cu */
|
|
for(ctr = 0; ctr < num_tus_in_cu; ctr++)
|
|
{
|
|
WORD16 *pi2_cur_deq_data_cb;
|
|
WORD16 *pi2_cur_deq_data_cr;
|
|
|
|
WORD32 deq_data_strd = ps_chr_intra_satd_ctxt->i4_iq_buff_stride;
|
|
WORD32 luma_nbr_flags = 0;
|
|
|
|
luma_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
|
|
ps_ctxt->pu1_ctb_nbr_map,
|
|
ps_ctxt->i4_nbr_map_strd,
|
|
(ctr & 1) * (luma_trans_size >> 2) + cu_pos_x,
|
|
(ctr > 1) * (luma_trans_size >> 2) + cu_pos_y,
|
|
(luma_trans_size >> 2),
|
|
(luma_trans_size >> 2));
|
|
|
|
for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_sub_tus; i4_subtu_idx++)
|
|
{
|
|
WORD32 cbf, num_bytes;
|
|
LWORD64 trans_ssd_u, trans_ssd_v;
|
|
UWORD8 u1_is_recon_available;
|
|
|
|
WORD32 trans_size_m2 = trans_size << 1;
|
|
UWORD8 *pu1_cur_src = pu1_chrm_src + ((ctr & 1) * trans_size_m2) +
|
|
(((ctr > 1) * trans_size * chrm_src_stride) << u1_is_422) +
|
|
(i4_subtu_idx * trans_size * chrm_src_stride);
|
|
UWORD8 *pu1_cur_pred = pu1_pred + ((ctr & 1) * trans_size_m2) +
|
|
(((ctr > 1) * trans_size * pred_strd) << u1_is_422) +
|
|
(i4_subtu_idx * trans_size * pred_strd);
|
|
WORD32 i4_recon_stride = aps_recon_datastore[0]->i4_chromaRecon_stride;
|
|
UWORD8 *pu1_cur_recon = ((UWORD8 *)aps_recon_datastore[0]
|
|
->apv_chroma_recon_bufs[1 + (num_tus_in_cu > 1)]) +
|
|
((ctr & 1) * trans_size_m2) +
|
|
(((ctr > 1) * trans_size * i4_recon_stride) << u1_is_422) +
|
|
(i4_subtu_idx * trans_size * i4_recon_stride);
|
|
|
|
/* Use Chroma coeff/iq buf of the cur. intra cand. Not rememb.
|
|
chroma coeff/iq for high quality intra SATD special modes. Will
|
|
be over written by coeff of luma mode in chroma_rdopt call */
|
|
UWORD8 *pu1_ecd_data_cb =
|
|
&ps_chr_intra_satd_ctxt->au1_scan_coeff_cb[i4_subtu_idx][0];
|
|
UWORD8 *pu1_ecd_data_cr =
|
|
&ps_chr_intra_satd_ctxt->au1_scan_coeff_cr[i4_subtu_idx][0];
|
|
|
|
WORD32 chrm_pred_func_idx = 0;
|
|
LWORD64 curr_cb_cod_cost = 0;
|
|
LWORD64 curr_cr_cod_cost = 0;
|
|
WORD32 nbr_flags = 0;
|
|
|
|
i4_subtu_pos_x = (((ctr & 1) * trans_size_m2) >> 2);
|
|
i4_subtu_pos_y = (((ctr > 1) * trans_size) >> (!u1_is_422 + 1)) +
|
|
((i4_subtu_idx * trans_size) >> 2);
|
|
pi2_cur_deq_data_cb = &ps_chr_intra_satd_ctxt->ai2_iq_data_cb[0] +
|
|
((ctr & 1) * trans_size) +
|
|
(((ctr > 1) * trans_size * deq_data_strd) << u1_is_422) +
|
|
(i4_subtu_idx * trans_size * deq_data_strd);
|
|
pi2_cur_deq_data_cr = &ps_chr_intra_satd_ctxt->ai2_iq_data_cr[0] +
|
|
((ctr & 1) * trans_size) +
|
|
(((ctr > 1) * trans_size * deq_data_strd) << u1_is_422) +
|
|
(i4_subtu_idx * trans_size * deq_data_strd);
|
|
|
|
/* left cu boundary */
|
|
if(0 == i4_subtu_pos_x)
|
|
{
|
|
left_strd = cu_left_stride;
|
|
pu1_left = pu1_cu_left + (i4_subtu_pos_y << 2) * left_strd;
|
|
}
|
|
else
|
|
{
|
|
pu1_left = pu1_cur_recon - 2;
|
|
left_strd = i4_recon_stride;
|
|
}
|
|
|
|
/* top cu boundary */
|
|
if(0 == i4_subtu_pos_y)
|
|
{
|
|
pu1_top = pu1_cu_top + (i4_subtu_pos_x << 2);
|
|
}
|
|
else
|
|
{
|
|
pu1_top = pu1_cur_recon - i4_recon_stride;
|
|
}
|
|
|
|
/* by default top left is set to cu top left */
|
|
pu1_top_left = pu1_cu_top_left;
|
|
|
|
/* top left based on position */
|
|
if((0 != i4_subtu_pos_y) && (0 == i4_subtu_pos_x))
|
|
{
|
|
pu1_top_left = pu1_left - left_strd;
|
|
}
|
|
else if(0 != i4_subtu_pos_x)
|
|
{
|
|
pu1_top_left = pu1_top - 2;
|
|
}
|
|
|
|
/* populate the coeffs scan idx */
|
|
scan_idx = SCAN_DIAG_UPRIGHT;
|
|
|
|
/* RDOPT copy States : TU init (best until prev TU) to current */
|
|
COPY_CABAC_STATES(
|
|
&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0],
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0],
|
|
IHEVC_CAB_CTXT_END);
|
|
|
|
/* for 4x4 transforms based on intra pred mode scan is choosen*/
|
|
if(4 == trans_size)
|
|
{
|
|
/* for modes from 22 upto 30 horizontal scan is used */
|
|
if((best_chrm_mode > 21) && (best_chrm_mode < 31))
|
|
{
|
|
scan_idx = SCAN_HORZ;
|
|
}
|
|
/* for modes from 6 upto 14 horizontal scan is used */
|
|
else if((best_chrm_mode > 5) && (best_chrm_mode < 15))
|
|
{
|
|
scan_idx = SCAN_VERT;
|
|
}
|
|
}
|
|
|
|
nbr_flags = ihevce_get_intra_chroma_tu_nbr(
|
|
luma_nbr_flags, i4_subtu_idx, trans_size, u1_is_422);
|
|
|
|
/* call the chroma reference array substitution */
|
|
ihevc_intra_pred_chroma_ref_substitution_fptr(
|
|
pu1_top_left,
|
|
pu1_top,
|
|
pu1_left,
|
|
left_strd,
|
|
trans_size,
|
|
nbr_flags,
|
|
(UWORD8 *)ps_ctxt->pv_ref_sub_out,
|
|
1);
|
|
|
|
/* use the look up to get the function idx */
|
|
chrm_pred_func_idx = g_i4_ip_funcs[best_chrm_mode];
|
|
|
|
/* call the intra prediction function */
|
|
ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
|
|
(UWORD8 *)ps_ctxt->pv_ref_sub_out,
|
|
1,
|
|
pu1_cur_pred,
|
|
pred_strd,
|
|
trans_size,
|
|
best_chrm_mode);
|
|
|
|
/* UPLANE RDOPT Loop */
|
|
{
|
|
WORD32 tu_bits;
|
|
|
|
cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
|
|
ps_ctxt,
|
|
pu1_cur_pred,
|
|
pred_strd,
|
|
pu1_cur_src,
|
|
chrm_src_stride,
|
|
pi2_cur_deq_data_cb,
|
|
deq_data_strd,
|
|
pu1_cur_recon,
|
|
i4_recon_stride,
|
|
pu1_ecd_data_cb + ai4_total_bytes_offset_cb[i4_subtu_idx],
|
|
ps_ctxt->au1_cu_csbf,
|
|
ps_ctxt->i4_cu_csbf_strd,
|
|
trans_size,
|
|
scan_idx,
|
|
1,
|
|
&num_bytes,
|
|
&tu_bits,
|
|
&ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr],
|
|
&ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr],
|
|
&u1_is_recon_available,
|
|
i4_perform_sbh,
|
|
i4_perform_rdoq,
|
|
&trans_ssd_u,
|
|
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
|
|
i4_alpha_stim_multiplier,
|
|
u1_is_cu_noisy,
|
|
#endif
|
|
0,
|
|
u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
|
|
U_PLANE);
|
|
|
|
#if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS && COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL
|
|
if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
|
|
{
|
|
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
|
|
trans_ssd_u = ihevce_inject_stim_into_distortion(
|
|
pu1_cur_src,
|
|
chrm_src_stride,
|
|
pu1_cur_pred,
|
|
pred_strd,
|
|
trans_ssd_u,
|
|
i4_alpha_stim_multiplier,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
U_PLANE);
|
|
#else
|
|
if(u1_compute_spatial_ssd && u1_is_recon_available)
|
|
{
|
|
trans_ssd_u = ihevce_inject_stim_into_distortion(
|
|
pu1_cur_src,
|
|
chrm_src_stride,
|
|
pu1_cur_recon,
|
|
i4_recon_stride,
|
|
trans_ssd_u,
|
|
i4_alpha_stim_multiplier,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
U_PLANE);
|
|
}
|
|
else
|
|
{
|
|
trans_ssd_u = ihevce_inject_stim_into_distortion(
|
|
pu1_cur_src,
|
|
chrm_src_stride,
|
|
pu1_cur_pred,
|
|
pred_strd,
|
|
trans_ssd_u,
|
|
i4_alpha_stim_multiplier,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
U_PLANE);
|
|
}
|
|
#endif
|
|
}
|
|
#endif
|
|
|
|
/* RDOPT copy States : New updated after curr TU to TU init */
|
|
if(0 != cbf)
|
|
{
|
|
memcpy(
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0],
|
|
&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0],
|
|
IHEVC_CAB_CTXT_END);
|
|
}
|
|
/* RDOPT copy States : Restoring back the Cb init state to Cr */
|
|
else
|
|
{
|
|
memcpy(
|
|
&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0],
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0],
|
|
IHEVC_CAB_CTXT_END);
|
|
}
|
|
|
|
if(calc_recon || (!u1_is_recon_available && u1_compute_spatial_ssd))
|
|
{
|
|
ihevce_chroma_it_recon_fxn(
|
|
ps_ctxt,
|
|
pi2_cur_deq_data_cb,
|
|
deq_data_strd,
|
|
pu1_cur_pred,
|
|
pred_strd,
|
|
pu1_cur_recon,
|
|
i4_recon_stride,
|
|
(pu1_ecd_data_cb + ai4_total_bytes_offset_cb[i4_subtu_idx]),
|
|
trans_size,
|
|
cbf,
|
|
ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr],
|
|
ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr],
|
|
U_PLANE);
|
|
}
|
|
|
|
ps_chr_intra_satd_ctxt->au1_cbf_cb[i4_subtu_idx][ctr] = cbf;
|
|
curr_cb_cod_cost =
|
|
trans_ssd_u +
|
|
COMPUTE_RATE_COST_CLIP30(
|
|
tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
|
|
chrm_tu_bits += tu_bits;
|
|
ai4_total_bytes_offset_cb[i4_subtu_idx] += num_bytes;
|
|
ps_chr_intra_satd_ctxt->ai4_num_bytes_scan_coeff_cb_per_tu[i4_subtu_idx][ctr] =
|
|
num_bytes;
|
|
}
|
|
|
|
/* VPLANE RDOPT Loop */
|
|
{
|
|
WORD32 tu_bits;
|
|
|
|
cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
|
|
ps_ctxt,
|
|
pu1_cur_pred,
|
|
pred_strd,
|
|
pu1_cur_src,
|
|
chrm_src_stride,
|
|
pi2_cur_deq_data_cr,
|
|
deq_data_strd,
|
|
pu1_cur_recon,
|
|
i4_recon_stride,
|
|
pu1_ecd_data_cr + ai4_total_bytes_offset_cr[i4_subtu_idx],
|
|
ps_ctxt->au1_cu_csbf,
|
|
ps_ctxt->i4_cu_csbf_strd,
|
|
trans_size,
|
|
scan_idx,
|
|
1,
|
|
&num_bytes,
|
|
&tu_bits,
|
|
&ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr],
|
|
&ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr],
|
|
&u1_is_recon_available,
|
|
i4_perform_sbh,
|
|
i4_perform_rdoq,
|
|
&trans_ssd_v,
|
|
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
|
|
i4_alpha_stim_multiplier,
|
|
u1_is_cu_noisy,
|
|
#endif
|
|
0,
|
|
u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
|
|
V_PLANE);
|
|
|
|
#if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS && COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL
|
|
if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
|
|
{
|
|
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
|
|
trans_ssd_v = ihevce_inject_stim_into_distortion(
|
|
pu1_cur_src,
|
|
chrm_src_stride,
|
|
pu1_cur_pred,
|
|
pred_strd,
|
|
trans_ssd_v,
|
|
i4_alpha_stim_multiplier,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
V_PLANE);
|
|
#else
|
|
if(u1_compute_spatial_ssd && u1_is_recon_available)
|
|
{
|
|
trans_ssd_v = ihevce_inject_stim_into_distortion(
|
|
pu1_cur_src,
|
|
chrm_src_stride,
|
|
pu1_cur_recon,
|
|
i4_recon_stride,
|
|
trans_ssd_v,
|
|
i4_alpha_stim_multiplier,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
V_PLANE);
|
|
}
|
|
else
|
|
{
|
|
trans_ssd_v = ihevce_inject_stim_into_distortion(
|
|
pu1_cur_src,
|
|
chrm_src_stride,
|
|
pu1_cur_pred,
|
|
pred_strd,
|
|
trans_ssd_v,
|
|
i4_alpha_stim_multiplier,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
V_PLANE);
|
|
}
|
|
#endif
|
|
}
|
|
#endif
|
|
|
|
/* RDOPT copy States : New updated after curr TU to TU init */
|
|
if(0 != cbf)
|
|
{
|
|
COPY_CABAC_STATES(
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0],
|
|
&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0],
|
|
IHEVC_CAB_CTXT_END);
|
|
}
|
|
/* RDOPT copy States : Restoring back the Cb init state to Cr */
|
|
else
|
|
{
|
|
COPY_CABAC_STATES(
|
|
&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0],
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0],
|
|
IHEVC_CAB_CTXT_END);
|
|
}
|
|
|
|
if(calc_recon || (!u1_is_recon_available && u1_compute_spatial_ssd))
|
|
{
|
|
ihevce_chroma_it_recon_fxn(
|
|
ps_ctxt,
|
|
pi2_cur_deq_data_cr,
|
|
deq_data_strd,
|
|
pu1_cur_pred,
|
|
pred_strd,
|
|
pu1_cur_recon,
|
|
i4_recon_stride,
|
|
(pu1_ecd_data_cr + ai4_total_bytes_offset_cr[i4_subtu_idx]),
|
|
trans_size,
|
|
cbf,
|
|
ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr],
|
|
ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr],
|
|
V_PLANE);
|
|
}
|
|
|
|
ps_chr_intra_satd_ctxt->au1_cbf_cr[i4_subtu_idx][ctr] = cbf;
|
|
curr_cr_cod_cost =
|
|
trans_ssd_v +
|
|
COMPUTE_RATE_COST_CLIP30(
|
|
tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
|
|
chrm_tu_bits += tu_bits;
|
|
ai4_total_bytes_offset_cr[i4_subtu_idx] += num_bytes;
|
|
ps_chr_intra_satd_ctxt->ai4_num_bytes_scan_coeff_cr_per_tu[i4_subtu_idx][ctr] =
|
|
num_bytes;
|
|
}
|
|
|
|
chrm_cod_cost += curr_cb_cod_cost;
|
|
chrm_cod_cost += curr_cr_cod_cost;
|
|
}
|
|
|
|
/* set the neighbour map to 1 */
|
|
ihevce_set_nbr_map(
|
|
ps_ctxt->pu1_ctb_nbr_map,
|
|
ps_ctxt->i4_nbr_map_strd,
|
|
(ctr & 1) * (luma_trans_size >> 2) + cu_pos_x,
|
|
(ctr > 1) * (luma_trans_size >> 2) + cu_pos_y,
|
|
(luma_trans_size >> 2),
|
|
1);
|
|
}
|
|
|
|
/* set the neighbour map to 0 */
|
|
ihevce_set_nbr_map(
|
|
ps_ctxt->pu1_ctb_nbr_map,
|
|
ps_ctxt->i4_nbr_map_strd,
|
|
(ps_cu_analyse->b3_cu_pos_x << 1),
|
|
(ps_cu_analyse->b3_cu_pos_y << 1),
|
|
(ps_cu_analyse->u1_cu_size >> 2),
|
|
0);
|
|
|
|
/* Account for coding b3_chroma_intra_pred_mode prefix and suffix bins */
|
|
/* This is done by adding the bits for signalling chroma mode (0-3) */
|
|
/* and subtracting the bits for chroma mode same as luma mode (4) */
|
|
#if CHROMA_RDOPT_ENABLE
|
|
{
|
|
/* Estimate bits to encode prefix bin as 1 for b3_chroma_intra_pred_mode */
|
|
WORD32 bits_frac_1 =
|
|
gau2_ihevce_cabac_bin_to_bits[u1_chroma_intra_mode_prefix_state ^ 1];
|
|
|
|
WORD32 bits_for_mode_0to3 = (2 << CABAC_FRAC_BITS_Q) + bits_frac_1;
|
|
|
|
/* Estimate bits to encode prefix bin as 0 for b3_chroma_intra_pred_mode */
|
|
WORD32 bits_for_mode4 =
|
|
gau2_ihevce_cabac_bin_to_bits[u1_chroma_intra_mode_prefix_state ^ 0];
|
|
|
|
/* accumulate into final rd cost for chroma */
|
|
ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode = COMPUTE_RATE_COST_CLIP30(
|
|
(bits_for_mode_0to3 - bits_for_mode4),
|
|
ps_ctxt->i8_cl_ssd_lambda_chroma_qf,
|
|
(LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
|
|
|
|
chrm_cod_cost += ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode;
|
|
}
|
|
#endif
|
|
|
|
if(ps_ctxt->u1_enable_psyRDOPT)
|
|
{
|
|
UWORD8 *pu1_recon_cu;
|
|
WORD32 recon_stride;
|
|
WORD32 curr_pos_x;
|
|
WORD32 curr_pos_y;
|
|
WORD32 start_index;
|
|
WORD32 num_horz_cu_in_ctb;
|
|
WORD32 had_block_size;
|
|
|
|
/* tODO: sreenivasa ctb size has to be used appropriately */
|
|
had_block_size = 8;
|
|
num_horz_cu_in_ctb = 2 * 64 / had_block_size;
|
|
curr_pos_x = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
|
|
curr_pos_y = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
|
|
recon_stride = aps_recon_datastore[0]->i4_chromaRecon_stride;
|
|
pu1_recon_cu =
|
|
aps_recon_datastore[0]->apv_chroma_recon_bufs[1 + (num_tus_in_cu > 1)]; //
|
|
|
|
/* start index to index the source satd of curr cu int he current ctb*/
|
|
start_index = 2 * (curr_pos_x / had_block_size) +
|
|
(curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
|
|
|
|
{
|
|
chrm_cod_cost += ihevce_psy_rd_cost_croma(
|
|
ps_ctxt->ai4_source_chroma_satd,
|
|
pu1_recon_cu,
|
|
recon_stride,
|
|
1, //
|
|
cu_size,
|
|
0, // pic type
|
|
0, //layer id
|
|
ps_ctxt->i4_satd_lamda, // lambda
|
|
start_index,
|
|
ps_ctxt->u1_is_input_data_hbd, // 8 bit
|
|
ps_ctxt->u1_chroma_array_type,
|
|
&ps_ctxt->s_cmn_opt_func
|
|
|
|
); // chroma subsampling 420
|
|
}
|
|
}
|
|
|
|
ps_chr_intra_satd_ctxt->i8_chroma_best_rdopt = chrm_cod_cost;
|
|
ps_chr_intra_satd_ctxt->i4_chrm_tu_bits = chrm_tu_bits;
|
|
|
|
memcpy(
|
|
&ps_chr_intra_satd_ctxt->au1_chrm_satd_updated_ctxt_models[0],
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0],
|
|
IHEVC_CAB_CTXT_END);
|
|
}
|
|
}
|
|
|
|
/*!
|
|
******************************************************************************
|
|
* \if Function name : ihevce_chroma_cu_prcs_rdopt \endif
|
|
*
|
|
* \brief
|
|
* Coding unit processing function for chroma
|
|
*
|
|
* \param[in] ps_ctxt enc_loop module ctxt pointer
|
|
* \param[in] rd_opt_curr_idx index in the array of RDopt params
|
|
* \param[in] func_proc_mode TU_EQ_CU or other case
|
|
* \param[in] pu1_chrm_src pointer to source data buffer
|
|
* \param[in] chrm_src_stride source buffer stride
|
|
* \param[in] pu1_cu_left pointer to left recon data buffer
|
|
* \param[in] pu1_cu_top pointer to top recon data buffer
|
|
* \param[in] pu1_cu_top_left pointer to top left recon data buffer
|
|
* \param[in] cu_left_stride left recon buffer stride
|
|
* \param[in] cu_pos_x position x of current CU in CTB
|
|
* \param[in] cu_pos_y position y of current CU in CTB
|
|
* \param[out] pi4_chrm_tu_bits pointer to store the total chroma bits
|
|
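* \param[in] i4_alpha_stim_multiplier multiplier passed on to the noise-term (stim) injection into the chroma distortion
* \param[in] u1_is_cu_noisy flag marking the CU as noisy; together with a non-zero multiplier it enables the noise-term injection
*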
|
|
* \return
|
|
* Chroma coding cost (Cb and Cr included)
|
|
*
|
|
* \author
|
|
* Ittiam
|
|
*
|
|
*****************************************************************************
|
|
*/
|
|
LWORD64 ihevce_chroma_cu_prcs_rdopt(
|
|
ihevce_enc_loop_ctxt_t *ps_ctxt,
|
|
WORD32 rd_opt_curr_idx,
|
|
WORD32 func_proc_mode,
|
|
UWORD8 *pu1_chrm_src,
|
|
WORD32 chrm_src_stride,
|
|
UWORD8 *pu1_cu_left,
|
|
UWORD8 *pu1_cu_top,
|
|
UWORD8 *pu1_cu_top_left,
|
|
WORD32 cu_left_stride,
|
|
WORD32 cu_pos_x,
|
|
WORD32 cu_pos_y,
|
|
WORD32 *pi4_chrm_tu_bits,
|
|
WORD32 i4_alpha_stim_multiplier,
|
|
UWORD8 u1_is_cu_noisy)
|
|
{
|
|
tu_enc_loop_out_t *ps_tu;
|
|
tu_enc_loop_temp_prms_t *ps_tu_temp_prms;
|
|
|
|
ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
|
|
|
|
UWORD8 *pu1_pred;
|
|
UWORD8 *pu1_recon;
|
|
WORD32 i4_recon_stride;
|
|
WORD32 cu_size, trans_size = 0;
|
|
WORD32 pred_strd;
|
|
WORD32 ctr, i4_subtu_idx;
|
|
WORD32 scan_idx;
|
|
WORD32 u1_is_cu_coded_old;
|
|
WORD32 init_bytes_offset;
|
|
|
|
enc_loop_cu_final_prms_t *ps_best_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_curr_idx];
|
|
recon_datastore_t *ps_recon_datastore = &ps_best_cu_prms->s_recon_datastore;
|
|
|
|
WORD32 total_bytes_offset = 0;
|
|
LWORD64 chrm_cod_cost = 0;
|
|
WORD32 chrm_tu_bits = 0;
|
|
WORD32 chrm_pred_mode = DM_CHROMA_IDX, luma_pred_mode = 35;
|
|
LWORD64 i8_ssd_cb = 0;
|
|
WORD32 i4_bits_cb = 0;
|
|
LWORD64 i8_ssd_cr = 0;
|
|
WORD32 i4_bits_cr = 0;
|
|
UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
|
|
UWORD8 u1_num_tus =
|
|
/* NumChromaTU's = 1, if TUSize = 4 and CUSize = 8 */
|
|
(!ps_best_cu_prms->as_tu_enc_loop[0].s_tu.b3_size && ps_best_cu_prms->u1_intra_flag)
|
|
? 1
|
|
: ps_best_cu_prms->u2_num_tus_in_cu;
|
|
UWORD8 u1_num_subtus_in_tu = u1_is_422 + 1;
|
|
UWORD8 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
|
|
(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
|
|
CONVERT_SSDS_TO_SPATIAL_DOMAIN;
|
|
/* Get the RDOPT cost of the best CU mode for early_exit */
|
|
LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!rd_opt_curr_idx].i8_best_rdopt_cost;
|
|
/* Get the current running RDOPT (Luma RDOPT) for early_exit */
|
|
LWORD64 curr_rdopt_cost = ps_ctxt->as_cu_prms[rd_opt_curr_idx].i8_curr_rdopt_cost;
|
|
WORD32 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
|
|
WORD32 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
|
|
|
|
ihevc_intra_pred_chroma_ref_substitution_fptr =
|
|
ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
|
|
|
|
if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
|
|
{
|
|
u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
|
|
CONVERT_SSDS_TO_SPATIAL_DOMAIN;
|
|
}
|
|
|
|
/* Store the init bytes offset from luma */
|
|
init_bytes_offset = ps_best_cu_prms->i4_num_bytes_ecd_data;
|
|
|
|
/* Unused pred buffer in merge_skip_pred_data_t structure is used as
|
|
Chroma pred storage buf. for final_recon function.
|
|
The buffer is split into two and used as a ping-pong buffer */
|
|
pu1_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
|
|
rd_opt_curr_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) +
|
|
(u1_is_422 * (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
|
|
|
|
pred_strd = ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
|
|
|
|
pu1_recon = (UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs[0];
|
|
i4_recon_stride = ps_recon_datastore->i4_chromaRecon_stride;
|
|
cu_size = ps_best_cu_prms->u1_cu_size;
|
|
chrm_tu_bits = 0;
|
|
|
|
/* get the first TU pointer */
|
|
ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
|
|
/* get the first TU enc_loop temp prms pointer */
|
|
ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
|
|
|
|
if(PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag)
|
|
{
|
|
/* Mode signalled by intra prediction for luma */
|
|
luma_pred_mode = ps_best_cu_prms->au1_intra_pred_mode[0];
|
|
|
|
#if DISABLE_RDOQ_INTRA
|
|
i4_perform_rdoq = 0;
|
|
#endif
|
|
}
|
|
|
|
else
|
|
{
|
|
UWORD8 *pu1_pred_org = pu1_pred;
|
|
|
|
/* ------ Motion Compensation for Chroma -------- */
|
|
for(ctr = 0; ctr < ps_best_cu_prms->u2_num_pus_in_cu; ctr++)
|
|
{
|
|
pu_t *ps_pu;
|
|
WORD32 inter_pu_wd;
|
|
WORD32 inter_pu_ht;
|
|
|
|
ps_pu = &ps_best_cu_prms->as_pu_chrm_proc[ctr];
|
|
|
|
inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
|
|
inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
|
|
inter_pu_ht <<= u1_is_422;
|
|
|
|
ihevce_chroma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_strd);
|
|
|
|
if(2 == ps_best_cu_prms->u2_num_pus_in_cu)
|
|
{
|
|
/* 2Nx__ partion case */
|
|
if(inter_pu_wd == cu_size)
|
|
{
|
|
pu1_pred += (inter_pu_ht * pred_strd);
|
|
}
|
|
|
|
/* __x2N partion case */
|
|
if(inter_pu_ht == (cu_size >> (u1_is_422 == 0)))
|
|
{
|
|
pu1_pred += inter_pu_wd;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* restore the pred pointer to start for transform loop */
|
|
pu1_pred = pu1_pred_org;
|
|
}
|
|
|
|
/* Used to store back only the luma based info. if SATD based chorma
|
|
mode also comes */
|
|
u1_is_cu_coded_old = ps_best_cu_prms->u1_is_cu_coded;
|
|
|
|
/* evaluate chroma candidates (same as luma) and
|
|
if INTRA & HIGH_QUALITY compare with best SATD mode */
|
|
{
|
|
WORD32 calc_recon = 0, deq_data_strd;
|
|
WORD16 *pi2_deq_data;
|
|
UWORD8 *pu1_ecd_data;
|
|
UWORD8 u1_is_mode_eq_chroma_satd_mode = 0;
|
|
|
|
pi2_deq_data = &ps_best_cu_prms->pi2_cu_deq_coeffs[0];
|
|
pi2_deq_data += ps_best_cu_prms->i4_chrm_deq_coeff_strt_idx;
|
|
deq_data_strd = cu_size;
|
|
/* update ecd buffer for storing coeff. */
|
|
pu1_ecd_data = &ps_best_cu_prms->pu1_cu_coeffs[0];
|
|
pu1_ecd_data += init_bytes_offset;
|
|
/* store chroma starting index */
|
|
ps_best_cu_prms->i4_chrm_cu_coeff_strt_idx = init_bytes_offset;
|
|
|
|
/* get the first TU pointer */
|
|
ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
|
|
ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
|
|
|
|
/* Reset total_bytes_offset for each candidate */
|
|
chrm_pred_mode = (u1_is_422 == 1) ? gau1_chroma422_intra_angle_mapping[luma_pred_mode]
|
|
: luma_pred_mode;
|
|
|
|
total_bytes_offset = 0;
|
|
|
|
if(TU_EQ_SUBCU == func_proc_mode)
|
|
{
|
|
func_proc_mode = TU_EQ_CU_DIV2;
|
|
}
|
|
|
|
/* For cu_size=8 case, chroma cost will be same for TU_EQ_CU and
|
|
TU_EQ_CU_DIV2 and TU_EQ_SUBCU case */
|
|
if(8 == cu_size)
|
|
{
|
|
func_proc_mode = TU_EQ_CU;
|
|
}
|
|
|
|
/* loop based on num tus in a cu */
|
|
if(!ps_best_cu_prms->u1_intra_flag || !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd ||
|
|
(ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd &&
|
|
(chrm_pred_mode !=
|
|
ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode].u1_best_cr_mode)))
|
|
{
|
|
/* loop based on num tus in a cu */
|
|
for(ctr = 0; ctr < u1_num_tus; ctr++)
|
|
{
|
|
WORD32 num_bytes = 0;
|
|
LWORD64 curr_cb_cod_cost = 0;
|
|
LWORD64 curr_cr_cod_cost = 0;
|
|
WORD32 chrm_pred_func_idx = 0;
|
|
UWORD8 u1_is_early_exit_condition_satisfied = 0;
|
|
|
|
/* Default cb and cr offset initializatio for b3_chroma_intra_mode_idx=7 */
|
|
/* FIX for TU tree shrinkage caused by ecd data copies in final mode recon */
|
|
ps_tu->s_tu.b1_cb_cbf = ps_tu->s_tu.b1_cr_cbf = 0;
|
|
ps_tu->s_tu.b1_cb_cbf_subtu1 = ps_tu->s_tu.b1_cr_cbf_subtu1 = 0;
|
|
ps_tu->ai4_cb_coeff_offset[0] = total_bytes_offset + init_bytes_offset;
|
|
ps_tu->ai4_cr_coeff_offset[0] = total_bytes_offset + init_bytes_offset;
|
|
ps_tu->ai4_cb_coeff_offset[1] = total_bytes_offset + init_bytes_offset;
|
|
ps_tu->ai4_cr_coeff_offset[1] = total_bytes_offset + init_bytes_offset;
|
|
ps_tu_temp_prms->ai2_cb_bytes_consumed[0] = 0;
|
|
ps_tu_temp_prms->ai2_cr_bytes_consumed[0] = 0;
|
|
ps_tu_temp_prms->ai2_cb_bytes_consumed[1] = 0;
|
|
ps_tu_temp_prms->ai2_cr_bytes_consumed[1] = 0;
|
|
|
|
/* TU level inits */
|
|
/* check if chroma present flag is set */
|
|
if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
|
|
{
|
|
/* RDOPT copy States : TU init (best until prev TU) to current */
|
|
COPY_CABAC_STATES(
|
|
&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0],
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0],
|
|
IHEVC_CAB_CTXT_END);
|
|
|
|
/* get the current transform size */
|
|
trans_size = ps_tu->s_tu.b3_size;
|
|
trans_size = (1 << (trans_size + 1)); /* in chroma units */
|
|
|
|
/* since 2x2 transform is not allowed for chroma*/
|
|
if(2 == trans_size)
|
|
{
|
|
trans_size = 4;
|
|
}
|
|
}
|
|
|
|
for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus_in_tu; i4_subtu_idx++)
|
|
{
|
|
WORD32 cbf;
|
|
UWORD8 u1_is_recon_available;
|
|
|
|
WORD32 nbr_flags = 0;
|
|
WORD32 zero_cols = 0;
|
|
WORD32 zero_rows = 0;
|
|
|
|
/* check if chroma present flag is set */
|
|
if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
|
|
{
|
|
UWORD8 *pu1_cur_pred;
|
|
UWORD8 *pu1_cur_recon;
|
|
UWORD8 *pu1_cur_src;
|
|
WORD16 *pi2_cur_deq_data;
|
|
WORD32 curr_pos_x, curr_pos_y;
|
|
LWORD64 trans_ssd_u, trans_ssd_v;
|
|
|
|
/* get the current sub-tu posx and posy w.r.t to cu */
|
|
curr_pos_x = (ps_tu->s_tu.b4_pos_x << 2) - (cu_pos_x << 3);
|
|
curr_pos_y = (ps_tu->s_tu.b4_pos_y << 2) - (cu_pos_y << 3) +
|
|
(i4_subtu_idx * trans_size);
|
|
|
|
/* 420sp case only vertical height will be half */
|
|
if(u1_is_422 == 0)
|
|
{
|
|
curr_pos_y >>= 1;
|
|
}
|
|
|
|
/* increment the pointers to start of current Sub-TU */
|
|
pu1_cur_recon = (pu1_recon + curr_pos_x);
|
|
pu1_cur_recon += (curr_pos_y * i4_recon_stride);
|
|
pu1_cur_src = (pu1_chrm_src + curr_pos_x);
|
|
pu1_cur_src += (curr_pos_y * chrm_src_stride);
|
|
pu1_cur_pred = (pu1_pred + curr_pos_x);
|
|
pu1_cur_pred += (curr_pos_y * pred_strd);
|
|
pi2_cur_deq_data = pi2_deq_data + curr_pos_x;
|
|
pi2_cur_deq_data += (curr_pos_y * deq_data_strd);
|
|
|
|
/* populate the coeffs scan idx */
|
|
scan_idx = SCAN_DIAG_UPRIGHT;
|
|
|
|
/* perform intra prediction only for Intra case */
|
|
if(PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag)
|
|
{
|
|
UWORD8 *pu1_top_left;
|
|
UWORD8 *pu1_top;
|
|
UWORD8 *pu1_left;
|
|
WORD32 left_strd;
|
|
|
|
calc_recon = !u1_compute_spatial_ssd &&
|
|
((4 == u1_num_tus) || (u1_is_422 == 1)) &&
|
|
(((u1_num_tus == 1) && (0 == i4_subtu_idx)) ||
|
|
((ctr == 3) && (0 == i4_subtu_idx) && (u1_is_422 == 1)) ||
|
|
((u1_num_tus == 4) && (ctr < 3)));
|
|
|
|
/* left cu boundary */
|
|
if(0 == curr_pos_x)
|
|
{
|
|
pu1_left = pu1_cu_left + curr_pos_y * cu_left_stride;
|
|
left_strd = cu_left_stride;
|
|
}
|
|
else
|
|
{
|
|
pu1_left = pu1_cur_recon - 2;
|
|
left_strd = i4_recon_stride;
|
|
}
|
|
|
|
/* top cu boundary */
|
|
if(0 == curr_pos_y)
|
|
{
|
|
pu1_top = pu1_cu_top + curr_pos_x;
|
|
}
|
|
else
|
|
{
|
|
pu1_top = pu1_cur_recon - i4_recon_stride;
|
|
}
|
|
|
|
/* by default top left is set to cu top left */
|
|
pu1_top_left = pu1_cu_top_left;
|
|
|
|
/* top left based on position */
|
|
if((0 != curr_pos_y) && (0 == curr_pos_x))
|
|
{
|
|
pu1_top_left = pu1_left - cu_left_stride;
|
|
}
|
|
else if(0 != curr_pos_x)
|
|
{
|
|
pu1_top_left = pu1_top - 2;
|
|
}
|
|
|
|
/* for 4x4 transforms based on intra pred mode scan is choosen*/
|
|
if(4 == trans_size)
|
|
{
|
|
/* for modes from 22 upto 30 horizontal scan is used */
|
|
if((chrm_pred_mode > 21) && (chrm_pred_mode < 31))
|
|
{
|
|
scan_idx = SCAN_HORZ;
|
|
}
|
|
/* for modes from 6 upto 14 horizontal scan is used */
|
|
else if((chrm_pred_mode > 5) && (chrm_pred_mode < 15))
|
|
{
|
|
scan_idx = SCAN_VERT;
|
|
}
|
|
}
|
|
|
|
nbr_flags = ihevce_get_intra_chroma_tu_nbr(
|
|
ps_best_cu_prms->au4_nbr_flags[ctr],
|
|
i4_subtu_idx,
|
|
trans_size,
|
|
u1_is_422);
|
|
|
|
/* call the chroma reference array substitution */
|
|
ihevc_intra_pred_chroma_ref_substitution_fptr(
|
|
pu1_top_left,
|
|
pu1_top,
|
|
pu1_left,
|
|
left_strd,
|
|
trans_size,
|
|
nbr_flags,
|
|
(UWORD8 *)ps_ctxt->pv_ref_sub_out,
|
|
1);
|
|
|
|
/* use the look up to get the function idx */
|
|
chrm_pred_func_idx = g_i4_ip_funcs[chrm_pred_mode];
|
|
|
|
/* call the intra prediction function */
|
|
ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
|
|
(UWORD8 *)ps_ctxt->pv_ref_sub_out,
|
|
1,
|
|
pu1_cur_pred,
|
|
pred_strd,
|
|
trans_size,
|
|
chrm_pred_mode);
|
|
}
|
|
|
|
if(!ctr && !i4_subtu_idx && (u1_compute_spatial_ssd || calc_recon))
|
|
{
|
|
ps_recon_datastore->au1_is_chromaRecon_available[0] =
|
|
!ps_best_cu_prms->u1_skip_flag;
|
|
}
|
|
else if(!ctr && !i4_subtu_idx)
|
|
{
|
|
ps_recon_datastore->au1_is_chromaRecon_available[0] = 0;
|
|
}
|
|
/************************************************************/
|
|
/* recon loop is done for all cases including skip cu */
|
|
/* This is because skipping chroma reisdual based on luma */
|
|
/* skip decision can lead to chroma artifacts */
|
|
/************************************************************/
|
|
/************************************************************/
|
|
/*In the high quality and medium speed modes, wherein chroma*/
|
|
/*and luma costs are included in the total cost calculation */
|
|
/*the cost is just a ssd cost, and not that obtained through*/
|
|
/*iq_it path */
|
|
/************************************************************/
|
|
if(ps_best_cu_prms->u1_skip_flag == 0)
|
|
{
|
|
WORD32 tu_bits;
|
|
|
|
cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
|
|
ps_ctxt,
|
|
pu1_cur_pred,
|
|
pred_strd,
|
|
pu1_cur_src,
|
|
chrm_src_stride,
|
|
pi2_cur_deq_data,
|
|
deq_data_strd,
|
|
pu1_cur_recon,
|
|
i4_recon_stride,
|
|
pu1_ecd_data + total_bytes_offset,
|
|
ps_ctxt->au1_cu_csbf,
|
|
ps_ctxt->i4_cu_csbf_strd,
|
|
trans_size,
|
|
scan_idx,
|
|
PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag,
|
|
&num_bytes,
|
|
&tu_bits,
|
|
&zero_cols,
|
|
&zero_rows,
|
|
&u1_is_recon_available,
|
|
i4_perform_sbh,
|
|
i4_perform_rdoq,
|
|
&trans_ssd_u,
|
|
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
|
|
i4_alpha_stim_multiplier,
|
|
u1_is_cu_noisy,
|
|
#endif
|
|
ps_best_cu_prms->u1_skip_flag,
|
|
u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
|
|
U_PLANE);
|
|
|
|
if(u1_compute_spatial_ssd && u1_is_recon_available)
|
|
{
|
|
ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
|
|
[i4_subtu_idx] = 0;
|
|
}
|
|
else
|
|
{
|
|
ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
|
|
[i4_subtu_idx] = UCHAR_MAX;
|
|
}
|
|
|
|
#if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
|
|
if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
|
|
{
|
|
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
|
|
trans_ssd_u = ihevce_inject_stim_into_distortion(
|
|
pu1_cur_src,
|
|
chrm_src_stride,
|
|
pu1_cur_pred,
|
|
pred_strd,
|
|
trans_ssd_u,
|
|
i4_alpha_stim_multiplier,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
U_PLANE);
|
|
#else
|
|
if(u1_compute_spatial_ssd && u1_is_recon_available)
|
|
{
|
|
trans_ssd_u = ihevce_inject_stim_into_distortion(
|
|
pu1_cur_src,
|
|
chrm_src_stride,
|
|
pu1_cur_recon,
|
|
i4_recon_stride,
|
|
trans_ssd_u,
|
|
i4_alpha_stim_multiplier,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
U_PLANE);
|
|
}
|
|
else
|
|
{
|
|
trans_ssd_u = ihevce_inject_stim_into_distortion(
|
|
pu1_cur_src,
|
|
chrm_src_stride,
|
|
pu1_cur_pred,
|
|
pred_strd,
|
|
trans_ssd_u,
|
|
i4_alpha_stim_multiplier,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
U_PLANE);
|
|
}
|
|
#endif
|
|
}
|
|
#endif
|
|
|
|
curr_cb_cod_cost =
|
|
trans_ssd_u +
|
|
COMPUTE_RATE_COST_CLIP30(
|
|
tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
|
|
|
|
chrm_tu_bits += tu_bits;
|
|
i4_bits_cb += tu_bits;
|
|
|
|
/* RDOPT copy States : New updated after curr TU to TU init */
|
|
if(0 != cbf)
|
|
{
|
|
COPY_CABAC_STATES(
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0],
|
|
&ps_ctxt->s_rdopt_entropy_ctxt
|
|
.as_cu_entropy_ctxt[rd_opt_curr_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0],
|
|
IHEVC_CAB_CTXT_END);
|
|
}
|
|
/* RDOPT copy States : Restoring back the Cb init state to Cr */
|
|
else
|
|
{
|
|
COPY_CABAC_STATES(
|
|
&ps_ctxt->s_rdopt_entropy_ctxt
|
|
.as_cu_entropy_ctxt[rd_opt_curr_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0],
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0],
|
|
IHEVC_CAB_CTXT_END);
|
|
}
|
|
|
|
/* If Intra and TU=CU/2, need recon for next TUs */
|
|
if(calc_recon)
|
|
{
|
|
ihevce_chroma_it_recon_fxn(
|
|
ps_ctxt,
|
|
pi2_cur_deq_data,
|
|
deq_data_strd,
|
|
pu1_cur_pred,
|
|
pred_strd,
|
|
pu1_cur_recon,
|
|
i4_recon_stride,
|
|
(pu1_ecd_data + total_bytes_offset),
|
|
trans_size,
|
|
cbf,
|
|
zero_cols,
|
|
zero_rows,
|
|
U_PLANE);
|
|
|
|
ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
|
|
[i4_subtu_idx] = 0;
|
|
}
|
|
else
|
|
{
|
|
ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
|
|
[i4_subtu_idx] = UCHAR_MAX;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* num bytes is set to 0 */
|
|
num_bytes = 0;
|
|
|
|
/* cbf is returned as 0 */
|
|
cbf = 0;
|
|
|
|
curr_cb_cod_cost = trans_ssd_u =
|
|
|
|
ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
|
|
pu1_cur_pred,
|
|
pu1_cur_src,
|
|
pred_strd,
|
|
chrm_src_stride,
|
|
trans_size,
|
|
trans_size,
|
|
U_PLANE);
|
|
|
|
if(u1_compute_spatial_ssd)
|
|
{
|
|
/* buffer copy fromp pred to recon */
|
|
|
|
ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
|
|
pu1_cur_pred,
|
|
pred_strd,
|
|
pu1_cur_recon,
|
|
i4_recon_stride,
|
|
trans_size,
|
|
trans_size,
|
|
U_PLANE);
|
|
|
|
ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
|
|
[i4_subtu_idx] = 0;
|
|
}
|
|
|
|
if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
|
|
{
|
|
trans_ssd_u = ihevce_inject_stim_into_distortion(
|
|
pu1_cur_src,
|
|
chrm_src_stride,
|
|
pu1_cur_pred,
|
|
pred_strd,
|
|
trans_ssd_u,
|
|
i4_alpha_stim_multiplier,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
U_PLANE);
|
|
}
|
|
|
|
#if ENABLE_INTER_ZCU_COST
|
|
#if !WEIGH_CHROMA_COST
|
|
/* cbf = 0, accumulate cu not coded cost */
|
|
ps_ctxt->i8_cu_not_coded_cost += curr_cb_cod_cost;
|
|
#else
|
|
/* cbf = 0, accumulate cu not coded cost */
|
|
|
|
ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
|
|
(curr_cb_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
|
|
(1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
|
|
CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
|
|
#endif
|
|
#endif
|
|
}
|
|
|
|
#if !WEIGH_CHROMA_COST
|
|
curr_rdopt_cost += curr_cb_cod_cost;
|
|
#else
|
|
curr_rdopt_cost +=
|
|
((curr_cb_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
|
|
(1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
|
|
CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
|
|
#endif
|
|
chrm_cod_cost += curr_cb_cod_cost;
|
|
i8_ssd_cb += trans_ssd_u;
|
|
|
|
if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
|
|
{
|
|
/* Early exit : If the current running cost exceeds
|
|
the prev. best mode cost, break */
|
|
if(curr_rdopt_cost > prev_best_rdopt_cost)
|
|
{
|
|
u1_is_early_exit_condition_satisfied = 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* inter cu is coded if any of the tu is coded in it */
|
|
ps_best_cu_prms->u1_is_cu_coded |= cbf;
|
|
|
|
/* update CB related params */
|
|
ps_tu->ai4_cb_coeff_offset[i4_subtu_idx] =
|
|
total_bytes_offset + init_bytes_offset;
|
|
|
|
if(0 == i4_subtu_idx)
|
|
{
|
|
ps_tu->s_tu.b1_cb_cbf = cbf;
|
|
}
|
|
else
|
|
{
|
|
ps_tu->s_tu.b1_cb_cbf_subtu1 = cbf;
|
|
}
|
|
|
|
total_bytes_offset += num_bytes;
|
|
|
|
ps_tu_temp_prms->au4_cb_zero_col[i4_subtu_idx] = zero_cols;
|
|
ps_tu_temp_prms->au4_cb_zero_row[i4_subtu_idx] = zero_rows;
|
|
ps_tu_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx] = num_bytes;
|
|
|
|
/* recon loop is done for non skip cases */
|
|
if(ps_best_cu_prms->u1_skip_flag == 0)
|
|
{
|
|
WORD32 tu_bits;
|
|
|
|
cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
|
|
ps_ctxt,
|
|
pu1_cur_pred,
|
|
pred_strd,
|
|
pu1_cur_src,
|
|
chrm_src_stride,
|
|
pi2_cur_deq_data + trans_size,
|
|
deq_data_strd,
|
|
pu1_cur_recon,
|
|
i4_recon_stride,
|
|
pu1_ecd_data + total_bytes_offset,
|
|
ps_ctxt->au1_cu_csbf,
|
|
ps_ctxt->i4_cu_csbf_strd,
|
|
trans_size,
|
|
scan_idx,
|
|
PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag,
|
|
&num_bytes,
|
|
&tu_bits,
|
|
&zero_cols,
|
|
&zero_rows,
|
|
&u1_is_recon_available,
|
|
i4_perform_sbh,
|
|
i4_perform_rdoq,
|
|
&trans_ssd_v,
|
|
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
|
|
i4_alpha_stim_multiplier,
|
|
u1_is_cu_noisy,
|
|
#endif
|
|
ps_best_cu_prms->u1_skip_flag,
|
|
u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
|
|
V_PLANE);
|
|
|
|
if(u1_compute_spatial_ssd && u1_is_recon_available)
|
|
{
|
|
ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
|
|
[i4_subtu_idx] = 0;
|
|
}
|
|
else
|
|
{
|
|
ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
|
|
[i4_subtu_idx] = UCHAR_MAX;
|
|
}
|
|
|
|
#if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
|
|
if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
|
|
{
|
|
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
|
|
trans_ssd_v = ihevce_inject_stim_into_distortion(
|
|
pu1_cur_src,
|
|
chrm_src_stride,
|
|
pu1_cur_pred,
|
|
pred_strd,
|
|
trans_ssd_v,
|
|
i4_alpha_stim_multiplier,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
V_PLANE);
|
|
#else
|
|
if(u1_compute_spatial_ssd && u1_is_recon_available)
|
|
{
|
|
trans_ssd_v = ihevce_inject_stim_into_distortion(
|
|
pu1_cur_src,
|
|
chrm_src_stride,
|
|
pu1_cur_recon,
|
|
i4_recon_stride,
|
|
trans_ssd_v,
|
|
i4_alpha_stim_multiplier,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
V_PLANE);
|
|
}
|
|
else
|
|
{
|
|
trans_ssd_v = ihevce_inject_stim_into_distortion(
|
|
pu1_cur_src,
|
|
chrm_src_stride,
|
|
pu1_cur_pred,
|
|
pred_strd,
|
|
trans_ssd_v,
|
|
i4_alpha_stim_multiplier,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
V_PLANE);
|
|
}
|
|
#endif
|
|
}
|
|
#endif
|
|
|
|
curr_cr_cod_cost =
|
|
trans_ssd_v +
|
|
COMPUTE_RATE_COST_CLIP30(
|
|
tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
|
|
chrm_tu_bits += tu_bits;
|
|
i4_bits_cr += tu_bits;
|
|
|
|
/* RDOPT copy States : New updated after curr TU to TU init */
|
|
if(0 != cbf)
|
|
{
|
|
COPY_CABAC_STATES(
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0],
|
|
&ps_ctxt->s_rdopt_entropy_ctxt
|
|
.as_cu_entropy_ctxt[rd_opt_curr_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0],
|
|
IHEVC_CAB_CTXT_END);
|
|
}
|
|
/* RDOPT copy States : Restoring back the Cb init state to Cr */
|
|
else
|
|
{
|
|
COPY_CABAC_STATES(
|
|
&ps_ctxt->s_rdopt_entropy_ctxt
|
|
.as_cu_entropy_ctxt[rd_opt_curr_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0],
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0],
|
|
IHEVC_CAB_CTXT_END);
|
|
}
|
|
|
|
/* If Intra and TU=CU/2, need recon for next TUs */
|
|
if(calc_recon)
|
|
{
|
|
ihevce_chroma_it_recon_fxn(
|
|
ps_ctxt,
|
|
(pi2_cur_deq_data + trans_size),
|
|
deq_data_strd,
|
|
pu1_cur_pred,
|
|
pred_strd,
|
|
pu1_cur_recon,
|
|
i4_recon_stride,
|
|
(pu1_ecd_data + total_bytes_offset),
|
|
trans_size,
|
|
cbf,
|
|
zero_cols,
|
|
zero_rows,
|
|
V_PLANE);
|
|
|
|
ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
|
|
[i4_subtu_idx] = 0;
|
|
}
|
|
else
|
|
{
|
|
ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
|
|
[i4_subtu_idx] = UCHAR_MAX;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* num bytes is set to 0 */
|
|
num_bytes = 0;
|
|
|
|
/* cbf is returned as 0 */
|
|
cbf = 0;
|
|
|
|
curr_cr_cod_cost = trans_ssd_v =
|
|
|
|
ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
|
|
pu1_cur_pred,
|
|
pu1_cur_src,
|
|
pred_strd,
|
|
chrm_src_stride,
|
|
trans_size,
|
|
trans_size,
|
|
V_PLANE);
|
|
|
|
if(u1_compute_spatial_ssd)
|
|
{
|
|
/* buffer copy from pred to recon */
|
|
ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
|
|
pu1_cur_pred,
|
|
pred_strd,
|
|
pu1_cur_recon,
|
|
i4_recon_stride,
|
|
trans_size,
|
|
trans_size,
|
|
V_PLANE);
|
|
|
|
ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
|
|
[i4_subtu_idx] = 0;
|
|
}
|
|
|
|
if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
|
|
{
|
|
trans_ssd_v = ihevce_inject_stim_into_distortion(
|
|
pu1_cur_src,
|
|
chrm_src_stride,
|
|
pu1_cur_pred,
|
|
pred_strd,
|
|
trans_ssd_v,
|
|
i4_alpha_stim_multiplier,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
V_PLANE);
|
|
}
|
|
|
|
#if ENABLE_INTER_ZCU_COST
|
|
#if !WEIGH_CHROMA_COST
|
|
/* cbf = 0, accumulate cu not coded cost */
|
|
ps_ctxt->i8_cu_not_coded_cost += curr_cr_cod_cost;
|
|
#else
|
|
/* cbf = 0, accumulate cu not coded cost */
|
|
|
|
ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
|
|
(curr_cr_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
|
|
(1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
|
|
CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
|
|
#endif
|
|
#endif
|
|
}
|
|
|
|
#if !WEIGH_CHROMA_COST
|
|
curr_rdopt_cost += curr_cr_cod_cost;
|
|
#else
|
|
curr_rdopt_cost +=
|
|
((curr_cr_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
|
|
(1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
|
|
CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
|
|
#endif
|
|
|
|
chrm_cod_cost += curr_cr_cod_cost;
|
|
i8_ssd_cr += trans_ssd_v;
|
|
|
|
if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
|
|
{
|
|
/* Early exit : If the current running cost exceeds
|
|
the prev. best mode cost, break */
|
|
if(curr_rdopt_cost > prev_best_rdopt_cost)
|
|
{
|
|
u1_is_early_exit_condition_satisfied = 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* inter cu is coded if any of the tu is coded in it */
|
|
ps_best_cu_prms->u1_is_cu_coded |= cbf;
|
|
|
|
/* update CR related params */
|
|
ps_tu->ai4_cr_coeff_offset[i4_subtu_idx] =
|
|
total_bytes_offset + init_bytes_offset;
|
|
|
|
if(0 == i4_subtu_idx)
|
|
{
|
|
ps_tu->s_tu.b1_cr_cbf = cbf;
|
|
}
|
|
else
|
|
{
|
|
ps_tu->s_tu.b1_cr_cbf_subtu1 = cbf;
|
|
}
|
|
|
|
total_bytes_offset += num_bytes;
|
|
|
|
ps_tu_temp_prms->au4_cr_zero_col[i4_subtu_idx] = zero_cols;
|
|
ps_tu_temp_prms->au4_cr_zero_row[i4_subtu_idx] = zero_rows;
|
|
ps_tu_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx] = num_bytes;
|
|
}
|
|
else
|
|
{
|
|
ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx] =
|
|
UCHAR_MAX;
|
|
ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx] =
|
|
UCHAR_MAX;
|
|
}
|
|
}
|
|
|
|
if(u1_is_early_exit_condition_satisfied)
|
|
{
|
|
break;
|
|
}
|
|
|
|
/* loop increments */
|
|
ps_tu++;
|
|
ps_tu_temp_prms++;
|
|
}
|
|
|
|
/* Signal as luma mode. HIGH_QUALITY may update it */
|
|
ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
|
|
|
|
/* modify the cost chrm_cod_cost */
|
|
if(ps_ctxt->u1_enable_psyRDOPT)
|
|
{
|
|
UWORD8 *pu1_recon_cu;
|
|
WORD32 recon_stride;
|
|
WORD32 curr_pos_x;
|
|
WORD32 curr_pos_y;
|
|
WORD32 start_index;
|
|
WORD32 num_horz_cu_in_ctb;
|
|
WORD32 had_block_size;
|
|
/* tODO: sreenivasa ctb size has to be used appropriately */
|
|
had_block_size = 8;
|
|
num_horz_cu_in_ctb = 2 * 64 / had_block_size;
|
|
|
|
curr_pos_x = cu_pos_x << 3; /* pel units */
|
|
curr_pos_y = cu_pos_y << 3; /* pel units */
|
|
recon_stride = i4_recon_stride;
|
|
pu1_recon_cu = pu1_recon;
|
|
|
|
/* start index to index the source satd of curr cu in the current ctb */
|
|
start_index = 2 * (curr_pos_x / had_block_size) +
|
|
(curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
|
|
|
|
{
|
|
chrm_cod_cost += ihevce_psy_rd_cost_croma(
|
|
ps_ctxt->ai4_source_chroma_satd,
|
|
pu1_recon,
|
|
recon_stride,
|
|
1, //
|
|
cu_size,
|
|
0, // pic type
|
|
0, //layer id
|
|
ps_ctxt->i4_satd_lamda, // lambda
|
|
start_index,
|
|
ps_ctxt->u1_is_input_data_hbd, // 8 bit
|
|
ps_ctxt->u1_chroma_array_type,
|
|
&ps_ctxt->s_cmn_opt_func
|
|
|
|
); // chroma subsampling 420
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
u1_is_mode_eq_chroma_satd_mode = 1;
|
|
chrm_cod_cost = MAX_COST_64;
|
|
}
|
|
|
|
/* If Intra Block and preset is HIGH QUALITY, then compare with best SATD mode */
|
|
if((PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag) &&
|
|
(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd))
|
|
{
|
|
if(64 == cu_size)
|
|
{
|
|
ASSERT(TU_EQ_CU != func_proc_mode);
|
|
}
|
|
|
|
if(ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode]
|
|
.i8_chroma_best_rdopt < chrm_cod_cost)
|
|
{
|
|
UWORD8 *pu1_src;
|
|
UWORD8 *pu1_ecd_data_src_cb;
|
|
UWORD8 *pu1_ecd_data_src_cr;
|
|
|
|
chroma_intra_satd_ctxt_t *ps_chr_intra_satd_ctxt =
|
|
&ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode];
|
|
|
|
UWORD8 *pu1_dst = &ps_ctxt->au1_rdopt_init_ctxt_models[0];
|
|
WORD32 ai4_ecd_data_cb_offset[2] = { 0, 0 };
|
|
WORD32 ai4_ecd_data_cr_offset[2] = { 0, 0 };
|
|
|
|
pu1_src = &ps_chr_intra_satd_ctxt->au1_chrm_satd_updated_ctxt_models[0];
|
|
chrm_cod_cost = ps_chr_intra_satd_ctxt->i8_chroma_best_rdopt;
|
|
chrm_pred_mode = ps_chr_intra_satd_ctxt->u1_best_cr_mode;
|
|
chrm_tu_bits = ps_chr_intra_satd_ctxt->i4_chrm_tu_bits;
|
|
|
|
if(u1_is_mode_eq_chroma_satd_mode)
|
|
{
|
|
chrm_cod_cost -= ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode;
|
|
}
|
|
|
|
/* Resetting total_bytes_offset to 0 */
|
|
total_bytes_offset = 0;
|
|
|
|
/* Update the CABAC state corresponding to chroma only */
|
|
/* Chroma Cbf */
|
|
memcpy(pu1_dst + IHEVC_CAB_CBCR_IDX, pu1_src + IHEVC_CAB_CBCR_IDX, 2);
|
|
/* Chroma transform skip */
|
|
memcpy(pu1_dst + IHEVC_CAB_TFM_SKIP12, pu1_src + IHEVC_CAB_TFM_SKIP12, 1);
|
|
/* Chroma last coeff x prefix */
|
|
memcpy(
|
|
pu1_dst + IHEVC_CAB_COEFFX_PREFIX + 15,
|
|
pu1_src + IHEVC_CAB_COEFFX_PREFIX + 15,
|
|
3);
|
|
/* Chroma last coeff y prefix */
|
|
memcpy(
|
|
pu1_dst + IHEVC_CAB_COEFFY_PREFIX + 15,
|
|
pu1_src + IHEVC_CAB_COEFFY_PREFIX + 15,
|
|
3);
|
|
/* Chroma csbf */
|
|
memcpy(
|
|
pu1_dst + IHEVC_CAB_CODED_SUBLK_IDX + 2,
|
|
pu1_src + IHEVC_CAB_CODED_SUBLK_IDX + 2,
|
|
2);
|
|
/* Chroma sig coeff flags */
|
|
memcpy(
|
|
pu1_dst + IHEVC_CAB_COEFF_FLAG + 27, pu1_src + IHEVC_CAB_COEFF_FLAG + 27, 15);
|
|
/* Chroma absgt1 flags */
|
|
memcpy(
|
|
pu1_dst + IHEVC_CAB_COEFABS_GRTR1_FLAG + 16,
|
|
pu1_src + IHEVC_CAB_COEFABS_GRTR1_FLAG + 16,
|
|
8);
|
|
/* Chroma absgt2 flags */
|
|
memcpy(
|
|
pu1_dst + IHEVC_CAB_COEFABS_GRTR2_FLAG + 4,
|
|
pu1_src + IHEVC_CAB_COEFABS_GRTR2_FLAG + 4,
|
|
2);
|
|
|
|
ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
|
|
ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
|
|
|
|
/* update to luma decision as we update chroma in final mode */
|
|
ps_best_cu_prms->u1_is_cu_coded = u1_is_cu_coded_old;
|
|
|
|
for(ctr = 0; ctr < u1_num_tus; ctr++)
|
|
{
|
|
for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus_in_tu; i4_subtu_idx++)
|
|
{
|
|
WORD32 cbf;
|
|
WORD32 num_bytes;
|
|
|
|
pu1_ecd_data_src_cb =
|
|
&ps_chr_intra_satd_ctxt->au1_scan_coeff_cb[i4_subtu_idx][0];
|
|
pu1_ecd_data_src_cr =
|
|
&ps_chr_intra_satd_ctxt->au1_scan_coeff_cr[i4_subtu_idx][0];
|
|
|
|
/* check if chroma present flag is set */
|
|
if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
|
|
{
|
|
UWORD8 *pu1_cur_pred_dest;
|
|
UWORD8 *pu1_cur_pred_src;
|
|
WORD32 pred_src_strd;
|
|
WORD16 *pi2_cur_deq_data_dest;
|
|
WORD16 *pi2_cur_deq_data_src_cb;
|
|
WORD16 *pi2_cur_deq_data_src_cr;
|
|
WORD32 deq_src_strd;
|
|
|
|
WORD32 curr_pos_x, curr_pos_y;
|
|
|
|
trans_size = ps_tu->s_tu.b3_size;
|
|
trans_size = (1 << (trans_size + 1)); /* in chroma units */
|
|
|
|
/*Deriving stride values*/
|
|
pred_src_strd = ps_chr_intra_satd_ctxt->i4_pred_stride;
|
|
deq_src_strd = ps_chr_intra_satd_ctxt->i4_iq_buff_stride;
|
|
|
|
/* since 2x2 transform is not allowed for chroma*/
|
|
if(2 == trans_size)
|
|
{
|
|
trans_size = 4;
|
|
}
|
|
|
|
/* get the current tu posx and posy w.r.t to cu */
|
|
curr_pos_x = (ps_tu->s_tu.b4_pos_x << 2) - (cu_pos_x << 3);
|
|
curr_pos_y = (ps_tu->s_tu.b4_pos_y << 2) - (cu_pos_y << 3) +
|
|
(i4_subtu_idx * trans_size);
|
|
|
|
/* 420sp case only vertical height will be half */
|
|
if(0 == u1_is_422)
|
|
{
|
|
curr_pos_y >>= 1;
|
|
}
|
|
|
|
/* increment the pointers to start of current TU */
|
|
pu1_cur_pred_src =
|
|
((UWORD8 *)ps_chr_intra_satd_ctxt->pv_pred_data + curr_pos_x);
|
|
pu1_cur_pred_src += (curr_pos_y * pred_src_strd);
|
|
pu1_cur_pred_dest = (pu1_pred + curr_pos_x);
|
|
pu1_cur_pred_dest += (curr_pos_y * pred_strd);
|
|
|
|
pi2_cur_deq_data_src_cb =
|
|
&ps_chr_intra_satd_ctxt->ai2_iq_data_cb[0] + (curr_pos_x >> 1);
|
|
pi2_cur_deq_data_src_cr =
|
|
&ps_chr_intra_satd_ctxt->ai2_iq_data_cr[0] + (curr_pos_x >> 1);
|
|
pi2_cur_deq_data_src_cb += (curr_pos_y * deq_src_strd);
|
|
pi2_cur_deq_data_src_cr += (curr_pos_y * deq_src_strd);
|
|
pi2_cur_deq_data_dest = pi2_deq_data + curr_pos_x;
|
|
pi2_cur_deq_data_dest += (curr_pos_y * deq_data_strd);
|
|
|
|
/*Overwriting deq data with that belonging to the winning special mode
|
|
(luma mode != chroma mode)
|
|
ihevce_copy_2d takes source and dest arguments as UWORD8 *. We have to
|
|
correspondingly manipulate to copy WORD16 data*/
|
|
|
|
ps_ctxt->s_cmn_opt_func.pf_copy_2d(
|
|
(UWORD8 *)pi2_cur_deq_data_dest,
|
|
(deq_data_strd << 1),
|
|
(UWORD8 *)pi2_cur_deq_data_src_cb,
|
|
(deq_src_strd << 1),
|
|
(trans_size << 1),
|
|
trans_size);
|
|
|
|
ps_ctxt->s_cmn_opt_func.pf_copy_2d(
|
|
(UWORD8 *)(pi2_cur_deq_data_dest + trans_size),
|
|
(deq_data_strd << 1),
|
|
(UWORD8 *)pi2_cur_deq_data_src_cr,
|
|
(deq_src_strd << 1),
|
|
(trans_size << 1),
|
|
trans_size);
|
|
|
|
/*Overwriting pred data with that belonging to the winning special mode
|
|
(luma mode != chroma mode)*/
|
|
|
|
ps_ctxt->s_cmn_opt_func.pf_copy_2d(
|
|
pu1_cur_pred_dest,
|
|
pred_strd,
|
|
pu1_cur_pred_src,
|
|
pred_src_strd,
|
|
(trans_size << 1),
|
|
trans_size);
|
|
|
|
num_bytes = ps_chr_intra_satd_ctxt
|
|
->ai4_num_bytes_scan_coeff_cb_per_tu[i4_subtu_idx][ctr];
|
|
cbf = ps_chr_intra_satd_ctxt->au1_cbf_cb[i4_subtu_idx][ctr];
|
|
/* inter cu is coded if any of the tu is coded in it */
|
|
ps_best_cu_prms->u1_is_cu_coded |= cbf;
|
|
|
|
/* update CB related params */
|
|
ps_tu->ai4_cb_coeff_offset[i4_subtu_idx] =
|
|
total_bytes_offset + init_bytes_offset;
|
|
|
|
if(0 == i4_subtu_idx)
|
|
{
|
|
ps_tu->s_tu.b1_cb_cbf = cbf;
|
|
}
|
|
else
|
|
{
|
|
ps_tu->s_tu.b1_cb_cbf_subtu1 = cbf;
|
|
}
|
|
|
|
/*Overwriting the cb ecd data corresponding to the special mode*/
|
|
if(0 != num_bytes)
|
|
{
|
|
memcpy(
|
|
(pu1_ecd_data + total_bytes_offset),
|
|
pu1_ecd_data_src_cb + ai4_ecd_data_cb_offset[i4_subtu_idx],
|
|
num_bytes);
|
|
}
|
|
|
|
total_bytes_offset += num_bytes;
|
|
ai4_ecd_data_cb_offset[i4_subtu_idx] += num_bytes;
|
|
ps_tu_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx] = num_bytes;
|
|
|
|
num_bytes = ps_chr_intra_satd_ctxt
|
|
->ai4_num_bytes_scan_coeff_cr_per_tu[i4_subtu_idx][ctr];
|
|
cbf = ps_chr_intra_satd_ctxt->au1_cbf_cr[i4_subtu_idx][ctr];
|
|
/* inter cu is coded if any of the tu is coded in it */
|
|
ps_best_cu_prms->u1_is_cu_coded |= cbf;
|
|
|
|
/*Overwriting the cr ecd data corresponding to the special mode*/
|
|
if(0 != num_bytes)
|
|
{
|
|
memcpy(
|
|
(pu1_ecd_data + total_bytes_offset),
|
|
pu1_ecd_data_src_cr + ai4_ecd_data_cr_offset[i4_subtu_idx],
|
|
num_bytes);
|
|
}
|
|
|
|
/* update CR related params */
|
|
ps_tu->ai4_cr_coeff_offset[i4_subtu_idx] =
|
|
total_bytes_offset + init_bytes_offset;
|
|
|
|
if(0 == i4_subtu_idx)
|
|
{
|
|
ps_tu->s_tu.b1_cr_cbf = cbf;
|
|
}
|
|
else
|
|
{
|
|
ps_tu->s_tu.b1_cr_cbf_subtu1 = cbf;
|
|
}
|
|
|
|
total_bytes_offset += num_bytes;
|
|
ai4_ecd_data_cr_offset[i4_subtu_idx] += num_bytes;
|
|
|
|
/*Updating zero rows and zero cols*/
|
|
ps_tu_temp_prms->au4_cb_zero_col[i4_subtu_idx] =
|
|
ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr];
|
|
ps_tu_temp_prms->au4_cb_zero_row[i4_subtu_idx] =
|
|
ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr];
|
|
ps_tu_temp_prms->au4_cr_zero_col[i4_subtu_idx] =
|
|
ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr];
|
|
ps_tu_temp_prms->au4_cr_zero_row[i4_subtu_idx] =
|
|
ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr];
|
|
|
|
ps_tu_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx] = num_bytes;
|
|
|
|
if((u1_num_tus > 1) &&
|
|
ps_recon_datastore->au1_is_chromaRecon_available[2])
|
|
{
|
|
ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
|
|
[i4_subtu_idx] = 2;
|
|
ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
|
|
[i4_subtu_idx] = 2;
|
|
}
|
|
else if(
|
|
(1 == u1_num_tus) &&
|
|
ps_recon_datastore->au1_is_chromaRecon_available[1])
|
|
{
|
|
ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
|
|
[i4_subtu_idx] = 1;
|
|
ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
|
|
[i4_subtu_idx] = 1;
|
|
}
|
|
else
|
|
{
|
|
ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
|
|
[i4_subtu_idx] = UCHAR_MAX;
|
|
ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
|
|
[i4_subtu_idx] = UCHAR_MAX;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* loop increments */
|
|
ps_tu++;
|
|
ps_tu_temp_prms++;
|
|
}
|
|
}
|
|
|
|
if(!u1_is_422)
|
|
{
|
|
if(chrm_pred_mode == luma_pred_mode)
|
|
{
|
|
ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
|
|
}
|
|
else if(chrm_pred_mode == 0)
|
|
{
|
|
ps_best_cu_prms->u1_chroma_intra_pred_mode = 0;
|
|
}
|
|
else if(chrm_pred_mode == 1)
|
|
{
|
|
ps_best_cu_prms->u1_chroma_intra_pred_mode = 3;
|
|
}
|
|
else if(chrm_pred_mode == 10)
|
|
{
|
|
ps_best_cu_prms->u1_chroma_intra_pred_mode = 2;
|
|
}
|
|
else if(chrm_pred_mode == 26)
|
|
{
|
|
ps_best_cu_prms->u1_chroma_intra_pred_mode = 1;
|
|
}
|
|
else
|
|
{
|
|
ASSERT(0); /*Should not come here*/
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[luma_pred_mode])
|
|
{
|
|
ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
|
|
}
|
|
else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[0])
|
|
{
|
|
ps_best_cu_prms->u1_chroma_intra_pred_mode = 0;
|
|
}
|
|
else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[1])
|
|
{
|
|
ps_best_cu_prms->u1_chroma_intra_pred_mode = 3;
|
|
}
|
|
else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[10])
|
|
{
|
|
ps_best_cu_prms->u1_chroma_intra_pred_mode = 2;
|
|
}
|
|
else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[26])
|
|
{
|
|
ps_best_cu_prms->u1_chroma_intra_pred_mode = 1;
|
|
}
|
|
else
|
|
{
|
|
ASSERT(0); /*Should not come here*/
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Store the actual chroma mode */
|
|
ps_best_cu_prms->u1_chroma_intra_pred_actual_mode = chrm_pred_mode;
|
|
}
|
|
|
|
/* update the total bytes produced */
|
|
ps_best_cu_prms->i4_num_bytes_ecd_data = total_bytes_offset + init_bytes_offset;
|
|
|
|
/* store the final chrm bits accumulated */
|
|
*pi4_chrm_tu_bits = chrm_tu_bits;
|
|
|
|
return (chrm_cod_cost);
|
|
}
|
|
|
|
/*!
|
|
******************************************************************************
|
|
* \if Function name : ihevce_final_rdopt_mode_prcs \endif
|
|
*
|
|
* \brief
|
|
* Final RDOPT mode process function. Performs Recon computation for the
|
|
* final mode. Re-use or Compute pred, iq-data, coeff based on the flags.
|
|
*
|
|
* \param[in] ps_ctxt : pointer to enc_loop module context
|
|
* \param[in] ps_prms : pointer to struct containing requisite parameters
|
|
*
|
|
* \return
|
|
* None
|
|
*
|
|
* \author
|
|
* Ittiam
|
|
*
|
|
*****************************************************************************
|
|
*/
|
|
void ihevce_final_rdopt_mode_prcs(
|
|
ihevce_enc_loop_ctxt_t *ps_ctxt, final_mode_process_prms_t *ps_prms)
|
|
{
|
|
enc_loop_cu_final_prms_t *ps_best_cu_prms;
|
|
tu_enc_loop_out_t *ps_tu_enc_loop;
|
|
tu_enc_loop_temp_prms_t *ps_tu_enc_loop_temp_prms;
|
|
nbr_avail_flags_t s_nbr;
|
|
recon_datastore_t *ps_recon_datastore;
|
|
|
|
ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr;
|
|
ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
|
|
ihevc_intra_pred_ref_filtering_ft *ihevc_intra_pred_ref_filtering_fptr;
|
|
|
|
WORD32 num_tu_in_cu;
|
|
LWORD64 rd_opt_cost;
|
|
WORD32 ctr;
|
|
WORD32 i4_subtu_idx;
|
|
WORD32 cu_size;
|
|
WORD32 cu_pos_x, cu_pos_y;
|
|
WORD32 chrm_present_flag = 1;
|
|
WORD32 num_bytes, total_bytes = 0;
|
|
WORD32 chrm_ctr = 0;
|
|
WORD32 u1_is_cu_coded;
|
|
UWORD8 *pu1_old_ecd_data;
|
|
UWORD8 *pu1_chrm_old_ecd_data;
|
|
UWORD8 *pu1_cur_pred;
|
|
WORD16 *pi2_deq_data;
|
|
WORD16 *pi2_chrm_deq_data;
|
|
WORD16 *pi2_cur_deq_data;
|
|
WORD16 *pi2_cur_deq_data_chrm;
|
|
UWORD8 *pu1_cur_luma_recon;
|
|
UWORD8 *pu1_cur_chroma_recon;
|
|
UWORD8 *pu1_cur_src;
|
|
UWORD8 *pu1_cur_src_chrm;
|
|
UWORD8 *pu1_cur_pred_chrm;
|
|
UWORD8 *pu1_intra_pred_mode;
|
|
UWORD32 *pu4_nbr_flags;
|
|
LWORD64 i8_ssd;
|
|
|
|
cu_nbr_prms_t *ps_cu_nbr_prms = ps_prms->ps_cu_nbr_prms;
|
|
cu_inter_cand_t *ps_best_inter_cand = ps_prms->ps_best_inter_cand;
|
|
enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms = ps_prms->ps_chrm_cu_buf_prms;
|
|
|
|
WORD32 packed_pred_mode = ps_prms->packed_pred_mode;
|
|
WORD32 rd_opt_best_idx = ps_prms->rd_opt_best_idx;
|
|
UWORD8 *pu1_src = (UWORD8 *)ps_prms->pv_src;
|
|
WORD32 src_strd = ps_prms->src_strd;
|
|
UWORD8 *pu1_pred = (UWORD8 *)ps_prms->pv_pred;
|
|
WORD32 pred_strd = ps_prms->pred_strd;
|
|
UWORD8 *pu1_pred_chrm = (UWORD8 *)ps_prms->pv_pred_chrm;
|
|
WORD32 pred_chrm_strd = ps_prms->pred_chrm_strd;
|
|
UWORD8 *pu1_final_ecd_data = ps_prms->pu1_final_ecd_data;
|
|
UWORD8 *pu1_csbf_buf = ps_prms->pu1_csbf_buf;
|
|
WORD32 csbf_strd = ps_prms->csbf_strd;
|
|
UWORD8 *pu1_luma_recon = (UWORD8 *)ps_prms->pv_luma_recon;
|
|
WORD32 recon_luma_strd = ps_prms->recon_luma_strd;
|
|
UWORD8 *pu1_chrm_recon = (UWORD8 *)ps_prms->pv_chrm_recon;
|
|
WORD32 recon_chrma_strd = ps_prms->recon_chrma_strd;
|
|
UWORD8 u1_cu_pos_x = ps_prms->u1_cu_pos_x;
|
|
UWORD8 u1_cu_pos_y = ps_prms->u1_cu_pos_y;
|
|
UWORD8 u1_cu_size = ps_prms->u1_cu_size;
|
|
WORD8 i1_cu_qp = ps_prms->i1_cu_qp;
|
|
UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
|
|
UWORD8 u1_num_subtus = (u1_is_422 == 1) + 1;
|
|
/* Get the Chroma pointer and parameters */
|
|
UWORD8 *pu1_src_chrm = ps_chrm_cu_buf_prms->pu1_curr_src;
|
|
WORD32 src_chrm_strd = ps_chrm_cu_buf_prms->i4_chrm_src_stride;
|
|
UWORD8 u1_compute_spatial_ssd_luma = 0;
|
|
UWORD8 u1_compute_spatial_ssd_chroma = 0;
|
|
/* Get the pointer for function selector */
|
|
ihevc_intra_pred_luma_ref_substitution_fptr =
|
|
ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
|
|
|
|
ihevc_intra_pred_ref_filtering_fptr =
|
|
ps_ctxt->ps_func_selector->ihevc_intra_pred_ref_filtering_fptr;
|
|
|
|
ihevc_intra_pred_chroma_ref_substitution_fptr =
|
|
ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
|
|
|
|
/* Get the best CU parameters */
|
|
ps_best_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
|
|
num_tu_in_cu = ps_best_cu_prms->u2_num_tus_in_cu;
|
|
cu_size = ps_best_cu_prms->u1_cu_size;
|
|
cu_pos_x = u1_cu_pos_x;
|
|
cu_pos_y = u1_cu_pos_y;
|
|
pu1_intra_pred_mode = &ps_best_cu_prms->au1_intra_pred_mode[0];
|
|
pu4_nbr_flags = &ps_best_cu_prms->au4_nbr_flags[0];
|
|
ps_recon_datastore = &ps_best_cu_prms->s_recon_datastore;
|
|
|
|
/* get the first TU pointer */
|
|
ps_tu_enc_loop = &ps_best_cu_prms->as_tu_enc_loop[0];
|
|
/* get the first TU only enc_loop prms pointer */
|
|
ps_tu_enc_loop_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
|
|
/*modify quant related param in ctxt based on current cu qp*/
|
|
if((ps_ctxt->i1_cu_qp_delta_enable))
|
|
{
|
|
/*recompute quant related param at every cu level*/
|
|
ihevce_compute_quant_rel_param(ps_ctxt, i1_cu_qp);
|
|
|
|
/* get frame level lambda params */
|
|
ihevce_get_cl_cu_lambda_prms(
|
|
ps_ctxt, MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON ? i1_cu_qp : ps_ctxt->i4_frame_qp);
|
|
}
|
|
|
|
ps_best_cu_prms->i8_cu_ssd = 0;
|
|
ps_best_cu_prms->u4_cu_open_intra_sad = 0;
|
|
|
|
/* For skip case : Set TU_size = CU_size and make cbf = 0
|
|
so that same TU loop can be used for all modes */
|
|
if(PRED_MODE_SKIP == packed_pred_mode)
|
|
{
|
|
for(ctr = 0; ctr < num_tu_in_cu; ctr++)
|
|
{
|
|
ps_tu_enc_loop->s_tu.b1_y_cbf = 0;
|
|
|
|
ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed = 0;
|
|
|
|
ps_tu_enc_loop++;
|
|
ps_tu_enc_loop_temp_prms++;
|
|
}
|
|
|
|
/* go back to the first TU pointer */
|
|
ps_tu_enc_loop = &ps_best_cu_prms->as_tu_enc_loop[0];
|
|
ps_tu_enc_loop_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
|
|
}
|
|
/** For inter case, pred calculation is outside the loop **/
|
|
if(PRED_MODE_INTRA != packed_pred_mode)
|
|
{
|
|
/**------------- Compute pred data if required --------------**/
|
|
if((1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data))
|
|
{
|
|
nbr_4x4_t *ps_topleft_nbr_4x4;
|
|
nbr_4x4_t *ps_left_nbr_4x4;
|
|
nbr_4x4_t *ps_top_nbr_4x4;
|
|
WORD32 nbr_4x4_left_strd;
|
|
|
|
ps_best_inter_cand->pu1_pred_data = pu1_pred;
|
|
ps_best_inter_cand->i4_pred_data_stride = pred_strd;
|
|
|
|
/* Get the CU nbr information */
|
|
ps_topleft_nbr_4x4 = ps_cu_nbr_prms->ps_topleft_nbr_4x4;
|
|
ps_left_nbr_4x4 = ps_cu_nbr_prms->ps_left_nbr_4x4;
|
|
ps_top_nbr_4x4 = ps_cu_nbr_prms->ps_top_nbr_4x4;
|
|
nbr_4x4_left_strd = ps_cu_nbr_prms->nbr_4x4_left_strd;
|
|
|
|
/* MVP ,MVD calc and Motion compensation */
|
|
rd_opt_cost = ((pf_inter_rdopt_cu_mc_mvp)ps_ctxt->pv_inter_rdopt_cu_mc_mvp)(
|
|
ps_ctxt,
|
|
ps_best_inter_cand,
|
|
u1_cu_size,
|
|
cu_pos_x,
|
|
cu_pos_y,
|
|
ps_left_nbr_4x4,
|
|
ps_top_nbr_4x4,
|
|
ps_topleft_nbr_4x4,
|
|
nbr_4x4_left_strd,
|
|
rd_opt_best_idx);
|
|
}
|
|
|
|
/** ------ Motion Compensation for Chroma -------- **/
|
|
if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data)
|
|
{
|
|
UWORD8 *pu1_cur_pred;
|
|
pu1_cur_pred = pu1_pred_chrm;
|
|
|
|
/* run a loop over all the partitions in cu */
|
|
for(ctr = 0; ctr < ps_best_cu_prms->u2_num_pus_in_cu; ctr++)
|
|
{
|
|
pu_t *ps_pu;
|
|
WORD32 inter_pu_wd, inter_pu_ht;
|
|
|
|
ps_pu = &ps_best_cu_prms->as_pu_chrm_proc[ctr];
|
|
|
|
/* IF AMP then each partitions can have diff wd ht */
|
|
inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
|
|
inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
|
|
inter_pu_ht <<= u1_is_422;
|
|
/* chroma mc func */
|
|
ihevce_chroma_inter_pred_pu(
|
|
&ps_ctxt->s_mc_ctxt, ps_pu, pu1_cur_pred, pred_chrm_strd);
|
|
if(2 == ps_best_cu_prms->u2_num_pus_in_cu)
|
|
{
|
|
/* 2Nx__ partition case */
|
|
if(inter_pu_wd == ps_best_cu_prms->u1_cu_size)
|
|
{
|
|
pu1_cur_pred += (inter_pu_ht * pred_chrm_strd);
|
|
}
|
|
/* __x2N partition case */
|
|
if(inter_pu_ht == (ps_best_cu_prms->u1_cu_size >> (u1_is_422 == 0)))
|
|
{
|
|
pu1_cur_pred += inter_pu_wd;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
pi2_deq_data = &ps_best_cu_prms->pi2_cu_deq_coeffs[0];
|
|
pi2_chrm_deq_data =
|
|
&ps_best_cu_prms->pi2_cu_deq_coeffs[0] + ps_best_cu_prms->i4_chrm_deq_coeff_strt_idx;
|
|
pu1_old_ecd_data = &ps_best_cu_prms->pu1_cu_coeffs[0];
|
|
pu1_chrm_old_ecd_data =
|
|
&ps_best_cu_prms->pu1_cu_coeffs[0] + ps_best_cu_prms->i4_chrm_cu_coeff_strt_idx;
|
|
|
|
/* default value for cu coded flag */
|
|
u1_is_cu_coded = 0;
|
|
|
|
/* If we are re-computing coeff, set sad to 0 and start accumulating */
|
|
/* else use the best cand. sad from RDOPT stage */
|
|
if(1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data)
|
|
{
|
|
/* init of sad of CU accumulated over all TUs */
|
|
ps_best_cu_prms->u4_cu_sad = 0;
|
|
|
|
/* reset the luma residual bits */
|
|
ps_best_cu_prms->u4_cu_luma_res_bits = 0;
|
|
}
|
|
|
|
if(1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data)
|
|
{
|
|
/* reset the chroma residual bits */
|
|
ps_best_cu_prms->u4_cu_chroma_res_bits = 0;
|
|
}
|
|
|
|
if((1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data) ||
|
|
(1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data))
|
|
{
|
|
/*Header bits have to be reevaluated if luma and chroma reevaluation is done, as
|
|
the quantized coefficients might be changed.
|
|
We are copying only those states which correspond to the header from the cabac state
|
|
of the previous CU, because the header is going to be recomputed for this condition*/
|
|
ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 1;
|
|
memcpy(
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0],
|
|
&ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
|
|
IHEVC_CAB_COEFFX_PREFIX);
|
|
|
|
if((1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data))
|
|
{
|
|
COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
|
|
(&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX),
|
|
(&ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0] +
|
|
IHEVC_CAB_COEFFX_PREFIX),
|
|
(IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX));
|
|
}
|
|
else
|
|
{
|
|
COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
|
|
(&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX),
|
|
(&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0] +
|
|
IHEVC_CAB_COEFFX_PREFIX),
|
|
(IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX));
|
|
}
|
|
ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_best_idx;
|
|
}
|
|
else
|
|
{
|
|
ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 0;
|
|
}
|
|
|
|
/* Zero cbf tool is disabled for intra CUs */
|
|
if(PRED_MODE_INTRA == packed_pred_mode)
|
|
{
|
|
#if ENABLE_ZERO_CBF_IN_INTRA
|
|
ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
|
|
#else
|
|
ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
#if DISABLE_ZERO_ZBF_IN_INTER
|
|
ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
|
|
#else
|
|
ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
|
|
#endif
|
|
}
|
|
|
|
/** Loop for all tu blocks in current cu and do reconstruction **/
|
|
for(ctr = 0; ctr < num_tu_in_cu; ctr++)
|
|
{
|
|
tu_t *ps_tu;
|
|
WORD32 trans_size, num_4x4_in_tu;
|
|
WORD32 cbf, zero_rows, zero_cols;
|
|
WORD32 cu_pos_x_in_4x4, cu_pos_y_in_4x4;
|
|
WORD32 cu_pos_x_in_pix, cu_pos_y_in_pix;
|
|
WORD32 luma_pred_mode, chroma_pred_mode = 0;
|
|
UWORD8 au1_is_recon_available[2];
|
|
|
|
ps_tu = &(ps_tu_enc_loop->s_tu); /* Points to the TU property ctxt */
|
|
|
|
u1_compute_spatial_ssd_luma = 0;
|
|
u1_compute_spatial_ssd_chroma = 0;
|
|
|
|
trans_size = 1 << (ps_tu->b3_size + 2);
|
|
num_4x4_in_tu = (trans_size >> 2);
|
|
cu_pos_x_in_4x4 = ps_tu->b4_pos_x;
|
|
cu_pos_y_in_4x4 = ps_tu->b4_pos_y;
|
|
|
|
/* populate the coeffs scan idx */
|
|
ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
|
|
|
|
/* get the current pos x and pos y in pixels */
|
|
cu_pos_x_in_pix = (cu_pos_x_in_4x4 << 2) - (cu_pos_x << 3);
|
|
cu_pos_y_in_pix = (cu_pos_y_in_4x4 << 2) - (cu_pos_y << 3);
|
|
|
|
/* Update pointers based on the location */
|
|
pu1_cur_src = pu1_src + cu_pos_x_in_pix;
|
|
pu1_cur_src += (cu_pos_y_in_pix * src_strd);
|
|
pu1_cur_pred = pu1_pred + cu_pos_x_in_pix;
|
|
pu1_cur_pred += (cu_pos_y_in_pix * pred_strd);
|
|
|
|
pu1_cur_luma_recon = pu1_luma_recon + cu_pos_x_in_pix;
|
|
pu1_cur_luma_recon += (cu_pos_y_in_pix * recon_luma_strd);
|
|
|
|
pi2_cur_deq_data = pi2_deq_data + cu_pos_x_in_pix;
|
|
pi2_cur_deq_data += cu_pos_y_in_pix * cu_size;
|
|
|
|
pu1_cur_src_chrm = pu1_src_chrm + cu_pos_x_in_pix;
|
|
pu1_cur_src_chrm += ((cu_pos_y_in_pix >> 1) * src_chrm_strd) +
|
|
(u1_is_422 * ((cu_pos_y_in_pix >> 1) * src_chrm_strd));
|
|
|
|
pu1_cur_pred_chrm = pu1_pred_chrm + cu_pos_x_in_pix;
|
|
pu1_cur_pred_chrm += ((cu_pos_y_in_pix >> 1) * pred_chrm_strd) +
|
|
(u1_is_422 * ((cu_pos_y_in_pix >> 1) * pred_chrm_strd));
|
|
|
|
pu1_cur_chroma_recon = pu1_chrm_recon + cu_pos_x_in_pix;
|
|
pu1_cur_chroma_recon += ((cu_pos_y_in_pix >> 1) * recon_chrma_strd) +
|
|
(u1_is_422 * ((cu_pos_y_in_pix >> 1) * recon_chrma_strd));
|
|
|
|
pi2_cur_deq_data_chrm = pi2_chrm_deq_data + cu_pos_x_in_pix;
|
|
pi2_cur_deq_data_chrm +=
|
|
((cu_pos_y_in_pix >> 1) * cu_size) + (u1_is_422 * ((cu_pos_y_in_pix >> 1) * cu_size));
|
|
|
|
/* if transform size is 4x4 then only first luma 4x4 will have chroma */
|
|
chrm_present_flag = 1; /* by default chroma present is set to 1*/
|
|
|
|
if(4 == trans_size)
|
|
{
|
|
/* if tusize is 4x4 then only first luma 4x4 will have chroma*/
|
|
if(0 != chrm_ctr)
|
|
{
|
|
chrm_present_flag = INTRA_PRED_CHROMA_IDX_NONE;
|
|
}
|
|
|
|
/* increment the chrm ctr unconditionally */
|
|
chrm_ctr++;
|
|
/* after ctr reached 4 reset it */
|
|
if(4 == chrm_ctr)
|
|
{
|
|
chrm_ctr = 0;
|
|
}
|
|
}
|
|
|
|
/**------------- Compute pred data if required --------------**/
|
|
if(PRED_MODE_INTRA == packed_pred_mode) /* Inter pred calc. is done outside loop */
|
|
{
|
|
/* Get the pred mode for scan idx calculation, even if pred is not required */
|
|
luma_pred_mode = *pu1_intra_pred_mode;
|
|
|
|
if((ps_ctxt->i4_rc_pass == 1) ||
|
|
(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data))
|
|
{
|
|
WORD32 nbr_flags;
|
|
WORD32 luma_pred_func_idx;
|
|
UWORD8 *pu1_left;
|
|
UWORD8 *pu1_top;
|
|
UWORD8 *pu1_top_left;
|
|
WORD32 left_strd;
|
|
|
|
/* left cu boundary */
|
|
if(0 == cu_pos_x_in_pix)
|
|
{
|
|
left_strd = ps_cu_nbr_prms->cu_left_stride;
|
|
pu1_left = ps_cu_nbr_prms->pu1_cu_left + cu_pos_y_in_pix * left_strd;
|
|
}
|
|
else
|
|
{
|
|
pu1_left = pu1_cur_luma_recon - 1;
|
|
left_strd = recon_luma_strd;
|
|
}
|
|
|
|
/* top cu boundary */
|
|
if(0 == cu_pos_y_in_pix)
|
|
{
|
|
pu1_top = ps_cu_nbr_prms->pu1_cu_top + cu_pos_x_in_pix;
|
|
}
|
|
else
|
|
{
|
|
pu1_top = pu1_cur_luma_recon - recon_luma_strd;
|
|
}
|
|
|
|
/* by default top left is set to cu top left */
|
|
pu1_top_left = ps_cu_nbr_prms->pu1_cu_top_left;
|
|
|
|
/* top left based on position */
|
|
if((0 != cu_pos_y_in_pix) && (0 == cu_pos_x_in_pix))
|
|
{
|
|
pu1_top_left = pu1_left - left_strd;
|
|
}
|
|
else if(0 != cu_pos_x_in_pix)
|
|
{
|
|
pu1_top_left = pu1_top - 1;
|
|
}
|
|
|
|
/* get the neighbour availability flags */
|
|
nbr_flags = ihevce_get_nbr_intra(
|
|
&s_nbr,
|
|
ps_ctxt->pu1_ctb_nbr_map,
|
|
ps_ctxt->i4_nbr_map_strd,
|
|
cu_pos_x_in_4x4,
|
|
cu_pos_y_in_4x4,
|
|
num_4x4_in_tu);
|
|
|
|
if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data)
|
|
{
|
|
/* copy the nbr flags for chroma reuse */
|
|
if(4 != trans_size)
|
|
{
|
|
*pu4_nbr_flags = nbr_flags;
|
|
}
|
|
else if(1 == chrm_present_flag)
|
|
{
|
|
/* compute the avail flags assuming luma trans is 8x8 */
|
|
/* get the neighbour availability flags */
|
|
*pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
|
|
ps_ctxt->pu1_ctb_nbr_map,
|
|
ps_ctxt->i4_nbr_map_strd,
|
|
cu_pos_x_in_4x4,
|
|
cu_pos_y_in_4x4,
|
|
(num_4x4_in_tu << 1),
|
|
(num_4x4_in_tu << 1));
|
|
}
|
|
|
|
/* call reference array substitution */
|
|
ihevc_intra_pred_luma_ref_substitution_fptr(
|
|
pu1_top_left,
|
|
pu1_top,
|
|
pu1_left,
|
|
left_strd,
|
|
trans_size,
|
|
nbr_flags,
|
|
(UWORD8 *)ps_ctxt->pv_ref_sub_out,
|
|
1);
|
|
|
|
/* call reference filtering */
|
|
ihevc_intra_pred_ref_filtering_fptr(
|
|
(UWORD8 *)ps_ctxt->pv_ref_sub_out,
|
|
trans_size,
|
|
(UWORD8 *)ps_ctxt->pv_ref_filt_out,
|
|
luma_pred_mode,
|
|
ps_ctxt->i1_strong_intra_smoothing_enable_flag);
|
|
|
|
/* use the look up to get the function idx */
|
|
luma_pred_func_idx = g_i4_ip_funcs[luma_pred_mode];
|
|
|
|
/* call the intra prediction function */
|
|
ps_ctxt->apf_lum_ip[luma_pred_func_idx](
|
|
(UWORD8 *)ps_ctxt->pv_ref_filt_out,
|
|
1,
|
|
pu1_cur_pred,
|
|
pred_strd,
|
|
trans_size,
|
|
luma_pred_mode);
|
|
}
|
|
}
|
|
else if(
|
|
(1 == chrm_present_flag) &&
|
|
(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data))
|
|
{
|
|
WORD32 temp_num_4x4_in_tu = num_4x4_in_tu;
|
|
|
|
if(4 == trans_size) /* compute the avail flags assuming luma trans is 8x8 */
|
|
{
|
|
temp_num_4x4_in_tu = num_4x4_in_tu << 1;
|
|
}
|
|
|
|
*pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
|
|
ps_ctxt->pu1_ctb_nbr_map,
|
|
ps_ctxt->i4_nbr_map_strd,
|
|
cu_pos_x_in_4x4,
|
|
cu_pos_y_in_4x4,
|
|
temp_num_4x4_in_tu,
|
|
temp_num_4x4_in_tu);
|
|
}
|
|
|
|
/* Get the pred mode for scan idx calculation, even if pred is not required */
|
|
chroma_pred_mode = ps_best_cu_prms->u1_chroma_intra_pred_actual_mode;
|
|
}
|
|
|
|
if(1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data)
|
|
{
|
|
WORD32 temp_bits;
|
|
LWORD64 temp_cost;
|
|
UWORD32 u4_tu_sad;
|
|
WORD32 perform_sbh, perform_rdoq;
|
|
|
|
if(PRED_MODE_INTRA == packed_pred_mode)
|
|
{
|
|
/* for luma 4x4 and 8x8 transforms the scan is chosen based on intra pred mode */
|
|
if(trans_size < 16)
|
|
{
|
|
/* for modes from 22 upto 30 horizontal scan is used */
|
|
if((luma_pred_mode > 21) && (luma_pred_mode < 31))
|
|
{
|
|
ps_ctxt->i4_scan_idx = SCAN_HORZ;
|
|
}
|
|
/* for modes from 6 upto 14 vertical scan is used */
|
|
else if((luma_pred_mode > 5) && (luma_pred_mode < 15))
|
|
{
|
|
ps_ctxt->i4_scan_idx = SCAN_VERT;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* RDOPT copy States : TU init (best until prev TU) to current */
|
|
COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
|
|
&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0] +
|
|
IHEVC_CAB_COEFFX_PREFIX,
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
|
|
IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
|
|
|
|
if(ps_prms->u1_recompute_sbh_and_rdoq)
|
|
{
|
|
perform_sbh = (ps_ctxt->i4_sbh_level != NO_SBH);
|
|
perform_rdoq = (ps_ctxt->i4_rdoq_level != NO_RDOQ);
|
|
}
|
|
else
|
|
{
|
|
/* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
|
|
perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh;
|
|
/* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
|
|
we would have to do RDOQ again.*/
|
|
perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq;
|
|
}
|
|
|
|
#if DISABLE_RDOQ_INTRA
|
|
if(PRED_MODE_INTRA == packed_pred_mode)
|
|
{
|
|
perform_rdoq = 0;
|
|
}
|
|
#endif
|
|
/*If BEST candidate RDOQ is enabled, Eithe no coef level rdoq or CU level rdoq has to be enabled
|
|
so that all candidates and best candidate are quantized with same rounding factor */
|
|
if(1 == perform_rdoq)
|
|
{
|
|
ASSERT(ps_ctxt->i4_quant_rounding_level != TU_LEVEL_QUANT_ROUNDING);
|
|
}
|
|
|
|
cbf = ihevce_t_q_iq_ssd_scan_fxn(
|
|
ps_ctxt,
|
|
pu1_cur_pred,
|
|
pred_strd,
|
|
pu1_cur_src,
|
|
src_strd,
|
|
pi2_cur_deq_data,
|
|
cu_size, /*deq_data stride is cu_size*/
|
|
pu1_cur_luma_recon,
|
|
recon_luma_strd,
|
|
pu1_final_ecd_data,
|
|
pu1_csbf_buf,
|
|
csbf_strd,
|
|
trans_size,
|
|
packed_pred_mode,
|
|
&temp_cost,
|
|
&num_bytes,
|
|
&temp_bits,
|
|
&u4_tu_sad,
|
|
&zero_cols,
|
|
&zero_rows,
|
|
&au1_is_recon_available[0],
|
|
perform_rdoq, //(BEST_CAND_RDOQ == ps_ctxt->i4_rdoq_level),
|
|
perform_sbh,
|
|
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
|
|
!ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
|
|
: ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
|
|
(double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
|
|
100.0,
|
|
ps_prms->u1_is_cu_noisy,
|
|
#endif
|
|
u1_compute_spatial_ssd_luma ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
|
|
1 /*early cbf*/
|
|
); //(BEST_CAND_SBH == ps_ctxt->i4_sbh_level));
|
|
|
|
/* Accumulate luma residual bits */
|
|
ps_best_cu_prms->u4_cu_luma_res_bits += temp_bits;
|
|
|
|
/* RDOPT copy States : New updated after curr TU to TU init */
|
|
if(0 != cbf)
|
|
{
|
|
/* update to new state only if CBF is non zero */
|
|
COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
|
|
&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0] +
|
|
IHEVC_CAB_COEFFX_PREFIX,
|
|
IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
|
|
}
|
|
|
|
/* accumulate the TU sad into cu sad */
|
|
ps_best_cu_prms->u4_cu_sad += u4_tu_sad;
|
|
ps_tu->b1_y_cbf = cbf;
|
|
ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed = num_bytes;
|
|
|
|
/* If somebody updates cbf (RDOQ or SBH), update in nbr str. for BS */
|
|
if((ps_prms->u1_will_cabac_state_change) && (!ps_prms->u1_is_first_pass))
|
|
{
|
|
WORD32 num_4x4_in_cu = u1_cu_size >> 2;
|
|
nbr_4x4_t *ps_cur_nbr_4x4 = &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0];
|
|
ps_cur_nbr_4x4 = (ps_cur_nbr_4x4 + (cu_pos_x_in_pix >> 2));
|
|
ps_cur_nbr_4x4 += ((cu_pos_y_in_pix >> 2) * num_4x4_in_cu);
|
|
/* repiclate the nbr 4x4 structure for all 4x4 blocks current TU */
|
|
ps_cur_nbr_4x4->b1_y_cbf = cbf;
|
|
/*copy the cu qp. This will be overwritten by qp calculated based on skip flag at final stage of cu mode decide*/
|
|
ps_cur_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
|
|
/* Qp and cbf are stored for the all 4x4 in TU */
|
|
{
|
|
WORD32 i, j;
|
|
nbr_4x4_t *ps_tmp_4x4;
|
|
ps_tmp_4x4 = ps_cur_nbr_4x4;
|
|
|
|
for(i = 0; i < num_4x4_in_tu; i++)
|
|
{
|
|
for(j = 0; j < num_4x4_in_tu; j++)
|
|
{
|
|
ps_tmp_4x4[j].b8_qp = ps_ctxt->i4_cu_qp;
|
|
ps_tmp_4x4[j].b1_y_cbf = cbf;
|
|
}
|
|
/* row level update*/
|
|
ps_tmp_4x4 += num_4x4_in_cu;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
zero_cols = ps_tu_enc_loop_temp_prms->u4_luma_zero_col;
|
|
zero_rows = ps_tu_enc_loop_temp_prms->u4_luma_zero_row;
|
|
|
|
if(ps_prms->u1_will_cabac_state_change)
|
|
{
|
|
num_bytes = ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed;
|
|
}
|
|
else
|
|
{
|
|
num_bytes = 0;
|
|
}
|
|
|
|
/* copy luma ecd data to final buffer */
|
|
memcpy(pu1_final_ecd_data, pu1_old_ecd_data, num_bytes);
|
|
|
|
pu1_old_ecd_data += num_bytes;
|
|
|
|
au1_is_recon_available[0] = 0;
|
|
}
|
|
|
|
/**-------- Compute Recon data (Do IT & Recon) : Luma -----------**/
|
|
if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
|
|
(!u1_compute_spatial_ssd_luma ||
|
|
(!au1_is_recon_available[0] && u1_compute_spatial_ssd_luma)))
|
|
{
|
|
if(!ps_recon_datastore->u1_is_lumaRecon_available ||
|
|
(ps_recon_datastore->u1_is_lumaRecon_available &&
|
|
(UCHAR_MAX == ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr])))
|
|
{
|
|
ihevce_it_recon_fxn(
|
|
ps_ctxt,
|
|
pi2_cur_deq_data,
|
|
cu_size,
|
|
pu1_cur_pred,
|
|
pred_strd,
|
|
pu1_cur_luma_recon,
|
|
recon_luma_strd,
|
|
pu1_final_ecd_data,
|
|
trans_size,
|
|
packed_pred_mode,
|
|
ps_tu->b1_y_cbf,
|
|
zero_cols,
|
|
zero_rows);
|
|
}
|
|
else if(
|
|
ps_recon_datastore->u1_is_lumaRecon_available &&
|
|
(UCHAR_MAX != ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr]))
|
|
{
|
|
UWORD8 *pu1_recon_src =
|
|
((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
|
|
[ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr]]) +
|
|
cu_pos_x_in_pix + cu_pos_y_in_pix * ps_recon_datastore->i4_lumaRecon_stride;
|
|
|
|
ps_ctxt->s_cmn_opt_func.pf_copy_2d(
|
|
pu1_cur_luma_recon,
|
|
recon_luma_strd,
|
|
pu1_recon_src,
|
|
ps_recon_datastore->i4_lumaRecon_stride,
|
|
trans_size,
|
|
trans_size);
|
|
}
|
|
}
|
|
|
|
if(ps_prms->u1_will_cabac_state_change)
|
|
{
|
|
ps_tu_enc_loop->i4_luma_coeff_offset = total_bytes;
|
|
}
|
|
|
|
pu1_final_ecd_data += num_bytes;
|
|
/* update total bytes consumed */
|
|
total_bytes += num_bytes;
|
|
|
|
u1_is_cu_coded |= ps_tu->b1_y_cbf;
|
|
|
|
/***************** Compute T,Q,IQ,IT & Recon for Chroma ********************/
|
|
if(1 == chrm_present_flag)
|
|
{
|
|
pu1_cur_src_chrm = pu1_src_chrm + cu_pos_x_in_pix;
|
|
pu1_cur_src_chrm += ((cu_pos_y_in_pix >> 1) * src_chrm_strd) +
|
|
(u1_is_422 * ((cu_pos_y_in_pix >> 1) * src_chrm_strd));
|
|
|
|
pu1_cur_pred_chrm = pu1_pred_chrm + cu_pos_x_in_pix;
|
|
pu1_cur_pred_chrm += ((cu_pos_y_in_pix >> 1) * pred_chrm_strd) +
|
|
(u1_is_422 * ((cu_pos_y_in_pix >> 1) * pred_chrm_strd));
|
|
|
|
pu1_cur_chroma_recon = pu1_chrm_recon + cu_pos_x_in_pix;
|
|
pu1_cur_chroma_recon += ((cu_pos_y_in_pix >> 1) * recon_chrma_strd) +
|
|
(u1_is_422 * ((cu_pos_y_in_pix >> 1) * recon_chrma_strd));
|
|
|
|
pi2_cur_deq_data_chrm = pi2_chrm_deq_data + cu_pos_x_in_pix;
|
|
pi2_cur_deq_data_chrm += ((cu_pos_y_in_pix >> 1) * cu_size) +
|
|
(u1_is_422 * ((cu_pos_y_in_pix >> 1) * cu_size));
|
|
|
|
if(INCLUDE_CHROMA_DURING_TU_RECURSION &&
|
|
(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0) &&
|
|
(PRED_MODE_INTRA != packed_pred_mode))
|
|
{
|
|
WORD32 i4_num_bytes;
|
|
UWORD8 *pu1_chroma_pred;
|
|
UWORD8 *pu1_chroma_recon;
|
|
WORD16 *pi2_chroma_deq;
|
|
UWORD32 u4_zero_col;
|
|
UWORD32 u4_zero_row;
|
|
|
|
for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
|
|
{
|
|
WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
|
|
WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
|
|
WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
|
|
|
|
if(0 == u1_is_422)
|
|
{
|
|
i4_subtu_pos_y >>= 1;
|
|
}
|
|
|
|
pu1_chroma_pred =
|
|
pu1_cur_pred_chrm + (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
|
|
pu1_chroma_recon = pu1_cur_chroma_recon +
|
|
(i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
|
|
pi2_chroma_deq =
|
|
pi2_cur_deq_data_chrm + (i4_subtu_idx * chroma_trans_size * cu_size);
|
|
|
|
u4_zero_col = ps_tu_enc_loop_temp_prms->au4_cb_zero_col[i4_subtu_idx];
|
|
u4_zero_row = ps_tu_enc_loop_temp_prms->au4_cb_zero_row[i4_subtu_idx];
|
|
|
|
if(ps_prms->u1_will_cabac_state_change)
|
|
{
|
|
i4_num_bytes =
|
|
ps_tu_enc_loop_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx];
|
|
}
|
|
else
|
|
{
|
|
i4_num_bytes = 0;
|
|
}
|
|
|
|
memcpy(pu1_final_ecd_data, pu1_old_ecd_data, i4_num_bytes);
|
|
|
|
pu1_old_ecd_data += i4_num_bytes;
|
|
|
|
au1_is_recon_available[U_PLANE] = 0;
|
|
|
|
if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
|
|
(!u1_compute_spatial_ssd_chroma ||
|
|
(!au1_is_recon_available[U_PLANE] && u1_compute_spatial_ssd_chroma)))
|
|
{
|
|
if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
|
|
(ps_recon_datastore->au1_is_chromaRecon_available[0] &&
|
|
(UCHAR_MAX ==
|
|
ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx])))
|
|
{
|
|
ihevce_chroma_it_recon_fxn(
|
|
ps_ctxt,
|
|
pi2_chroma_deq,
|
|
cu_size,
|
|
pu1_chroma_pred,
|
|
pred_chrm_strd,
|
|
pu1_chroma_recon,
|
|
recon_chrma_strd,
|
|
pu1_final_ecd_data,
|
|
chroma_trans_size,
|
|
(i4_subtu_idx == 0) ? ps_tu->b1_cb_cbf : ps_tu->b1_cb_cbf_subtu1,
|
|
u4_zero_col,
|
|
u4_zero_row,
|
|
U_PLANE);
|
|
}
|
|
else if(
|
|
ps_recon_datastore->au1_is_chromaRecon_available[0] &&
|
|
(UCHAR_MAX !=
|
|
ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx]))
|
|
{
|
|
UWORD8 *pu1_recon_src =
|
|
((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
|
|
[ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
|
|
[U_PLANE][ctr][i4_subtu_idx]]) +
|
|
i4_subtu_pos_x +
|
|
i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
|
|
|
|
ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
|
|
pu1_recon_src,
|
|
ps_recon_datastore->i4_lumaRecon_stride,
|
|
pu1_chroma_recon,
|
|
recon_chrma_strd,
|
|
chroma_trans_size,
|
|
chroma_trans_size,
|
|
U_PLANE);
|
|
}
|
|
}
|
|
|
|
u1_is_cu_coded |=
|
|
((1 == i4_subtu_idx) ? ps_tu->b1_cb_cbf_subtu1 : ps_tu->b1_cb_cbf);
|
|
|
|
pu1_final_ecd_data += i4_num_bytes;
|
|
total_bytes += i4_num_bytes;
|
|
}
|
|
|
|
for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
|
|
{
|
|
WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
|
|
WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
|
|
WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
|
|
|
|
if(0 == u1_is_422)
|
|
{
|
|
i4_subtu_pos_y >>= 1;
|
|
}
|
|
|
|
pu1_chroma_pred =
|
|
pu1_cur_pred_chrm + (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
|
|
pu1_chroma_recon = pu1_cur_chroma_recon +
|
|
(i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
|
|
pi2_chroma_deq = pi2_cur_deq_data_chrm +
|
|
(i4_subtu_idx * chroma_trans_size * cu_size) +
|
|
chroma_trans_size;
|
|
|
|
u4_zero_col = ps_tu_enc_loop_temp_prms->au4_cr_zero_col[i4_subtu_idx];
|
|
u4_zero_row = ps_tu_enc_loop_temp_prms->au4_cr_zero_row[i4_subtu_idx];
|
|
|
|
if(ps_prms->u1_will_cabac_state_change)
|
|
{
|
|
i4_num_bytes =
|
|
ps_tu_enc_loop_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx];
|
|
}
|
|
else
|
|
{
|
|
i4_num_bytes = 0;
|
|
}
|
|
|
|
memcpy(pu1_final_ecd_data, pu1_old_ecd_data, i4_num_bytes);
|
|
|
|
pu1_old_ecd_data += i4_num_bytes;
|
|
|
|
au1_is_recon_available[V_PLANE] = 0;
|
|
|
|
if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
|
|
(!u1_compute_spatial_ssd_chroma ||
|
|
(!au1_is_recon_available[V_PLANE] && u1_compute_spatial_ssd_chroma)))
|
|
{
|
|
if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
|
|
(ps_recon_datastore->au1_is_chromaRecon_available[0] &&
|
|
(UCHAR_MAX ==
|
|
ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx])))
|
|
{
|
|
ihevce_chroma_it_recon_fxn(
|
|
ps_ctxt,
|
|
pi2_chroma_deq,
|
|
cu_size,
|
|
pu1_chroma_pred,
|
|
pred_chrm_strd,
|
|
pu1_chroma_recon,
|
|
recon_chrma_strd,
|
|
pu1_final_ecd_data,
|
|
chroma_trans_size,
|
|
(i4_subtu_idx == 0) ? ps_tu->b1_cr_cbf : ps_tu->b1_cr_cbf_subtu1,
|
|
u4_zero_col,
|
|
u4_zero_row,
|
|
V_PLANE);
|
|
}
|
|
else if(
|
|
ps_recon_datastore->au1_is_chromaRecon_available[0] &&
|
|
(UCHAR_MAX !=
|
|
ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx]))
|
|
{
|
|
UWORD8 *pu1_recon_src =
|
|
((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
|
|
[ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
|
|
[V_PLANE][ctr][i4_subtu_idx]]) +
|
|
i4_subtu_pos_x +
|
|
i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
|
|
|
|
ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
|
|
pu1_recon_src,
|
|
ps_recon_datastore->i4_lumaRecon_stride,
|
|
pu1_chroma_recon,
|
|
recon_chrma_strd,
|
|
chroma_trans_size,
|
|
chroma_trans_size,
|
|
V_PLANE);
|
|
}
|
|
}
|
|
|
|
u1_is_cu_coded |=
|
|
((1 == i4_subtu_idx) ? ps_tu->b1_cr_cbf_subtu1 : ps_tu->b1_cr_cbf);
|
|
|
|
pu1_final_ecd_data += i4_num_bytes;
|
|
total_bytes += i4_num_bytes;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
WORD32 cb_zero_col, cb_zero_row, cr_zero_col, cr_zero_row;
|
|
|
|
for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
|
|
{
|
|
WORD32 cb_cbf, cr_cbf;
|
|
WORD32 cb_num_bytes, cr_num_bytes;
|
|
|
|
WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
|
|
|
|
WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
|
|
WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
|
|
|
|
if(0 == u1_is_422)
|
|
{
|
|
i4_subtu_pos_y >>= 1;
|
|
}
|
|
|
|
pu1_cur_src_chrm += (i4_subtu_idx * chroma_trans_size * src_chrm_strd);
|
|
pu1_cur_pred_chrm += (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
|
|
pu1_cur_chroma_recon += (i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
|
|
pi2_cur_deq_data_chrm += (i4_subtu_idx * chroma_trans_size * cu_size);
|
|
|
|
if((PRED_MODE_INTRA == packed_pred_mode) &&
|
|
(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data))
|
|
{
|
|
WORD32 nbr_flags, left_strd_chrm, chrm_pred_func_idx;
|
|
UWORD8 *pu1_left_chrm;
|
|
UWORD8 *pu1_top_chrm;
|
|
UWORD8 *pu1_top_left_chrm;
|
|
|
|
nbr_flags = ihevce_get_intra_chroma_tu_nbr(
|
|
*pu4_nbr_flags, i4_subtu_idx, chroma_trans_size, u1_is_422);
|
|
|
|
/* left cu boundary */
|
|
if(0 == i4_subtu_pos_x)
|
|
{
|
|
left_strd_chrm = ps_chrm_cu_buf_prms->i4_cu_left_stride;
|
|
pu1_left_chrm =
|
|
ps_chrm_cu_buf_prms->pu1_cu_left + i4_subtu_pos_y * left_strd_chrm;
|
|
}
|
|
else
|
|
{
|
|
pu1_left_chrm = pu1_cur_chroma_recon - 2;
|
|
left_strd_chrm = recon_chrma_strd;
|
|
}
|
|
|
|
/* top cu boundary */
|
|
if(0 == i4_subtu_pos_y)
|
|
{
|
|
pu1_top_chrm = ps_chrm_cu_buf_prms->pu1_cu_top + i4_subtu_pos_x;
|
|
}
|
|
else
|
|
{
|
|
pu1_top_chrm = pu1_cur_chroma_recon - recon_chrma_strd;
|
|
}
|
|
|
|
/* by default top left is set to cu top left */
|
|
pu1_top_left_chrm = ps_chrm_cu_buf_prms->pu1_cu_top_left;
|
|
|
|
/* top left based on position */
|
|
if((0 != i4_subtu_pos_y) && (0 == i4_subtu_pos_x))
|
|
{
|
|
pu1_top_left_chrm = pu1_left_chrm - left_strd_chrm;
|
|
}
|
|
else if(0 != i4_subtu_pos_x)
|
|
{
|
|
pu1_top_left_chrm = pu1_top_chrm - 2;
|
|
}
|
|
|
|
/* call the chroma reference array substitution */
|
|
ihevc_intra_pred_chroma_ref_substitution_fptr(
|
|
pu1_top_left_chrm,
|
|
pu1_top_chrm,
|
|
pu1_left_chrm,
|
|
left_strd_chrm,
|
|
chroma_trans_size,
|
|
nbr_flags,
|
|
(UWORD8 *)ps_ctxt->pv_ref_sub_out,
|
|
1);
|
|
|
|
/* use the look up to get the function idx */
|
|
chrm_pred_func_idx = g_i4_ip_funcs[chroma_pred_mode];
|
|
|
|
/* call the intra prediction function */
|
|
ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
|
|
(UWORD8 *)ps_ctxt->pv_ref_sub_out,
|
|
1,
|
|
pu1_cur_pred_chrm,
|
|
pred_chrm_strd,
|
|
chroma_trans_size,
|
|
chroma_pred_mode);
|
|
}
|
|
|
|
/**---------- Compute iq&coeff data if required : Chroma ------------**/
|
|
if(1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data)
|
|
{
|
|
WORD32 perform_sbh, perform_rdoq, temp_bits;
|
|
|
|
if(ps_prms->u1_recompute_sbh_and_rdoq)
|
|
{
|
|
perform_sbh = (ps_ctxt->i4_sbh_level != NO_SBH);
|
|
perform_rdoq = (ps_ctxt->i4_rdoq_level != NO_RDOQ);
|
|
}
|
|
else
|
|
{
|
|
/* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
|
|
perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh;
|
|
/* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
|
|
we would have to do RDOQ again.*/
|
|
perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq;
|
|
}
|
|
|
|
/* populate the coeffs scan idx */
|
|
ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
|
|
|
|
if(PRED_MODE_INTRA == packed_pred_mode)
|
|
{
|
|
/* for 4x4 transforms based on intra pred mode scan is choosen*/
|
|
if(4 == chroma_trans_size)
|
|
{
|
|
/* for modes from 22 upto 30 horizontal scan is used */
|
|
if((chroma_pred_mode > 21) && (chroma_pred_mode < 31))
|
|
{
|
|
ps_ctxt->i4_scan_idx = SCAN_HORZ;
|
|
}
|
|
/* for modes from 6 upto 14 horizontal scan is used */
|
|
else if((chroma_pred_mode > 5) && (chroma_pred_mode < 15))
|
|
{
|
|
ps_ctxt->i4_scan_idx = SCAN_VERT;
|
|
}
|
|
}
|
|
}
|
|
|
|
#if DISABLE_RDOQ_INTRA
|
|
if(PRED_MODE_INTRA == packed_pred_mode)
|
|
{
|
|
perform_rdoq = 0;
|
|
}
|
|
#endif
|
|
|
|
/* RDOPT copy States : TU init (best until prev TU) to current */
|
|
COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
|
|
&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0] +
|
|
IHEVC_CAB_COEFFX_PREFIX,
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
|
|
IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
|
|
|
|
ASSERT(rd_opt_best_idx == ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx);
|
|
/*If BEST candidate RDOQ is enabled, Eithe no coef level rdoq or CU level rdoq has to be enabled
|
|
so that all candidates and best candidate are quantized with same rounding factor */
|
|
if(1 == perform_rdoq)
|
|
{
|
|
ASSERT(ps_ctxt->i4_quant_rounding_level != TU_LEVEL_QUANT_ROUNDING);
|
|
}
|
|
|
|
if(!ps_best_cu_prms->u1_skip_flag ||
|
|
!ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
|
|
{
|
|
/* Cb */
|
|
cb_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
|
|
ps_ctxt,
|
|
pu1_cur_pred_chrm,
|
|
pred_chrm_strd,
|
|
pu1_cur_src_chrm,
|
|
src_chrm_strd,
|
|
pi2_cur_deq_data_chrm,
|
|
cu_size,
|
|
pu1_chrm_recon,
|
|
recon_chrma_strd,
|
|
pu1_final_ecd_data,
|
|
pu1_csbf_buf,
|
|
csbf_strd,
|
|
chroma_trans_size,
|
|
ps_ctxt->i4_scan_idx,
|
|
(PRED_MODE_INTRA == packed_pred_mode),
|
|
&cb_num_bytes,
|
|
&temp_bits,
|
|
&cb_zero_col,
|
|
&cb_zero_row,
|
|
&au1_is_recon_available[U_PLANE],
|
|
perform_sbh,
|
|
perform_rdoq,
|
|
&i8_ssd,
|
|
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
|
|
!ps_ctxt->u1_is_refPic
|
|
? ALPHA_FOR_NOISE_TERM_IN_RDOPT
|
|
: ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
|
|
(double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
|
|
100.0,
|
|
ps_prms->u1_is_cu_noisy,
|
|
#endif
|
|
ps_best_cu_prms->u1_skip_flag &&
|
|
ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt,
|
|
u1_compute_spatial_ssd_chroma ? SPATIAL_DOMAIN_SSD
|
|
: FREQUENCY_DOMAIN_SSD,
|
|
U_PLANE);
|
|
}
|
|
else
|
|
{
|
|
cb_cbf = 0;
|
|
temp_bits = 0;
|
|
cb_num_bytes = 0;
|
|
au1_is_recon_available[U_PLANE] = 0;
|
|
cb_zero_col = 0;
|
|
cb_zero_row = 0;
|
|
}
|
|
|
|
/* Accumulate chroma residual bits */
|
|
ps_best_cu_prms->u4_cu_chroma_res_bits += temp_bits;
|
|
|
|
/* RDOPT copy States : New updated after curr TU to TU init */
|
|
if(0 != cb_cbf)
|
|
{
|
|
COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
|
|
&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0] +
|
|
IHEVC_CAB_COEFFX_PREFIX,
|
|
IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
|
|
}
|
|
/* RDOPT copy States : Restoring back the Cb init state to Cr */
|
|
else
|
|
{
|
|
COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
|
|
&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0] +
|
|
IHEVC_CAB_COEFFX_PREFIX,
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
|
|
IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
|
|
}
|
|
|
|
if(!ps_best_cu_prms->u1_skip_flag ||
|
|
!ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
|
|
{
|
|
/* Cr */
|
|
cr_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
|
|
ps_ctxt,
|
|
pu1_cur_pred_chrm,
|
|
pred_chrm_strd,
|
|
pu1_cur_src_chrm,
|
|
src_chrm_strd,
|
|
pi2_cur_deq_data_chrm + chroma_trans_size,
|
|
cu_size,
|
|
pu1_chrm_recon,
|
|
recon_chrma_strd,
|
|
pu1_final_ecd_data + cb_num_bytes,
|
|
pu1_csbf_buf,
|
|
csbf_strd,
|
|
chroma_trans_size,
|
|
ps_ctxt->i4_scan_idx,
|
|
(PRED_MODE_INTRA == packed_pred_mode),
|
|
&cr_num_bytes,
|
|
&temp_bits,
|
|
&cr_zero_col,
|
|
&cr_zero_row,
|
|
&au1_is_recon_available[V_PLANE],
|
|
perform_sbh,
|
|
perform_rdoq,
|
|
&i8_ssd,
|
|
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
|
|
!ps_ctxt->u1_is_refPic
|
|
? ALPHA_FOR_NOISE_TERM_IN_RDOPT
|
|
: ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
|
|
(double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
|
|
100.0,
|
|
ps_prms->u1_is_cu_noisy,
|
|
#endif
|
|
ps_best_cu_prms->u1_skip_flag &&
|
|
ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt,
|
|
u1_compute_spatial_ssd_chroma ? SPATIAL_DOMAIN_SSD
|
|
: FREQUENCY_DOMAIN_SSD,
|
|
V_PLANE);
|
|
}
|
|
else
|
|
{
|
|
cr_cbf = 0;
|
|
temp_bits = 0;
|
|
cr_num_bytes = 0;
|
|
au1_is_recon_available[V_PLANE] = 0;
|
|
cr_zero_col = 0;
|
|
cr_zero_row = 0;
|
|
}
|
|
|
|
/* Accumulate chroma residual bits */
|
|
ps_best_cu_prms->u4_cu_chroma_res_bits += temp_bits;
|
|
|
|
/* RDOPT copy States : New updated after curr TU to TU init */
|
|
if(0 != cr_cbf)
|
|
{
|
|
COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
|
|
&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0] +
|
|
IHEVC_CAB_COEFFX_PREFIX,
|
|
IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
|
|
}
|
|
|
|
if(0 == i4_subtu_idx)
|
|
{
|
|
ps_tu->b1_cb_cbf = cb_cbf;
|
|
ps_tu->b1_cr_cbf = cr_cbf;
|
|
}
|
|
else
|
|
{
|
|
ps_tu->b1_cb_cbf_subtu1 = cb_cbf;
|
|
ps_tu->b1_cr_cbf_subtu1 = cr_cbf;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
cb_zero_col = ps_tu_enc_loop_temp_prms->au4_cb_zero_col[i4_subtu_idx];
|
|
cb_zero_row = ps_tu_enc_loop_temp_prms->au4_cb_zero_row[i4_subtu_idx];
|
|
cr_zero_col = ps_tu_enc_loop_temp_prms->au4_cr_zero_col[i4_subtu_idx];
|
|
cr_zero_row = ps_tu_enc_loop_temp_prms->au4_cr_zero_row[i4_subtu_idx];
|
|
|
|
if(ps_prms->u1_will_cabac_state_change)
|
|
{
|
|
cb_num_bytes =
|
|
ps_tu_enc_loop_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx];
|
|
}
|
|
else
|
|
{
|
|
cb_num_bytes = 0;
|
|
}
|
|
|
|
if(ps_prms->u1_will_cabac_state_change)
|
|
{
|
|
cr_num_bytes =
|
|
ps_tu_enc_loop_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx];
|
|
}
|
|
else
|
|
{
|
|
cr_num_bytes = 0;
|
|
}
|
|
|
|
/* copy cb ecd data to final buffer */
|
|
memcpy(pu1_final_ecd_data, pu1_chrm_old_ecd_data, cb_num_bytes);
|
|
|
|
pu1_chrm_old_ecd_data += cb_num_bytes;
|
|
|
|
/* copy cb ecd data to final buffer */
|
|
memcpy(
|
|
(pu1_final_ecd_data + cb_num_bytes),
|
|
pu1_chrm_old_ecd_data,
|
|
cr_num_bytes);
|
|
|
|
pu1_chrm_old_ecd_data += cr_num_bytes;
|
|
|
|
au1_is_recon_available[U_PLANE] = 0;
|
|
au1_is_recon_available[V_PLANE] = 0;
|
|
}
|
|
|
|
/**-------- Compute Recon data (Do IT & Recon) : Chroma -----------**/
|
|
if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
|
|
(!u1_compute_spatial_ssd_chroma ||
|
|
(!au1_is_recon_available[U_PLANE] && u1_compute_spatial_ssd_chroma)))
|
|
{
|
|
if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
|
|
(ps_recon_datastore->au1_is_chromaRecon_available[0] &&
|
|
(UCHAR_MAX ==
|
|
ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx])))
|
|
{
|
|
ihevce_chroma_it_recon_fxn(
|
|
ps_ctxt,
|
|
pi2_cur_deq_data_chrm,
|
|
cu_size,
|
|
pu1_cur_pred_chrm,
|
|
pred_chrm_strd,
|
|
pu1_cur_chroma_recon,
|
|
recon_chrma_strd,
|
|
pu1_final_ecd_data,
|
|
chroma_trans_size,
|
|
(i4_subtu_idx == 0) ? ps_tu->b1_cb_cbf : ps_tu->b1_cb_cbf_subtu1,
|
|
cb_zero_col,
|
|
cb_zero_row,
|
|
U_PLANE);
|
|
}
|
|
else if(
|
|
ps_recon_datastore->au1_is_chromaRecon_available[0] &&
|
|
(UCHAR_MAX !=
|
|
ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx]))
|
|
{
|
|
UWORD8 *pu1_recon_src =
|
|
((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
|
|
[ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
|
|
[U_PLANE][ctr][i4_subtu_idx]]) +
|
|
i4_subtu_pos_x +
|
|
i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
|
|
|
|
ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
|
|
pu1_recon_src,
|
|
ps_recon_datastore->i4_lumaRecon_stride,
|
|
pu1_cur_chroma_recon,
|
|
recon_chrma_strd,
|
|
chroma_trans_size,
|
|
chroma_trans_size,
|
|
U_PLANE);
|
|
}
|
|
}
|
|
|
|
u1_is_cu_coded |=
|
|
((1 == i4_subtu_idx) ? ps_tu->b1_cb_cbf_subtu1 : ps_tu->b1_cb_cbf);
|
|
|
|
if(ps_prms->u1_will_cabac_state_change)
|
|
{
|
|
ps_tu_enc_loop->ai4_cb_coeff_offset[i4_subtu_idx] = total_bytes;
|
|
}
|
|
|
|
pu1_final_ecd_data += cb_num_bytes;
|
|
/* update total bytes consumed */
|
|
total_bytes += cb_num_bytes;
|
|
|
|
if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
|
|
(!u1_compute_spatial_ssd_chroma ||
|
|
(!au1_is_recon_available[V_PLANE] && u1_compute_spatial_ssd_chroma)))
|
|
{
|
|
if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
|
|
(ps_recon_datastore->au1_is_chromaRecon_available[0] &&
|
|
(UCHAR_MAX ==
|
|
ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx])))
|
|
{
|
|
ihevce_chroma_it_recon_fxn(
|
|
ps_ctxt,
|
|
pi2_cur_deq_data_chrm + chroma_trans_size,
|
|
cu_size,
|
|
pu1_cur_pred_chrm,
|
|
pred_chrm_strd,
|
|
pu1_cur_chroma_recon,
|
|
recon_chrma_strd,
|
|
pu1_final_ecd_data,
|
|
chroma_trans_size,
|
|
(i4_subtu_idx == 0) ? ps_tu->b1_cr_cbf : ps_tu->b1_cr_cbf_subtu1,
|
|
cr_zero_col,
|
|
cr_zero_row,
|
|
V_PLANE);
|
|
}
|
|
else if(
|
|
ps_recon_datastore->au1_is_chromaRecon_available[0] &&
|
|
(UCHAR_MAX !=
|
|
ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx]))
|
|
{
|
|
UWORD8 *pu1_recon_src =
|
|
((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
|
|
[ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
|
|
[V_PLANE][ctr][i4_subtu_idx]]) +
|
|
i4_subtu_pos_x +
|
|
i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
|
|
|
|
ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
|
|
pu1_recon_src,
|
|
ps_recon_datastore->i4_lumaRecon_stride,
|
|
pu1_cur_chroma_recon,
|
|
recon_chrma_strd,
|
|
chroma_trans_size,
|
|
chroma_trans_size,
|
|
V_PLANE);
|
|
}
|
|
}
|
|
|
|
u1_is_cu_coded |=
|
|
((1 == i4_subtu_idx) ? ps_tu->b1_cr_cbf_subtu1 : ps_tu->b1_cr_cbf);
|
|
|
|
if(ps_prms->u1_will_cabac_state_change)
|
|
{
|
|
ps_tu_enc_loop->ai4_cr_coeff_offset[i4_subtu_idx] = total_bytes;
|
|
}
|
|
|
|
pu1_final_ecd_data += cr_num_bytes;
|
|
/* update total bytes consumed */
|
|
total_bytes += cr_num_bytes;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ps_tu_enc_loop->ai4_cb_coeff_offset[0] = total_bytes;
|
|
ps_tu_enc_loop->ai4_cr_coeff_offset[0] = total_bytes;
|
|
ps_tu_enc_loop->ai4_cb_coeff_offset[1] = total_bytes;
|
|
ps_tu_enc_loop->ai4_cr_coeff_offset[1] = total_bytes;
|
|
ps_tu->b1_cb_cbf = 0;
|
|
ps_tu->b1_cr_cbf = 0;
|
|
ps_tu->b1_cb_cbf_subtu1 = 0;
|
|
ps_tu->b1_cr_cbf_subtu1 = 0;
|
|
}
|
|
|
|
/* Update to next TU */
|
|
ps_tu_enc_loop++;
|
|
ps_tu_enc_loop_temp_prms++;
|
|
|
|
pu4_nbr_flags++;
|
|
pu1_intra_pred_mode++;
|
|
|
|
/*Do not set the nbr map for last pu in cu */
|
|
if((num_tu_in_cu - 1) != ctr)
|
|
{
|
|
/* set the neighbour map to 1 */
|
|
ihevce_set_nbr_map(
|
|
ps_ctxt->pu1_ctb_nbr_map,
|
|
ps_ctxt->i4_nbr_map_strd,
|
|
cu_pos_x_in_4x4,
|
|
cu_pos_y_in_4x4,
|
|
(trans_size >> 2),
|
|
1);
|
|
}
|
|
}
|
|
|
|
if(ps_prms->u1_will_cabac_state_change)
|
|
{
|
|
ps_best_cu_prms->u1_is_cu_coded = u1_is_cu_coded;
|
|
|
|
/* Modify skip flag, if luma is skipped & Chroma is coded */
|
|
if((1 == u1_is_cu_coded) && (PRED_MODE_SKIP == packed_pred_mode))
|
|
{
|
|
ps_best_cu_prms->u1_skip_flag = 0;
|
|
}
|
|
}
|
|
|
|
/* during chroma evaluation if skip decision was over written */
|
|
/* then the current skip candidate is set to a non skip candidate */
|
|
if(PRED_MODE_INTRA != packed_pred_mode)
|
|
{
|
|
ps_best_inter_cand->b1_skip_flag = ps_best_cu_prms->u1_skip_flag;
|
|
}
|
|
|
|
/**------------- Compute header data if required --------------**/
|
|
if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data)
|
|
{
|
|
WORD32 cbf_bits;
|
|
WORD32 cu_bits;
|
|
WORD32 unit_4x4_size = cu_size >> 2;
|
|
|
|
/*Restoring the running reference into the best rdopt_ctxt cabac states which will then
|
|
be copied as the base reference for the next cu
|
|
Assumption : We are ensuring that the u1_eval_header_data flag is set to 1 only if either
|
|
luma and chroma are being reevaluated*/
|
|
COPY_CABAC_STATES(
|
|
&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
|
|
.s_cabac_ctxt.au1_ctxt_models[0],
|
|
&ps_ctxt->au1_rdopt_init_ctxt_models[0],
|
|
IHEVC_CAB_CTXT_END);
|
|
|
|
/* get the neighbour availability flags for current cu */
|
|
ihevce_get_only_nbr_flag(
|
|
&s_nbr,
|
|
ps_ctxt->pu1_ctb_nbr_map,
|
|
ps_ctxt->i4_nbr_map_strd,
|
|
(cu_pos_x << 1),
|
|
(cu_pos_y << 1),
|
|
unit_4x4_size,
|
|
unit_4x4_size);
|
|
|
|
cu_bits = ihevce_entropy_rdo_encode_cu(
|
|
&ps_ctxt->s_rdopt_entropy_ctxt,
|
|
ps_best_cu_prms,
|
|
cu_pos_x,
|
|
cu_pos_y,
|
|
cu_size,
|
|
ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
|
|
: s_nbr.u1_top_avail,
|
|
s_nbr.u1_left_avail,
|
|
(pu1_final_ecd_data - total_bytes),
|
|
&cbf_bits);
|
|
|
|
/* cbf bits are excluded from header bits, instead considered as texture bits */
|
|
ps_best_cu_prms->u4_cu_hdr_bits = cu_bits - cbf_bits;
|
|
ps_best_cu_prms->u4_cu_cbf_bits = cbf_bits;
|
|
}
|
|
|
|
if(ps_prms->u1_will_cabac_state_change)
|
|
{
|
|
ps_best_cu_prms->i4_num_bytes_ecd_data = total_bytes;
|
|
}
|
|
}
|
|
|
|
/*!
|
|
******************************************************************************
|
|
* \if Function name : ihevce_set_eval_flags \endif
|
|
*
|
|
* \brief
|
|
* Function which decides which eval flags have to be set based on present
|
|
* and RDOQ conditions
|
|
*
|
|
* \param[in] ps_ctxt : encoder ctxt pointer
|
|
* \param[in] enc_loop_cu_final_prms_t : pointer to final cu params
|
|
*
|
|
* \return
|
|
* None
|
|
*
|
|
* \author
|
|
* Ittiam
|
|
*
|
|
*****************************************************************************
|
|
*/
|
|
void ihevce_set_eval_flags(
|
|
ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_final_prms_t *ps_enc_loop_bestprms)
|
|
{
|
|
WORD32 count = 0;
|
|
|
|
ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 0;
|
|
|
|
ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data =
|
|
!ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
|
|
|
|
if(ps_ctxt->u1_disable_intra_eval && (!(ps_ctxt->i4_deblk_pad_hpel_cur_pic & 0x1)))
|
|
{
|
|
ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 0;
|
|
}
|
|
else
|
|
{
|
|
ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 1;
|
|
}
|
|
|
|
if((1 == ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq) ||
|
|
(1 == ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh))
|
|
{
|
|
/* When rdoq is enabled only for the best candidate, in case of in Intra nTU
|
|
RDOQ might have altered the coeffs of the neighbour CU. As a result, the pred
|
|
for the current CU will change. Therefore, we need to reevaluate the pred data*/
|
|
if((ps_enc_loop_bestprms->u2_num_tus_in_cu > 1) &&
|
|
(ps_enc_loop_bestprms->u1_intra_flag == 1))
|
|
{
|
|
ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 1;
|
|
ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data = 1;
|
|
}
|
|
if(ps_enc_loop_bestprms->u1_skip_flag == 1)
|
|
{
|
|
for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
|
|
{
|
|
ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
|
|
.b1_eval_luma_iq_and_coeff_data = 0;
|
|
ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
|
|
.b1_eval_chroma_iq_and_coeff_data = 0;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
|
|
{
|
|
ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
|
|
.b1_eval_luma_iq_and_coeff_data = 1;
|
|
ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
|
|
.b1_eval_chroma_iq_and_coeff_data = 1;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
switch(ps_ctxt->i4_quality_preset)
|
|
{
|
|
case IHEVCE_QUALITY_P0:
|
|
case IHEVCE_QUALITY_P2:
|
|
case IHEVCE_QUALITY_P3:
|
|
{
|
|
for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
|
|
{
|
|
ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
|
|
.b1_eval_luma_iq_and_coeff_data = 0;
|
|
ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
|
|
.b1_eval_chroma_iq_and_coeff_data =
|
|
!ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
|
|
}
|
|
|
|
break;
|
|
}
|
|
case IHEVCE_QUALITY_P4:
|
|
case IHEVCE_QUALITY_P5:
|
|
{
|
|
for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
|
|
{
|
|
ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
|
|
.b1_eval_luma_iq_and_coeff_data = 0;
|
|
ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
|
|
.b1_eval_chroma_iq_and_coeff_data =
|
|
!ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
|
|
}
|
|
|
|
break;
|
|
}
|
|
case IHEVCE_QUALITY_P6:
|
|
{
|
|
for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
|
|
{
|
|
ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
|
|
.b1_eval_luma_iq_and_coeff_data = 0;
|
|
#if !ENABLE_CHROMA_TRACKING_OF_LUMA_CBF_IN_XS25
|
|
ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
|
|
.b1_eval_chroma_iq_and_coeff_data =
|
|
!ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
|
|
#else
|
|
if((ps_ctxt->i1_slice_type == BSLICE) && (ps_ctxt->i4_temporal_layer_id > 1) &&
|
|
(ps_enc_loop_bestprms->as_tu_enc_loop[count].s_tu.b3_size >= 2))
|
|
{
|
|
ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
|
|
.b1_eval_chroma_iq_and_coeff_data =
|
|
ps_enc_loop_bestprms->as_tu_enc_loop[count].s_tu.b1_y_cbf;
|
|
}
|
|
else
|
|
{
|
|
ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
|
|
.b1_eval_chroma_iq_and_coeff_data =
|
|
!ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
break;
|
|
}
|
|
default:
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Not recomputing Luma pred-data and header data for any preset now */
|
|
ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 1;
|
|
}
|
|
|
|
/**
|
|
******************************************************************************
|
|
*
|
|
* @brief Shrink's TU tree of inter CUs by merging redundnant child nodes
|
|
* (not coded children) into a parent node(not coded).
|
|
*
|
|
* @par Description
|
|
* This is required post RDO evaluation as TU decisions are
|
|
* pre-determined(pre RDO) based on recursive SATD,
|
|
* while the quad children TU's can be skipped during RDO
|
|
*
|
|
* The shrink process is applied iteratively till there are no
|
|
* more modes to shrink
|
|
*
|
|
* @param[inout] ps_tu_enc_loop
|
|
* pointer to tu enc loop params of inter cu
|
|
*
|
|
* @param[inout] ps_tu_enc_loop_temp_prms
|
|
* pointer to temp tu enc loop params of inter cu
|
|
*
|
|
* @param[in] num_tu_in_cu
|
|
* number of tus in cu
|
|
*
|
|
* @return modified number of tus in cu
|
|
*
|
|
******************************************************************************
|
|
*/
|
|
WORD32 ihevce_shrink_inter_tu_tree(
|
|
tu_enc_loop_out_t *ps_tu_enc_loop,
|
|
tu_enc_loop_temp_prms_t *ps_tu_enc_loop_temp_prms,
|
|
recon_datastore_t *ps_recon_datastore,
|
|
WORD32 num_tu_in_cu,
|
|
UWORD8 u1_is_422)
|
|
{
|
|
WORD32 recurse = 1;
|
|
WORD32 ctr;
|
|
|
|
/* ------------- Quadtree TU Split Transform flag optimization ------------ */
|
|
/* Post RDO, if all 4 child nodes are not coded the overheads of split TU */
|
|
/* flags and cbf flags are saved by merging to parent node and marking */
|
|
/* parent TU as not coded */
|
|
/* */
|
|
/* ParentTUSplit=1 */
|
|
/* | */
|
|
/* --------------------------------------------------------- */
|
|
/* |C0(Not coded) | C1(Not coded) | C2(Not coded) | C3(Not coded) */
|
|
/* || */
|
|
/* \/ */
|
|
/* */
|
|
/* ParentTUSplit=0 (Not Coded) */
|
|
/* */
|
|
/* ------------- Quadtree TU Split Transform flag optimization ------------ */
|
|
while((num_tu_in_cu > 4) && recurse)
|
|
{
|
|
recurse = 0;
|
|
|
|
/* Validate inter CU */
|
|
//ASSERT(ps_tu_enc_loop[0].s_tu.s_tu.b1_intra_flag == 0); /*b1_intra_flag no longer a member of tu structure */
|
|
|
|
/* loop for all tu blocks in current cu */
|
|
for(ctr = 0; ctr < num_tu_in_cu;)
|
|
{
|
|
/* Get current tu posx, posy and size */
|
|
WORD32 curr_pos_x = ps_tu_enc_loop[ctr].s_tu.b4_pos_x << 2;
|
|
WORD32 curr_pos_y = ps_tu_enc_loop[ctr].s_tu.b4_pos_y << 2;
|
|
/* +1 is for parents size */
|
|
WORD32 parent_tu_size = 1 << (ps_tu_enc_loop[ctr].s_tu.b3_size + 2 + 1);
|
|
|
|
/* eval merge if leaf nodes reached i.e all child tus are of same size and first tu pos is same as parent pos */
|
|
WORD32 eval_merge = ((curr_pos_x & (parent_tu_size - 1)) == 0);
|
|
eval_merge &= ((curr_pos_y & (parent_tu_size - 1)) == 0);
|
|
|
|
/* As TUs are published in encode order (Z SCAN), */
|
|
/* Four consecutive TUS of same size implies we have hit leaf nodes. */
|
|
if(((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 1].s_tu.b3_size)) &&
|
|
((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 2].s_tu.b3_size)) &&
|
|
((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 3].s_tu.b3_size)) &&
|
|
eval_merge)
|
|
{
|
|
WORD32 merge_parent = 1;
|
|
|
|
/* If any leaf noded is coded, it cannot be merged to parent */
|
|
if((ps_tu_enc_loop[ctr].s_tu.b1_y_cbf) || (ps_tu_enc_loop[ctr].s_tu.b1_cb_cbf) ||
|
|
(ps_tu_enc_loop[ctr].s_tu.b1_cr_cbf) ||
|
|
|
|
(ps_tu_enc_loop[ctr + 1].s_tu.b1_y_cbf) ||
|
|
(ps_tu_enc_loop[ctr + 1].s_tu.b1_cb_cbf) ||
|
|
(ps_tu_enc_loop[ctr + 1].s_tu.b1_cr_cbf) ||
|
|
|
|
(ps_tu_enc_loop[ctr + 2].s_tu.b1_y_cbf) ||
|
|
(ps_tu_enc_loop[ctr + 2].s_tu.b1_cb_cbf) ||
|
|
(ps_tu_enc_loop[ctr + 2].s_tu.b1_cr_cbf) ||
|
|
|
|
(ps_tu_enc_loop[ctr + 3].s_tu.b1_y_cbf) ||
|
|
(ps_tu_enc_loop[ctr + 3].s_tu.b1_cb_cbf) ||
|
|
(ps_tu_enc_loop[ctr + 3].s_tu.b1_cr_cbf))
|
|
{
|
|
merge_parent = 0;
|
|
}
|
|
|
|
if(u1_is_422)
|
|
{
|
|
if((ps_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1) ||
|
|
(ps_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1) ||
|
|
|
|
(ps_tu_enc_loop[ctr + 1].s_tu.b1_cb_cbf_subtu1) ||
|
|
(ps_tu_enc_loop[ctr + 1].s_tu.b1_cr_cbf_subtu1) ||
|
|
|
|
(ps_tu_enc_loop[ctr + 2].s_tu.b1_cb_cbf_subtu1) ||
|
|
(ps_tu_enc_loop[ctr + 2].s_tu.b1_cr_cbf_subtu1) ||
|
|
|
|
(ps_tu_enc_loop[ctr + 3].s_tu.b1_cb_cbf_subtu1) ||
|
|
(ps_tu_enc_loop[ctr + 3].s_tu.b1_cr_cbf_subtu1))
|
|
{
|
|
merge_parent = 0;
|
|
}
|
|
}
|
|
|
|
if(merge_parent)
|
|
{
|
|
/* Merge all the children (ctr,ctr+1,ctr+2,ctr+3) to parent (ctr) */
|
|
|
|
if(ps_recon_datastore->u1_is_lumaRecon_available)
|
|
{
|
|
ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
|
|
|
|
memmove(
|
|
&ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr + 1],
|
|
&ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr + 4],
|
|
(num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
|
|
}
|
|
|
|
if(ps_recon_datastore->au1_is_chromaRecon_available[0])
|
|
{
|
|
ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][0] =
|
|
UCHAR_MAX;
|
|
ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][0] =
|
|
UCHAR_MAX;
|
|
|
|
memmove(
|
|
&ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 1][0],
|
|
&ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 4][0],
|
|
(num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
|
|
|
|
memmove(
|
|
&ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 1][0],
|
|
&ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 4][0],
|
|
(num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
|
|
|
|
if(u1_is_422)
|
|
{
|
|
ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][1] =
|
|
UCHAR_MAX;
|
|
ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][1] =
|
|
UCHAR_MAX;
|
|
|
|
memmove(
|
|
&ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 1][1],
|
|
&ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 4][1],
|
|
(num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
|
|
|
|
memmove(
|
|
&ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 1][1],
|
|
&ps_recon_datastore
|
|
->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 4][1],
|
|
(num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
|
|
}
|
|
}
|
|
|
|
/* Parent node size is one more than that of child */
|
|
ps_tu_enc_loop[ctr].s_tu.b3_size++;
|
|
|
|
ctr++;
|
|
|
|
/* move the subsequent TUs to next element */
|
|
ASSERT(num_tu_in_cu >= (ctr + 3));
|
|
memmove(
|
|
(void *)(ps_tu_enc_loop + ctr),
|
|
(void *)(ps_tu_enc_loop + ctr + 3),
|
|
(num_tu_in_cu - ctr - 3) * sizeof(tu_enc_loop_out_t));
|
|
|
|
/* Also memmove the temp TU params */
|
|
memmove(
|
|
(void *)(ps_tu_enc_loop_temp_prms + ctr),
|
|
(void *)(ps_tu_enc_loop_temp_prms + ctr + 3),
|
|
(num_tu_in_cu - ctr - 3) * sizeof(tu_enc_loop_temp_prms_t));
|
|
|
|
/* Number of TUs in CU are now less by 3 */
|
|
num_tu_in_cu -= 3;
|
|
|
|
/* Recurse again as new parent also be can be merged later */
|
|
recurse = 1;
|
|
}
|
|
else
|
|
{
|
|
/* Go to next set of leaf nodes */
|
|
ctr += 4;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ctr++;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* return the modified num TUs*/
|
|
ASSERT(num_tu_in_cu > 0);
|
|
return (num_tu_in_cu);
|
|
}
|
|
|
|
/**
******************************************************************************
*
*  @brief Extends an intra mode candidate list with the +/-1 neighbours of
*         its entries and with the planar and DC modes.
*
*  @par   Description
*         The first MAX_INTRA_CU_CANDIDATES entries of the mode array are
*         visited. For each entry below 35, the mode one less (unless the
*         entry is 0) and the mode one more (unless the entry is 34) are
*         appended at the current end of the list when the hash table does
*         not flag them yet. INTRA_PLANAR and INTRA_DC are appended last under
*         the same condition. Every appended mode is flagged in the hash table.
*
*  @param[inout] pu1_mode_array
*         candidate mode list; new modes are written from index
*         u1_num_ipe_modes onwards
*
*  @param[inout] pu1_hash_table
*         per mode flag, non-zero when the mode is already in the list
*
*  @param[in] u1_num_ipe_modes
*         number of modes in the list on entry
*
*  @return number of modes in the list on exit
*
******************************************************************************
*/
UWORD8 ihevce_intra_mode_nxn_hash_updater(
    UWORD8 *pu1_mode_array, UWORD8 *pu1_hash_table, UWORD8 u1_num_ipe_modes)
{
    WORD32 cand_idx = 0;

    while(cand_idx < MAX_INTRA_CU_CANDIDATES)
    {
        /* entries of 35 and above are not processed */
        if(pu1_mode_array[cand_idx] < 35)
        {
            /* The entry is re-read from the array at every step on purpose: */
            /* an append may land on this very slot                           */
            if(0 != pu1_mode_array[cand_idx])
            {
                WORD32 lower_mode = pu1_mode_array[cand_idx] - 1;

                if(0 == pu1_hash_table[lower_mode])
                {
                    pu1_hash_table[lower_mode] = 1;
                    pu1_mode_array[u1_num_ipe_modes++] = (UWORD8)lower_mode;
                }
            }

            if(34 != pu1_mode_array[cand_idx])
            {
                WORD32 upper_mode = pu1_mode_array[cand_idx] + 1;

                if(0 == pu1_hash_table[upper_mode])
                {
                    pu1_hash_table[upper_mode] = 1;
                    pu1_mode_array[u1_num_ipe_modes++] = (UWORD8)upper_mode;
                }
            }
        }

        cand_idx++;
    }

    /* planar and DC are always part of the final list */
    if(0 == pu1_hash_table[INTRA_PLANAR])
    {
        pu1_hash_table[INTRA_PLANAR] = 1;
        pu1_mode_array[u1_num_ipe_modes++] = INTRA_PLANAR;
    }

    if(0 == pu1_hash_table[INTRA_DC])
    {
        pu1_hash_table[INTRA_DC] = 1;
        pu1_mode_array[u1_num_ipe_modes++] = INTRA_DC;
    }

    return u1_num_ipe_modes;
}
|
|
|
|
#if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
/**
******************************************************************************
*
*  @brief Runs the recursive-TU SATD evaluator that matches the CU size on
*         the input and prediction data of an inter candidate.
*
*  @par   Description
*         The evaluator is handed the candidate's ai4_tu_split_flag array,
*         its prediction buffer and the scratch memory through an err_prms_t.
*         For a 64x64 CU the transform depth passed on is capped at 1.
*         After the evaluation, ai4_tu_split_flag[0] is forced to 1 when the
*         maximum transform depth is 0, b3_part_size is non-zero and the CU
*         is not 64x64.
*
*  @param[inout] ps_cu_data          inter candidate (prediction data in,
*                                    TU split flags out)
*  @param[in]    ps_func_selector    forwarded to the evaluator
*  @param[in]    pi2_scratch_mem     working memory for the evaluator
*  @param[in]    pu1_inp             input (source) samples
*  @param[in]    i4_inp_stride       input stride
*  @param[in]    i4_lambda           forwarded to the evaluator
*  @param[in]    u1_lambda_q_shift   forwarded to the evaluator
*  @param[in]    u1_cu_size          CU size; selects the evaluator
*  @param[in]    u1_max_tr_depth     maximum transform depth
*
*  @return value returned by the evaluator
*
******************************************************************************
*/
WORD32 ihevce_determine_tu_tree_distribution(
    cu_inter_cand_t *ps_cu_data,
    me_func_selector_t *ps_func_selector,
    WORD16 *pi2_scratch_mem,
    UWORD8 *pu1_inp,
    WORD32 i4_inp_stride,
    WORD32 i4_lambda,
    UWORD8 u1_lambda_q_shift,
    UWORD8 u1_cu_size,
    UWORD8 u1_max_tr_depth)
{
    PF_SAD_FXN_TU_REC pf_err_compute[4];
    err_prms_t s_err_prms;
    WORD32 i4_satd;
    WORD32 i4_evaluator_id;

    /* one evaluator per CU size */
    pf_err_compute[CU_8x8] = hme_evalsatd_pt_pu_8x8_tu_rec;
    pf_err_compute[CU_16x16] = hme_evalsatd_pt_pu_16x16_tu_rec;
    pf_err_compute[CU_32x32] = hme_evalsatd_pt_pu_32x32_tu_rec;
    pf_err_compute[CU_64x64] = hme_evalsatd_pt_pu_64x64_tu_rec;

    /* source, prediction and working buffers */
    s_err_prms.pu1_inp = pu1_inp;
    s_err_prms.i4_inp_stride = i4_inp_stride;
    s_err_prms.pu1_ref = ps_cu_data->pu1_pred_data;
    s_err_prms.i4_ref_stride = ps_cu_data->i4_pred_data_stride;
    s_err_prms.pu1_wkg_mem = (UWORD8 *)pi2_scratch_mem;

    /* result buffers */
    s_err_prms.pi4_sad_grid = &i4_satd;
    s_err_prms.pi4_tu_split_flags = ps_cu_data->ai4_tu_split_flag;

    /* depth is capped at 1 for a 64x64 CU */
    s_err_prms.u1_max_tr_depth = (64 == u1_cu_size) ? MIN(1, u1_max_tr_depth) : u1_max_tr_depth;

    i4_evaluator_id = hme_get_range(u1_cu_size) - 4;

    i4_satd = pf_err_compute[i4_evaluator_id](
        &s_err_prms, i4_lambda, u1_lambda_q_shift, 0, ps_func_selector);

    if((u1_cu_size != 64) && (0 == u1_max_tr_depth) && (ps_cu_data->b3_part_size != 0))
    {
        ps_cu_data->ai4_tu_split_flag[0] = 1;
    }

    return i4_satd;
}
#endif
|
|
|
|
void ihevce_populate_nbr_4x4_with_pu_data(
|
|
nbr_4x4_t *ps_nbr_4x4, pu_t *ps_pu, WORD32 i4_nbr_buf_stride)
|
|
{
|
|
WORD32 i, j;
|
|
|
|
nbr_4x4_t *ps_tmp_4x4 = ps_nbr_4x4;
|
|
|
|
WORD32 ht = (ps_pu->b4_ht + 1);
|
|
WORD32 wd = (ps_pu->b4_wd + 1);
|
|
|
|
ps_nbr_4x4->b1_intra_flag = 0;
|
|
ps_nbr_4x4->b1_pred_l0_flag = !(ps_pu->b2_pred_mode & 1);
|
|
ps_nbr_4x4->b1_pred_l1_flag = (ps_pu->b2_pred_mode > PRED_L0);
|
|
ps_nbr_4x4->mv = ps_pu->mv;
|
|
|
|
for(i = 0; i < ht; i++)
|
|
{
|
|
for(j = 0; j < wd; j++)
|
|
{
|
|
ps_tmp_4x4[j] = *ps_nbr_4x4;
|
|
}
|
|
|
|
ps_tmp_4x4 += i4_nbr_buf_stride;
|
|
}
|
|
}
|
|
|
|
void ihevce_call_luma_inter_pred_rdopt_pass1(
|
|
ihevce_enc_loop_ctxt_t *ps_ctxt, cu_inter_cand_t *ps_inter_cand, WORD32 cu_size)
|
|
{
|
|
pu_t *ps_pu;
|
|
UWORD8 *pu1_pred;
|
|
WORD32 pred_stride, ctr, num_cu_part, skip_or_merge_flag = 0;
|
|
WORD32 inter_pu_wd, inter_pu_ht;
|
|
|
|
pu1_pred = ps_inter_cand->pu1_pred_data_scr;
|
|
pred_stride = ps_inter_cand->i4_pred_data_stride;
|
|
num_cu_part = (SIZE_2Nx2N != ps_inter_cand->b3_part_size) + 1;
|
|
|
|
for(ctr = 0; ctr < num_cu_part; ctr++)
|
|
{
|
|
ps_pu = &ps_inter_cand->as_inter_pu[ctr];
|
|
|
|
/* IF AMP then each partitions can have diff wd ht */
|
|
inter_pu_wd = (ps_pu->b4_wd + 1) << 2;
|
|
inter_pu_ht = (ps_pu->b4_ht + 1) << 2;
|
|
|
|
skip_or_merge_flag = ps_inter_cand->b1_skip_flag | ps_pu->b1_merge_flag;
|
|
//if(0 == skip_or_merge_flag)
|
|
{
|
|
ihevce_luma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_stride, 1);
|
|
}
|
|
if((2 == num_cu_part) && (0 == ctr))
|
|
{
|
|
/* 2Nx__ partion case */
|
|
if(inter_pu_wd == cu_size)
|
|
{
|
|
pu1_pred += (inter_pu_ht * pred_stride);
|
|
}
|
|
|
|
/* __x2N partion case */
|
|
if(inter_pu_ht == cu_size)
|
|
{
|
|
pu1_pred += inter_pu_wd;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
LWORD64 ihevce_it_recon_ssd(
|
|
ihevce_enc_loop_ctxt_t *ps_ctxt,
|
|
UWORD8 *pu1_src,
|
|
WORD32 i4_src_strd,
|
|
UWORD8 *pu1_pred,
|
|
WORD32 i4_pred_strd,
|
|
WORD16 *pi2_deq_data,
|
|
WORD32 i4_deq_data_strd,
|
|
UWORD8 *pu1_recon,
|
|
WORD32 i4_recon_stride,
|
|
UWORD8 *pu1_ecd_data,
|
|
UWORD8 u1_trans_size,
|
|
UWORD8 u1_pred_mode,
|
|
WORD32 i4_cbf,
|
|
WORD32 i4_zero_col,
|
|
WORD32 i4_zero_row,
|
|
CHROMA_PLANE_ID_T e_chroma_plane)
|
|
{
|
|
if(NULL_PLANE == e_chroma_plane)
|
|
{
|
|
ihevce_it_recon_fxn(
|
|
ps_ctxt,
|
|
pi2_deq_data,
|
|
i4_deq_data_strd,
|
|
pu1_pred,
|
|
i4_pred_strd,
|
|
pu1_recon,
|
|
i4_recon_stride,
|
|
pu1_ecd_data,
|
|
u1_trans_size,
|
|
u1_pred_mode,
|
|
i4_cbf,
|
|
i4_zero_col,
|
|
i4_zero_row);
|
|
|
|
return ps_ctxt->s_cmn_opt_func.pf_ssd_calculator(
|
|
pu1_recon, pu1_src, i4_recon_stride, i4_src_strd, u1_trans_size, u1_trans_size,
|
|
e_chroma_plane);
|
|
}
|
|
else
|
|
{
|
|
ihevce_chroma_it_recon_fxn(
|
|
ps_ctxt,
|
|
pi2_deq_data,
|
|
i4_deq_data_strd,
|
|
pu1_pred,
|
|
i4_pred_strd,
|
|
pu1_recon,
|
|
i4_recon_stride,
|
|
pu1_ecd_data,
|
|
u1_trans_size,
|
|
i4_cbf,
|
|
i4_zero_col,
|
|
i4_zero_row,
|
|
e_chroma_plane);
|
|
|
|
return ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
|
|
pu1_recon,
|
|
pu1_src,
|
|
i4_recon_stride,
|
|
i4_src_strd,
|
|
u1_trans_size,
|
|
u1_trans_size,
|
|
e_chroma_plane);
|
|
}
|
|
}
|
|
|
|
/*!
|
|
******************************************************************************
|
|
* \if Function name : ihevce_t_q_iq_ssd_scan_fxn \endif
|
|
*
|
|
* \brief
|
|
* Transform unit level (Chroma) enc_loop function
|
|
*
|
|
* \param[in] ps_ctxt enc_loop module ctxt pointer
|
|
* \param[in] pu1_pred pointer to predicted data buffer
|
|
* \param[in] pred_strd predicted buffer stride
|
|
* \param[in] pu1_src pointer to source data buffer
|
|
* \param[in] src_strd source buffer stride
|
|
* \param[in] pi2_deq_data pointer to store iq data
|
|
* \param[in] deq_data_strd iq data buffer stride
|
|
* \param[out] pu1_ecd_data pointer coeff output buffer (input to ent cod)
|
|
* \param[out] pu1_csbf_buf pointer to store the csbf for all 4x4 in a current
|
|
* block
|
|
* \param[out] csbf_strd csbf buffer stride
|
|
* \param[in] trans_size transform size (4, 8, 16)
|
|
* \param[in] intra_flag 0:Inter/Skip 1:Intra
|
|
* \param[out] pi4_coeff_off pointer to store the number of bytes produced in
|
|
* coeff buffer
|
|
the current TU in RDopt Mode
|
|
* \param[out] pi4_zero_col pointer to store the zero_col info for the TU
|
|
* \param[out] pi4_zero_row pointer to store the zero_row info for the TU
|
|
*
|
|
* \return
|
|
* CBF of the current block
|
|
*
|
|
* \author
|
|
* Ittiam
|
|
*
|
|
*****************************************************************************
|
|
*/
|
|
WORD32 ihevce_chroma_t_q_iq_ssd_scan_fxn(
|
|
ihevce_enc_loop_ctxt_t *ps_ctxt,
|
|
UWORD8 *pu1_pred,
|
|
WORD32 pred_strd,
|
|
UWORD8 *pu1_src,
|
|
WORD32 src_strd,
|
|
WORD16 *pi2_deq_data,
|
|
WORD32 deq_data_strd,
|
|
UWORD8 *pu1_recon,
|
|
WORD32 i4_recon_stride,
|
|
UWORD8 *pu1_ecd_data,
|
|
UWORD8 *pu1_csbf_buf,
|
|
WORD32 csbf_strd,
|
|
WORD32 trans_size,
|
|
WORD32 i4_scan_idx,
|
|
WORD32 intra_flag,
|
|
WORD32 *pi4_coeff_off,
|
|
WORD32 *pi4_tu_bits,
|
|
WORD32 *pi4_zero_col,
|
|
WORD32 *pi4_zero_row,
|
|
UWORD8 *pu1_is_recon_available,
|
|
WORD32 i4_perform_sbh,
|
|
WORD32 i4_perform_rdoq,
|
|
LWORD64 *pi8_cost,
|
|
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
|
|
WORD32 i4_alpha_stim_multiplier,
|
|
UWORD8 u1_is_cu_noisy,
|
|
#endif
|
|
UWORD8 u1_is_skip,
|
|
SSD_TYPE_T e_ssd_type,
|
|
CHROMA_PLANE_ID_T e_chroma_plane)
|
|
{
|
|
WORD32 trans_idx, cbf, u4_blk_sad;
|
|
WORD16 *pi2_quant_coeffs;
|
|
WORD16 *pi2_trans_values;
|
|
WORD32 quant_scale_mat_offset;
|
|
WORD32 *pi4_trans_scratch;
|
|
WORD32 *pi4_subBlock2csbfId_map = NULL;
|
|
|
|
#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
|
|
WORD32 ai4_quant_rounding_factors[3][MAX_TU_SIZE * MAX_TU_SIZE], i;
|
|
#endif
|
|
|
|
rdoq_sbh_ctxt_t *ps_rdoq_sbh_ctxt = &ps_ctxt->s_rdoq_sbh_ctxt;
|
|
|
|
WORD32 i4_perform_zcbf = (ps_ctxt->i4_zcbf_rdo_level == ZCBF_ENABLE) ||
|
|
(!intra_flag && ENABLE_INTER_ZCU_COST);
|
|
WORD32 i4_perform_coeff_level_rdoq =
|
|
(ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING) &&
|
|
(ps_ctxt->i4_chroma_quant_rounding_level == CHROMA_QUANT_ROUNDING);
|
|
|
|
ASSERT((e_chroma_plane == U_PLANE) || (e_chroma_plane == V_PLANE));
|
|
ASSERT(csbf_strd == MAX_TU_IN_CTB_ROW);
|
|
|
|
*pi4_coeff_off = 0;
|
|
*pi4_tu_bits = 0;
|
|
pu1_is_recon_available[0] = 0;
|
|
|
|
pi4_trans_scratch = (WORD32 *)&ps_ctxt->ai2_scratch[0];
|
|
pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
|
|
pi2_trans_values = &ps_ctxt->ai2_scratch[0] + (MAX_TRANS_SIZE * 2);
|
|
|
|
if(2 == trans_size)
|
|
{
|
|
trans_size = 4;
|
|
}
|
|
|
|
/* translate the transform size to index */
|
|
trans_idx = trans_size >> 2;
|
|
|
|
if(16 == trans_size)
|
|
{
|
|
trans_idx = 3;
|
|
}
|
|
|
|
if(u1_is_skip)
|
|
{
|
|
pi8_cost[0] = ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
|
|
pu1_pred,
|
|
pu1_src,
|
|
pred_strd,
|
|
src_strd,
|
|
trans_size,
|
|
trans_size,
|
|
e_chroma_plane);
|
|
|
|
if(e_ssd_type == SPATIAL_DOMAIN_SSD)
|
|
{
|
|
/* buffer copy fromp pred to recon */
|
|
ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
|
|
pu1_pred,
|
|
pred_strd,
|
|
pu1_recon,
|
|
i4_recon_stride,
|
|
trans_size,
|
|
trans_size,
|
|
e_chroma_plane);
|
|
|
|
pu1_is_recon_available[0] = 1;
|
|
}
|
|
|
|
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
|
|
if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
|
|
{
|
|
pi8_cost[0] = ihevce_inject_stim_into_distortion(
|
|
pu1_src,
|
|
src_strd,
|
|
pu1_pred,
|
|
pred_strd,
|
|
pi8_cost[0],
|
|
i4_alpha_stim_multiplier,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
e_chroma_plane);
|
|
}
|
|
#endif
|
|
|
|
#if ENABLE_INTER_ZCU_COST
|
|
#if !WEIGH_CHROMA_COST
|
|
/* cbf = 0, accumulate cu not coded cost */
|
|
ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
|
|
#else
|
|
ps_ctxt->i8_cu_not_coded_cost += (pi8_cost[0] * ps_ctxt->u4_chroma_cost_weighing_factor +
|
|
(1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
|
|
CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT;
|
|
#endif
|
|
#endif
|
|
|
|
return 0;
|
|
}
|
|
|
|
if(intra_flag == 1)
|
|
{
|
|
quant_scale_mat_offset = 0;
|
|
|
|
#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
|
|
ai4_quant_rounding_factors[0][0] =
|
|
MAX(ps_ctxt->i4_quant_rnd_factor[intra_flag], (1 << QUANT_ROUND_FACTOR_Q) / 3);
|
|
|
|
for(i = 0; i < trans_size * trans_size; i++)
|
|
{
|
|
ai4_quant_rounding_factors[1][i] =
|
|
MAX(ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3][i],
|
|
(1 << QUANT_ROUND_FACTOR_Q) / 3);
|
|
ai4_quant_rounding_factors[2][i] =
|
|
MAX(ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3][i],
|
|
(1 << QUANT_ROUND_FACTOR_Q) / 3);
|
|
}
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
quant_scale_mat_offset = NUM_TRANS_TYPES;
|
|
}
|
|
|
|
switch(trans_size)
|
|
{
|
|
case 4:
|
|
{
|
|
pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map4x4TU;
|
|
|
|
break;
|
|
}
|
|
case 8:
|
|
{
|
|
pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map8x8TU;
|
|
|
|
break;
|
|
}
|
|
case 16:
|
|
{
|
|
pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map16x16TU;
|
|
|
|
break;
|
|
}
|
|
case 32:
|
|
{
|
|
pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map32x32TU;
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* ---------- call residue and transform block ------- */
|
|
u4_blk_sad = ps_ctxt->apf_chrm_resd_trns[trans_idx - 1](
|
|
pu1_src,
|
|
pu1_pred,
|
|
pi4_trans_scratch,
|
|
pi2_trans_values,
|
|
src_strd,
|
|
pred_strd,
|
|
trans_size,
|
|
e_chroma_plane);
|
|
(void)u4_blk_sad;
|
|
/* -------- calculate SSD calculation in Transform Domain ------ */
|
|
|
|
cbf = ps_ctxt->apf_quant_iquant_ssd
|
|
[i4_perform_coeff_level_rdoq + (e_ssd_type != FREQUENCY_DOMAIN_SSD) * 2]
|
|
|
|
(pi2_trans_values,
|
|
ps_ctxt->api2_rescal_mat[trans_idx + quant_scale_mat_offset],
|
|
pi2_quant_coeffs,
|
|
pi2_deq_data,
|
|
trans_size,
|
|
ps_ctxt->i4_chrm_cu_qp_div6,
|
|
ps_ctxt->i4_chrm_cu_qp_mod6,
|
|
#if !PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
|
|
ps_ctxt->i4_quant_rnd_factor[intra_flag],
|
|
ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3],
|
|
ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3],
|
|
#else
|
|
intra_flag ? ai4_quant_rounding_factors[0][0] : ps_ctxt->i4_quant_rnd_factor[intra_flag],
|
|
intra_flag ? ai4_quant_rounding_factors[1]
|
|
: ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3],
|
|
intra_flag ? ai4_quant_rounding_factors[2]
|
|
: ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3],
|
|
#endif
|
|
trans_size,
|
|
trans_size,
|
|
deq_data_strd,
|
|
pu1_csbf_buf,
|
|
csbf_strd,
|
|
pi4_zero_col,
|
|
pi4_zero_row,
|
|
ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset],
|
|
pi8_cost);
|
|
|
|
if(e_ssd_type != FREQUENCY_DOMAIN_SSD)
|
|
{
|
|
pi8_cost[0] = UINT_MAX;
|
|
}
|
|
|
|
if(0 != cbf)
|
|
{
|
|
if(i4_perform_sbh || i4_perform_rdoq)
|
|
{
|
|
ps_rdoq_sbh_ctxt->i4_iq_data_strd = deq_data_strd;
|
|
ps_rdoq_sbh_ctxt->i4_q_data_strd = trans_size;
|
|
|
|
ps_rdoq_sbh_ctxt->i4_qp_div = ps_ctxt->i4_chrm_cu_qp_div6;
|
|
ps_rdoq_sbh_ctxt->i2_qp_rem = ps_ctxt->i4_chrm_cu_qp_mod6;
|
|
ps_rdoq_sbh_ctxt->i4_scan_idx = i4_scan_idx;
|
|
ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
|
|
ps_rdoq_sbh_ctxt->i4_trans_size = trans_size;
|
|
|
|
ps_rdoq_sbh_ctxt->pi2_dequant_coeff =
|
|
ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset];
|
|
ps_rdoq_sbh_ctxt->pi2_iquant_coeffs = pi2_deq_data;
|
|
ps_rdoq_sbh_ctxt->pi2_quant_coeffs = pi2_quant_coeffs;
|
|
ps_rdoq_sbh_ctxt->pi2_trans_values = pi2_trans_values;
|
|
ps_rdoq_sbh_ctxt->pu1_csbf_buf = pu1_csbf_buf;
|
|
ps_rdoq_sbh_ctxt->pi4_subBlock2csbfId_map = pi4_subBlock2csbfId_map;
|
|
|
|
if((!i4_perform_rdoq))
|
|
{
|
|
ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
|
|
|
|
pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
|
|
}
|
|
}
|
|
|
|
/* ------- call coeffs scan function ------- */
|
|
*pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
|
|
pi2_quant_coeffs,
|
|
pi4_subBlock2csbfId_map,
|
|
i4_scan_idx,
|
|
trans_size,
|
|
pu1_ecd_data,
|
|
pu1_csbf_buf,
|
|
csbf_strd);
|
|
}
|
|
|
|
/* Normalize Cost. Note : trans_idx, not (trans_idx-1) */
|
|
pi8_cost[0] >>= ga_trans_shift[trans_idx];
|
|
|
|
#if RDOPT_ZERO_CBF_ENABLE
|
|
if((0 != cbf))
|
|
{
|
|
WORD32 tu_bits;
|
|
LWORD64 zero_cbf_cost_u, curr_cb_cod_cost;
|
|
|
|
zero_cbf_cost_u = 0;
|
|
|
|
/*Populating the feilds of rdoq_ctxt structure*/
|
|
if(i4_perform_rdoq)
|
|
{
|
|
//memset(ps_rdoq_sbh_ctxt,0,sizeof(rdoq_sbh_ctxt_t));
|
|
/* transform size to log2transform size */
|
|
GETRANGE(ps_rdoq_sbh_ctxt->i4_log2_trans_size, trans_size);
|
|
ps_rdoq_sbh_ctxt->i4_log2_trans_size -= 1;
|
|
|
|
ps_rdoq_sbh_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->i8_cl_ssd_lambda_chroma_qf;
|
|
ps_rdoq_sbh_ctxt->i4_is_luma = 0;
|
|
ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td = ga_trans_shift[trans_idx];
|
|
ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td =
|
|
(1 << (ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td - 1));
|
|
ps_rdoq_sbh_ctxt->i1_tu_is_coded = 0;
|
|
ps_rdoq_sbh_ctxt->pi4_zero_col = pi4_zero_col;
|
|
ps_rdoq_sbh_ctxt->pi4_zero_row = pi4_zero_row;
|
|
}
|
|
else if(i4_perform_zcbf)
|
|
{
|
|
/* cost of zero cbf encoding */
|
|
zero_cbf_cost_u =
|
|
|
|
ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
|
|
pu1_pred,
|
|
pu1_src,
|
|
pred_strd,
|
|
src_strd,
|
|
trans_size,
|
|
trans_size,
|
|
e_chroma_plane);
|
|
}
|
|
|
|
/************************************************************************/
|
|
/* call the entropy rdo encode to get the bit estimate for current tu */
|
|
/* note that tu includes only residual coding bits and does not include */
|
|
/* tu split, cbf and qp delta encoding bits for a TU */
|
|
/************************************************************************/
|
|
if(i4_perform_rdoq)
|
|
{
|
|
tu_bits = ihevce_entropy_rdo_encode_tu_rdoq(
|
|
&ps_ctxt->s_rdopt_entropy_ctxt,
|
|
pu1_ecd_data,
|
|
trans_size,
|
|
0,
|
|
ps_rdoq_sbh_ctxt,
|
|
pi8_cost,
|
|
&zero_cbf_cost_u,
|
|
0);
|
|
//Currently, we are not accounting for sign bit in RDOPT bits calculation when RDOQ is turned on
|
|
|
|
if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 0)
|
|
{
|
|
cbf = 0;
|
|
|
|
/* num bytes is set to 0 */
|
|
*pi4_coeff_off = 0;
|
|
}
|
|
|
|
(*pi4_tu_bits) += tu_bits;
|
|
|
|
if((i4_perform_sbh) && (0 != cbf))
|
|
{
|
|
ps_rdoq_sbh_ctxt->i8_ssd_cost = pi8_cost[0];
|
|
|
|
ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
|
|
|
|
pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
|
|
}
|
|
|
|
/*Add round value before normalizing*/
|
|
pi8_cost[0] += ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td;
|
|
pi8_cost[0] >>= ga_trans_shift[trans_idx];
|
|
|
|
if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 1)
|
|
{
|
|
*pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
|
|
pi2_quant_coeffs,
|
|
pi4_subBlock2csbfId_map,
|
|
i4_scan_idx,
|
|
trans_size,
|
|
pu1_ecd_data,
|
|
ps_rdoq_sbh_ctxt->pu1_csbf_buf,
|
|
csbf_strd);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/************************************************************************/
|
|
/* call the entropy rdo encode to get the bit estimate for current tu */
|
|
/* note that tu includes only residual coding bits and does not include */
|
|
/* tu split, cbf and qp delta encoding bits for a TU */
|
|
/************************************************************************/
|
|
tu_bits = ihevce_entropy_rdo_encode_tu(
|
|
&ps_ctxt->s_rdopt_entropy_ctxt, pu1_ecd_data, trans_size, 0, i4_perform_sbh);
|
|
|
|
(*pi4_tu_bits) += tu_bits;
|
|
}
|
|
|
|
if(e_ssd_type == SPATIAL_DOMAIN_SSD)
|
|
{
|
|
pi8_cost[0] = ihevce_it_recon_ssd(
|
|
ps_ctxt,
|
|
pu1_src,
|
|
src_strd,
|
|
pu1_pred,
|
|
pred_strd,
|
|
pi2_deq_data,
|
|
deq_data_strd,
|
|
pu1_recon,
|
|
i4_recon_stride,
|
|
pu1_ecd_data,
|
|
trans_size,
|
|
PRED_MODE_INTRA,
|
|
cbf,
|
|
pi4_zero_col[0],
|
|
pi4_zero_row[0],
|
|
e_chroma_plane);
|
|
|
|
pu1_is_recon_available[0] = 1;
|
|
}
|
|
|
|
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
|
|
if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
|
|
{
|
|
pi8_cost[0] = ihevce_inject_stim_into_distortion(
|
|
pu1_src,
|
|
src_strd,
|
|
pu1_recon,
|
|
i4_recon_stride,
|
|
pi8_cost[0],
|
|
i4_alpha_stim_multiplier,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
e_chroma_plane);
|
|
}
|
|
else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
|
|
{
|
|
pi8_cost[0] = ihevce_inject_stim_into_distortion(
|
|
pu1_src,
|
|
src_strd,
|
|
pu1_pred,
|
|
pred_strd,
|
|
pi8_cost[0],
|
|
i4_alpha_stim_multiplier,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
e_chroma_plane);
|
|
}
|
|
#endif
|
|
|
|
curr_cb_cod_cost = pi8_cost[0];
|
|
|
|
/* add the SSD cost to bits estimate given by ECD */
|
|
curr_cb_cod_cost +=
|
|
COMPUTE_RATE_COST_CLIP30(tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
|
|
|
|
if(i4_perform_zcbf)
|
|
{
|
|
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
|
|
if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
|
|
{
|
|
zero_cbf_cost_u = ihevce_inject_stim_into_distortion(
|
|
pu1_src,
|
|
src_strd,
|
|
pu1_pred,
|
|
pred_strd,
|
|
zero_cbf_cost_u,
|
|
!ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
|
|
: ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
|
|
(double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
|
|
100.0,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
e_chroma_plane);
|
|
}
|
|
#endif
|
|
/* force the tu as zero cbf if zero_cbf_cost is lower */
|
|
if(zero_cbf_cost_u < curr_cb_cod_cost)
|
|
{
|
|
*pi4_coeff_off = 0;
|
|
cbf = 0;
|
|
(*pi4_tu_bits) = 0;
|
|
pi8_cost[0] = zero_cbf_cost_u;
|
|
|
|
pu1_is_recon_available[0] = 0;
|
|
|
|
if(e_ssd_type == SPATIAL_DOMAIN_SSD)
|
|
{
|
|
ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
|
|
pu1_pred,
|
|
pred_strd,
|
|
pu1_recon,
|
|
i4_recon_stride,
|
|
trans_size,
|
|
trans_size,
|
|
e_chroma_plane);
|
|
|
|
pu1_is_recon_available[0] = 1;
|
|
}
|
|
}
|
|
|
|
#if ENABLE_INTER_ZCU_COST
|
|
if(!intra_flag)
|
|
{
|
|
#if !WEIGH_CHROMA_COST
|
|
ps_ctxt->i8_cu_not_coded_cost += zero_cbf_cost_u;
|
|
#else
|
|
ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
|
|
(zero_cbf_cost_u * ps_ctxt->u4_chroma_cost_weighing_factor +
|
|
(1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
|
|
CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
|
|
#endif
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if(e_ssd_type == SPATIAL_DOMAIN_SSD)
|
|
{
|
|
pi8_cost[0] = ihevce_it_recon_ssd(
|
|
ps_ctxt,
|
|
pu1_src,
|
|
src_strd,
|
|
pu1_pred,
|
|
pred_strd,
|
|
pi2_deq_data,
|
|
deq_data_strd,
|
|
pu1_recon,
|
|
i4_recon_stride,
|
|
pu1_ecd_data,
|
|
trans_size,
|
|
PRED_MODE_INTRA,
|
|
cbf,
|
|
pi4_zero_col[0],
|
|
pi4_zero_row[0],
|
|
e_chroma_plane);
|
|
|
|
pu1_is_recon_available[0] = 1;
|
|
}
|
|
|
|
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
|
|
if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
|
|
{
|
|
pi8_cost[0] = ihevce_inject_stim_into_distortion(
|
|
pu1_src,
|
|
src_strd,
|
|
pu1_recon,
|
|
i4_recon_stride,
|
|
pi8_cost[0],
|
|
!ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
|
|
: ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
|
|
(double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
|
|
100.0,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
e_chroma_plane);
|
|
}
|
|
else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
|
|
{
|
|
pi8_cost[0] = ihevce_inject_stim_into_distortion(
|
|
pu1_src,
|
|
src_strd,
|
|
pu1_pred,
|
|
pred_strd,
|
|
pi8_cost[0],
|
|
!ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
|
|
: ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
|
|
(double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
|
|
100.0,
|
|
trans_size,
|
|
0,
|
|
ps_ctxt->u1_enable_psyRDOPT,
|
|
e_chroma_plane);
|
|
}
|
|
#endif
|
|
|
|
#if ENABLE_INTER_ZCU_COST
|
|
if(!intra_flag)
|
|
{
|
|
#if !WEIGH_CHROMA_COST
|
|
/* cbf = 0, accumulate cu not coded cost */
|
|
ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
|
|
#else
|
|
/* cbf = 0, accumulate cu not coded cost */
|
|
|
|
ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
|
|
(pi8_cost[0] * ps_ctxt->u4_chroma_cost_weighing_factor +
|
|
(1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
|
|
CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
|
|
#endif
|
|
}
|
|
#endif
|
|
}
|
|
#endif /* RDOPT_ZERO_CBF_ENABLE */
|
|
|
|
return (cbf);
|
|
}
|