aosp12/external/libhevc/common/arm64/ihevc_deblk_chroma_horz.s

174 lines
4.6 KiB
ArmAsm

///*****************************************************************************
//*
//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************/
///*******************************************************************************
//* @file
//* ihevc_deblk_chroma_horz.s
//*
//* @brief
//* contains the function definition for deblocking a horizontal chroma edge.
//* the function is written in aarch64 neon assembly (gnu assembler syntax),
//* not intrinsics
//*
//* @author
//* anand s
//*
//* @par list of functions:
//* - ihevc_deblk_chroma_horz_av8()
//*
//* @remarks
//* none
//*
//void ihevc_deblk_chroma_horz(UWORD8 *pu1_src,
// WORD32 src_strd,
// WORD32 quant_param_p,
// WORD32 quant_param_q,
// WORD32 qp_offset_u,
// WORD32 qp_offset_v,
// WORD32 tc_offset_div2,
// WORD32 filter_flag_p,
// WORD32 filter_flag_q)
//
.text
.align 4
.include "ihevc_neon_macros.s"
.extern gai4_ihevc_qp_table
.extern gai4_ihevc_tc_table
.globl ihevc_deblk_chroma_horz_av8
.type ihevc_deblk_chroma_horz_av8, %function
//------------------------------------------------------------------------------
// ihevc_deblk_chroma_horz_av8
//
// Filters one 8-byte segment of a chroma edge lying between the row at
// (pu1_src - src_strd) and the row at pu1_src.
//
// ABI: AAPCS64 (ELF, GOT-relative access to the lookup tables)
//
// In:   x0    pu1_src         row just below the edge ("q0" row)
//       w1    src_strd        byte distance between rows
//       w2    quant_param_p
//       w3    quant_param_q
//       w4    qp_offset_u
//       w5    qp_offset_v
//       w6    tc_offset_div2
//       w7    filter_flag_p   non-zero => write back the row above the edge
//       [sp]  filter_flag_q   non-zero => write back the row below the edge
//
// Out:  none (rows at pu1_src - src_strd and pu1_src are updated in memory)
//
// Clobbers: x1-x12, v0, v2, v4, v6, v16, v18, v28-v31, flags
//
// Per 16-bit lane the routine computes
//     delta = clip(-tc, tc, (((q0 - p0) << 2) + p1 - q1 + 4) >> 3)
//     p0'   = sat_u8(p0 + delta)
//     q0'   = sat_u8(q0 - delta)
// with p1/p0/q0/q1 read from pu1_src - 2*strd, - strd, + 0, + strd.
// Even lanes use the tc derived with qp_offset_u, odd lanes the tc derived
// with qp_offset_v (the two tc values are interleaved with zip1).
//------------------------------------------------------------------------------
ihevc_deblk_chroma_horz_av8:
    // AAPCS64 leaves bits [63:32] of a register that carries a 32-bit
    // argument unspecified. Every WORD32 argument is therefore sign-extended
    // before it takes part in 64-bit address, index or compare operations.
    sxtw        x1, w1                      // src_strd
    sxtw        x2, w2                      // quant_param_p
    sxtw        x3, w3                      // quant_param_q
    sxtw        x10, w4                     // x10 = qp_offset_u
    sxtw        x8, w7                      // x8  = filter_flag_p
    sxtw        x7, w5                      // x7  = qp_offset_v
    sxtw        x4, w6                      // x4  = tc_offset_div2
    ldrsw       x9, [sp]                    // x9  = filter_flag_q (first stack argument)

    // NOTE(review): macro comes from ihevc_neon_macros.s (not visible here);
    // this routine itself only writes v0-v6, v16, v18 and v28-v31.
    push_v_regs

    // Row pointers and loads, interleaved with the scalar QP arithmetic.
    sub         x12, x0, x1                 // x12 = pu1_src - src_strd      (p0 row)
    ld1         {v0.8b}, [x0]               // v0  = q0 row
    sub         x5, x12, x1                 // x5  = pu1_src - 2 * src_strd  (p1 row)
    add         x6, x0, x1                  // x6  = pu1_src + src_strd      (q1 row)
    add         x1, x2, x3                  // quant_param_p + quant_param_q
    uxtl        v0.8h, v0.8b
    ld1         {v2.8b}, [x12]              // v2  = p0 row
    add         x2, x1, #1                  // x2  = quant_param_p + quant_param_q + 1
    ld1         {v4.8b}, [x5]               // v4  = p1 row
    ld1         {v16.8b}, [x6]              // v16 = q1 row

    // First QP index: qp_offset_u + ((qp_p + qp_q + 1) >> 1)
    //   negative  -> used as is
    //   0 .. 0x39 -> replaced by gai4_ihevc_qp_table[index]
    //   > 0x39    -> index - 6
    adds        x1, x10, x2, asr #1
    uxtl        v2.8h, v2.8b
    adrp        x3, :got:gai4_ihevc_qp_table
    ldr         x3, [x3, #:got_lo12:gai4_ihevc_qp_table]
    bmi         .Lchroma_horz_qp_u_done
    cmp         x1, #0x39
    bgt         .Lchroma_horz_qp_u_high
    ldr         w1, [x3, x1, lsl #2]
    b           .Lchroma_horz_qp_u_done
.Lchroma_horz_qp_u_high:
    sub         x1, x1, #6
.Lchroma_horz_qp_u_done:

    // Second QP index: same mapping, using qp_offset_v.
    adds        x2, x7, x2, asr #1
    uxtl        v4.8h, v4.8b
    bmi         .Lchroma_horz_qp_v_done
    cmp         x2, #0x39
    bgt         .Lchroma_horz_qp_v_high
    ldr         w2, [x3, x2, lsl #2]
    b           .Lchroma_horz_qp_v_done
.Lchroma_horz_qp_v_high:
    sub         x2, x2, #6
.Lchroma_horz_qp_v_done:

    // tc table index = clamp(qp + 2 + (tc_offset_div2 << 1), 0, 0x35)
    mov         x11, #0x35                  // upper clamp bound, shared by both indices
    add         x1, x1, x4, lsl #1
    sub         v6.8h, v0.8h, v2.8h         // q0 - p0
    add         x1, x1, #2
    cmp         x1, #0x35
    csel        x1, x11, x1, gt
    shl         v6.8h, v6.8h, #2            // (q0 - p0) << 2
    uxtl        v16.8h, v16.8b
    cmp         x1, #0
    csel        x1, xzr, x1, lt

    adrp        x3, :got:gai4_ihevc_tc_table
    ldr         x3, [x3, #:got_lo12:gai4_ihevc_tc_table]
    add         v4.8h, v6.8h, v4.8h         // ((q0 - p0) << 2) + p1
    add         x2, x2, x4, lsl #1
    sub         v6.8h, v4.8h, v16.8h        // ((q0 - p0) << 2) + p1 - q1
    add         x2, x2, #2
    ldr         w1, [x3, x1, lsl #2]        // w1 = tc for the first QP (u)
    cmp         x2, #0x35
    csel        x2, x11, x2, gt
    cmp         x2, #0
    csel        x2, xzr, x2, lt
    ldr         w2, [x3, x2, lsl #2]        // w2 = tc for the second QP (v)

    // Build per-lane clip limits: +tc in v4, -tc in v18, u/v interleaved.
    dup         v31.8h, w2
    dup         v30.8h, w1
    neg         w1, w1
    srshr       v6.8h, v6.8h, #3            // rounding shift: (x + 4) >> 3
    dup         v28.8h, w1
    neg         w1, w2
    zip1        v4.8h, v30.8h, v31.8h
    dup         v29.8h, w1
    zip1        v18.8h, v28.8h, v29.8h

    smin        v16.8h, v6.8h, v4.8h        // min(delta,  tc)
    smax        v4.8h, v18.8h, v16.8h       // max(-tc, ...)  => clipped delta
    add         v2.8h, v2.8h, v4.8h         // p0 + delta
    sub         v0.8h, v0.8h, v4.8h         // q0 - delta
    sqxtun      v2.8b, v2.8h                // saturate to unsigned 8 bit
    sqxtun      v0.8b, v0.8h

    // Conditional write-back of the two filtered rows.
    cbz         x8, .Lchroma_horz_skip_p
    st1         {v2.8b}, [x12]
.Lchroma_horz_skip_p:
    cbz         x9, .Lchroma_horz_skip_q
    st1         {v0.8b}, [x0]
.Lchroma_horz_skip_q:
    pop_v_regs
    ret