///******************************************************************************
// *
// * Copyright (C) 2018 The Android Open Source Project
// *
// * Licensed under the Apache License, Version 2.0 (the "License");
// * you may not use this file except in compliance with the License.
// * You may obtain a copy of the License at:
// *
// * http://www.apache.org/licenses/LICENSE-2.0
// *
// * Unless required by applicable law or agreed to in writing, software
// * distributed under the License is distributed on an "AS IS" BASIS,
// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// * See the License for the specific language governing permissions and
// * limitations under the License.
// *
// *****************************************************************************
// * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/

// push_v_regs
// Spills q8-q15 and x21-x24 into one 160-byte frame carved out below the
// incoming sp. Slot layout relative to the new sp:
//     [sp, #0]   x23, x24        [sp, #16]  x21, x22
//     [sp, #32]  q14, q15        [sp, #64]  q12, q13
//     [sp, #96]  q10, q11        [sp, #128] q8,  q9
// sp drops by 160 in total, so it stays 16-byte aligned if it was on entry.
.macro push_v_regs
    sub             sp, sp, #160
    stp             x23, x24, [sp]
    stp             x21, x22, [sp, #16]
    stp             q14, q15, [sp, #32]
    stp             q12, q13, [sp, #64]
    stp             q10, q11, [sp, #96]
    stp             q8, q9, [sp, #128]
.endm

// pop_v_regs
// Reloads x21-x24 and q8-q15 from the 160-byte frame at sp and then releases
// the frame. Slot layout relative to sp on entry to the macro:
//     [sp, #0]   x23, x24        [sp, #16]  x21, x22
//     [sp, #32]  q14, q15        [sp, #64]  q12, q13
//     [sp, #96]  q10, q11        [sp, #128] q8,  q9
// All loads are issued before sp moves, so nothing is read from below sp.
.macro pop_v_regs
    ldp             x23, x24, [sp]
    ldp             x21, x22, [sp, #16]
    ldp             q14, q15, [sp, #32]
    ldp             q12, q13, [sp, #64]
    ldp             q10, q11, [sp, #96]
    ldp             q8, q9, [sp, #128]
    add             sp, sp, #160
.endm

// swp reg1, reg2
// Exchanges the contents of two general-purpose registers, using x16 as the
// temporary. x16 is overwritten and is left holding the value \reg1 had on
// entry, so neither operand should be x16.
.macro swp reg1, reg2
    mov             x16, \reg1              // x16  = old reg1
    mov             \reg1, \reg2            // reg1 = old reg2
    mov             \reg2, x16              // reg2 = old reg1
.endm

.text
.global ixheaacd_sbr_qmfsyn64_winadd

// Byte distance between successive taps of one output group.
.equ QMFSYN64_STEP_X0_X1,   512         // streams walked through x0 and x1
.equ QMFSYN64_STEP_X2_X12,  256         // streams walked through x2 and x12
// Number of four-output groups produced per call (16 * 4 = 64 stores).
.equ QMFSYN64_GROUPS,       16

//------------------------------------------------------------------------------
// ixheaacd_sbr_qmfsyn64_winadd
//
// Produces 64 signed 16-bit outputs, four lanes at a time. For every group of
// four lanes the routine computes
//
//     acc  = 0x8000 shifted by -w4 (SSHL, so a right shift for positive w4)
//     acc += sum of five 16x16->32 products  x0-stream tap * x2-stream tap
//     acc += sum of five 16x16->32 products  x1-stream tap * x12-stream tap
//     acc  = SQSHL(acc, w4)                  (saturating shift by w4)
//     out  = bits [31:16] of acc
//
// In:
//   x0  halfword input; the five taps of a group are read at byte offsets
//       0, 512, 1024, 1536, 2048 from the current position
//   x1  halfword input; reading starts at x1 + 256 bytes, same tap offsets
//   x2  halfword input paired with x0; taps at byte offsets 0, 256, 512,
//       768, 1024. A second cursor (x12 = x2 + 128 bytes) is paired with x1
//       and uses the same tap offsets
//   x3  output pointer; one halfword is written per store and the pointer
//       then advances by 2 * x5 bytes
//   w4  shift amount (see formula above)
//   x5  output step in halfwords
//       NOTE(review): x5 is consumed as a full 64-bit register. If the C
//       prototype declares this argument as a 32-bit int, AAPCS64 leaves the
//       upper 32 bits unspecified and a sign extension (sxtw) would be needed
//       before the shift - confirm against the caller.
//
// All four input cursors advance by 8 bytes (four halfwords) per group.
//
// Clobbers: x0-x3, x5-x12, v0-v7, v16-v20, v22, v26, v28, v30, flags.
// v8-v15 are used as scratch but saved/restored by push_v_regs/pop_v_regs.
//
// Scheduling: the code is software pipelined. While one group is being
// accumulated, the previous group's four lanes are stored and the next
// group's taps are loaded, so the instruction order below is significant.
//------------------------------------------------------------------------------
ixheaacd_sbr_qmfsyn64_winadd:

    push_v_regs

    // ---- setup, interleaved with the first tap loads of group 0 ------------
    mov             w7, #0x8000
    ld1             {v0.4h}, [x0], #8
    mov             x12, x2                         // x12 = x2 base (offset added below)

    dup             v30.4s, w7                      // v30 = 0x8000 per lane
    ld1             {v1.4h}, [x2], #8
    dup             v22.4s, w4                      // v22 = shift per lane

    mov             x10, x0                         // x10 = x0 cursor for the next group
    mov             x11, x2                         // x11 = x2 cursor for the next group
    add             x0, x0, #(QMFSYN64_STEP_X0_X1 - 8)
    add             x2, x2, #(QMFSYN64_STEP_X2_X12 - 8)

    neg             v28.4s, v22.4s
    sshl            v20.4s, v30.4s, v28.4s          // v20 = initial accumulator value
    add             x12, x12, #128                  // second cursor into the x2 stream
    add             x1, x1, #256                    // x1 stream starts 256 bytes in
    mov             x6, #QMFSYN64_GROUPS            // groups still to be scheduled
    mov             x9, #QMFSYN64_STEP_X2_X12       // tap step for x2 / x12
    mov             x8, #QMFSYN64_STEP_X0_X1        // tap step for x0 / x1

    lsl             x5, x5, #1                      // output step: halfwords -> bytes

    // ---- group 0: load and accumulate ---------------------------------------
    ld1             {v2.4h}, [x0], x8
    mov             v26.16b, v20.16b                // acc = initial value

    smlal           v26.4s, v0.4h, v1.4h
    ld1             {v3.4h}, [x2], x9

    ld1             {v4.4h}, [x0], x8
    smlal           v26.4s, v2.4h, v3.4h

    ld1             {v5.4h}, [x2], x9

    ld1             {v6.4h}, [x0], x8
    smlal           v26.4s, v5.4h, v4.4h

    ld1             {v7.4h}, [x2], x9

    ld1             {v8.4h}, [x0], x8
    smlal           v26.4s, v7.4h, v6.4h

    ld1             {v9.4h}, [x2], x9
    mov             x0, x10                         // rewind x0 to the next group

    mov             x2, x11                         // rewind x2 to the next group
    ld1             {v10.4h}, [x1], #8
    smlal           v26.4s, v9.4h, v8.4h

    mov             x10, x1                         // x10 = x1 cursor for the next group
    ld1             {v11.4h}, [x12], #8
    add             x1, x1, #(QMFSYN64_STEP_X0_X1 - 8)

    mov             x11, x12                        // x11 = x12 cursor for the next group
    ld1             {v12.4h}, [x1], x8
    add             x12, x12, #(QMFSYN64_STEP_X2_X12 - 8)

    smlal           v26.4s, v10.4h, v11.4h
    ld1             {v13.4h}, [x12], x9

    ld1             {v14.4h}, [x1], x8
    smlal           v26.4s, v12.4h, v13.4h

    ld1             {v15.4h}, [x12], x9

    ld1             {v16.4h}, [x1], x8
    smlal           v26.4s, v15.4h, v14.4h

    ld1             {v17.4h}, [x12], x9

    ld1             {v18.4h}, [x1], x8
    smlal           v26.4s, v17.4h, v16.4h

    ld1             {v19.4h}, [x12], x9

    // ---- finish group 0, start loading group 1 ------------------------------
    smlal           v26.4s, v19.4h, v18.4h
    ld1             {v0.4h}, [x0], #8
    mov             x12, x11                        // rewind x12 to the next group

    mov             x1, x10                         // rewind x1 to the next group
    ld1             {v1.4h}, [x2], #8
    mov             x10, x0

    sqshl           v26.4s, v26.4s, v22.4s

    add             x0, x0, #(QMFSYN64_STEP_X0_X1 - 8)

    mov             x11, x2
    ld1             {v2.4h}, [x0], x8
    add             x2, x2, #(QMFSYN64_STEP_X2_X12 - 8)

    sshr            v28.4s, v26.4s, #16
    ld1             {v3.4h}, [x2], x9

    uzp1            v28.8h, v28.8h, v28.8h          // v28.h[0..3] = group 0 results
    mov             v26.16b, v20.16b                // acc = initial value

    ld1             {v4.4h}, [x0], x8
    ld1             {v5.4h}, [x2], x9

    ld1             {v6.4h}, [x0], x8
    ld1             {v7.4h}, [x2], x9

    ld1             {v8.4h}, [x0], x8
    ld1             {v9.4h}, [x2], x9
    mov             x0, x10

    mov             x2, x11
    ld1             {v10.4h}, [x1], #8

    mov             x10, x1
    ld1             {v11.4h}, [x12], #8
    add             x1, x1, #(QMFSYN64_STEP_X0_X1 - 8)

    mov             x11, x12
    ld1             {v12.4h}, [x1], x8
    add             x12, x12, #(QMFSYN64_STEP_X2_X12 - 8)

    ld1             {v13.4h}, [x12], x9

    ld1             {v14.4h}, [x1], x8
    ld1             {v15.4h}, [x12], x9

    ld1             {v16.4h}, [x1], x8
    ld1             {v17.4h}, [x12], x9

    ld1             {v18.4h}, [x1], x8
    sub             x6, x6, #2                      // groups 0 and 15 live outside the loop
    ld1             {v19.4h}, [x12], x9
    mov             x1, x10

    mov             x12, x11

    // Loop invariant on entry: v28.h[0..3] holds a finished, not yet stored
    // group; v0-v19 hold the taps of the following group; v26 = initial value.
    // Each pass stores two groups (8 halfwords) and runs 7 times.
.Lqmfsyn64_winadd_loop:

    // ---- first half: accumulate loaded group, store previous one ------------
    smlal           v26.4s, v0.4h, v1.4h
    st1             {v28.h}[0], [x3], x5

    smlal           v26.4s, v2.4h, v3.4h
    ld1             {v0.4h}, [x0], #8
    smlal           v26.4s, v5.4h, v4.4h

    smlal           v26.4s, v7.4h, v6.4h
    st1             {v28.h}[1], [x3], x5

    mov             x10, x0
    ld1             {v1.4h}, [x2], #8
    add             x0, x0, #(QMFSYN64_STEP_X0_X1 - 8)

    smlal           v26.4s, v9.4h, v8.4h
    st1             {v28.h}[2], [x3], x5

    smlal           v26.4s, v10.4h, v11.4h
    st1             {v28.h}[3], [x3], x5

    mov             x11, x2
    ld1             {v2.4h}, [x0], x8
    add             x2, x2, #(QMFSYN64_STEP_X2_X12 - 8)

    smlal           v26.4s, v12.4h, v13.4h
    ld1             {v3.4h}, [x2], x9
    smlal           v26.4s, v15.4h, v14.4h

    smlal           v26.4s, v17.4h, v16.4h
    ld1             {v4.4h}, [x0], x8
    smlal           v26.4s, v19.4h, v18.4h

    ld1             {v5.4h}, [x2], x9

    ld1             {v6.4h}, [x0], x8
    sqshl           v26.4s, v26.4s, v22.4s

    sshr            v28.4s, v26.4s, #16
    ld1             {v7.4h}, [x2], x9
    mov             v26.16b, v20.16b                // acc = initial value

    uzp1            v28.8h, v28.8h, v28.8h          // v28.h[0..3] = finished group

    // ---- second half: load and accumulate the next group in one go ----------
    smlal           v26.4s, v0.4h, v1.4h

    smlal           v26.4s, v2.4h, v3.4h
    ld1             {v8.4h}, [x0], x8
    smlal           v26.4s, v5.4h, v4.4h

    smlal           v26.4s, v7.4h, v6.4h
    ld1             {v9.4h}, [x2], x9

    ld1             {v10.4h}, [x1], #8
    smlal           v26.4s, v9.4h, v8.4h

    mov             x2, x11
    ld1             {v11.4h}, [x12], #8
    mov             x0, x10

    mov             x10, x1

    add             x1, x1, #(QMFSYN64_STEP_X0_X1 - 8)

    mov             x11, x12
    ld1             {v12.4h}, [x1], x8
    add             x12, x12, #(QMFSYN64_STEP_X2_X12 - 8)

    ld1             {v13.4h}, [x12], x9
    smlal           v26.4s, v10.4h, v11.4h

    ld1             {v14.4h}, [x1], x8
    smlal           v26.4s, v12.4h, v13.4h

    ld1             {v15.4h}, [x12], x9

    ld1             {v16.4h}, [x1], x8
    smlal           v26.4s, v15.4h, v14.4h

    ld1             {v17.4h}, [x12], x9

    ld1             {v18.4h}, [x1], x8
    smlal           v26.4s, v17.4h, v16.4h

    ld1             {v19.4h}, [x12], x9
    mov             x1, x10

    smlal           v26.4s, v19.4h, v18.4h
    st1             {v28.h}[0], [x3], x5

    mov             x12, x11
    ld1             {v0.4h}, [x0], #8

    ld1             {v1.4h}, [x2], #8
    sqshl           v26.4s, v26.4s, v22.4s

    st1             {v28.h}[1], [x3], x5
    mov             x10, x0

    st1             {v28.h}[2], [x3], x5
    add             x0, x0, #(QMFSYN64_STEP_X0_X1 - 8)

    st1             {v28.h}[3], [x3], x5
    mov             x11, x2

    // v28 may be overwritten only now that all four lanes have been stored.
    sshr            v28.4s, v26.4s, #16
    ld1             {v2.4h}, [x0], x8
    add             x2, x2, #(QMFSYN64_STEP_X2_X12 - 8)

    // ---- preload every tap of the group consumed by the next pass -----------
    ld1             {v3.4h}, [x2], x9
    ld1             {v4.4h}, [x0], x8
    ld1             {v5.4h}, [x2], x9
    ld1             {v6.4h}, [x0], x8
    ld1             {v7.4h}, [x2], x9
    ld1             {v8.4h}, [x0], x8
    ld1             {v9.4h}, [x2], x9

    uzp1            v28.8h, v28.8h, v28.8h          // v28.h[0..3] = finished group
    mov             v26.16b, v20.16b                // acc = initial value

    mov             x0, x10
    ld1             {v10.4h}, [x1], #8
    mov             x2, x11

    mov             x10, x1
    ld1             {v11.4h}, [x12], #8
    add             x1, x1, #(QMFSYN64_STEP_X0_X1 - 8)

    mov             x11, x12
    ld1             {v12.4h}, [x1], x8
    add             x12, x12, #(QMFSYN64_STEP_X2_X12 - 8)

    ld1             {v13.4h}, [x12], x9

    ld1             {v14.4h}, [x1], x8
    ld1             {v15.4h}, [x12], x9

    ld1             {v16.4h}, [x1], x8
    ld1             {v17.4h}, [x12], x9

    subs            x6, x6, #2                      // two groups retired per pass
    ld1             {v18.4h}, [x1], x8

    mov             x1, x10
    ld1             {v19.4h}, [x12], x9

    mov             x12, x11

    bgt             .Lqmfsyn64_winadd_loop

    // ---- drain: accumulate the last loaded group, store the pending one -----
    smlal           v26.4s, v0.4h, v1.4h
    st1             {v28.h}[0], [x3], x5
    smlal           v26.4s, v2.4h, v3.4h

    smlal           v26.4s, v5.4h, v4.4h
    st1             {v28.h}[1], [x3], x5
    smlal           v26.4s, v7.4h, v6.4h

    smlal           v26.4s, v9.4h, v8.4h
    st1             {v28.h}[2], [x3], x5
    smlal           v26.4s, v10.4h, v11.4h

    smlal           v26.4s, v12.4h, v13.4h
    st1             {v28.h}[3], [x3], x5
    smlal           v26.4s, v15.4h, v14.4h

    smlal           v26.4s, v17.4h, v16.4h

    smlal           v26.4s, v19.4h, v18.4h

    sqshl           v26.4s, v26.4s, v22.4s

    sshr            v28.4s, v26.4s, #16

    uzp1            v28.8h, v28.8h, v28.8h          // v28.h[0..3] = final group

    st1             {v28.h}[0], [x3], x5
    st1             {v28.h}[1], [x3], x5
    st1             {v28.h}[2], [x3], x5
    st1             {v28.h}[3], [x3], x5

    pop_v_regs
    ret
