281 lines
6.0 KiB
ArmAsm
281 lines
6.0 KiB
ArmAsm
///*****************************************************************************
|
|
//*
|
|
//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
|
|
//*
|
|
//* Licensed under the Apache License, Version 2.0 (the "License");
|
|
//* you may not use this file except in compliance with the License.
|
|
//* You may obtain a copy of the License at:
|
|
//*
|
|
//* http://www.apache.org/licenses/LICENSE-2.0
|
|
//*
|
|
//* Unless required by applicable law or agreed to in writing, software
|
|
//* distributed under the License is distributed on an "AS IS" BASIS,
|
|
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
//* See the License for the specific language governing permissions and
|
|
//* limitations under the License.
|
|
//*
|
|
//*****************************************************************************/
|
|
///**
// *******************************************************************************
// * @file
// *  ihevc_mem_fns_neon.s
// *
// * @brief
// *  Contains function definitions for memory manipulation
// *
// * @author
// *  Naveen SR
// *
// * @par List of Functions:
// *  - ihevc_memcpy_mul_8()
// *  - ihevc_memcpy()
// *  - ihevc_memset_mul_8()
// *  - ihevc_memset()
// *  - ihevc_memset_16bit_mul_8()
// *  - ihevc_memset_16bit()
// *
// * @remarks
// *  None
// *
// *******************************************************************************
//*/
|
|
|
|
///**
//*******************************************************************************
//*
//* @brief
//*     memcpy of a 1d array
//*
//* @par Description:
//*     Copies 8-bit data from source to destination; num_bytes is expected to
//*     be 8, 16 or 32
//*
//* @param[out] pu1_dst
//*     UWORD8 pointer to the destination
//*
//* @param[in] pu1_src
//*     UWORD8 pointer to the source
//*
//* @param[in] num_bytes
//*     number of bytes to copy
//*
//* @returns None
//*
//* @remarks
//*     None
//*
//*******************************************************************************
//*/
|
|
//void ihevc_memcpy_mul_8(UWORD8 *pu1_dst,
|
|
// UWORD8 *pu1_src,
|
|
// UWORD8 num_bytes)
|
|
//**************Variables Vs Registers*************************
|
|
// x0 => *pu1_dst
|
|
// x1 => *pu1_src
|
|
// x2 => num_bytes
|
|
|
|
.text
|
|
.p2align 2
|
|
|
|
|
|
//-----------------------------------------------------------------------
// ihevc_memcpy_mul_8_av8
//   Copies num_bytes bytes from pu1_src to pu1_dst, 8 bytes per iteration.
//   In:    x0 = pu1_dst, x1 = pu1_src, x2 = num_bytes
//   Out:   nothing is returned; x0/x1 end up advanced past the copied data
//   Clobb: x0, x1, x2, v0, condition flags
//   The loop exits only when the counter reaches exactly zero, so
//   num_bytes must be a non-zero multiple of 8.
//   NOTE(review): all 64 bits of x2 are used as the counter -- confirm
//   that callers pass a zero-extended value.
//-----------------------------------------------------------------------
        .global ihevc_memcpy_mul_8_av8
        .type   ihevc_memcpy_mul_8_av8, %function

ihevc_memcpy_mul_8_av8:
.Lmemcpy_mul_8_loop:
        ld1     {v0.8b}, [x1], #8       // fetch 8 source bytes, src += 8
        subs    x2, x2, #8              // 8 fewer bytes to go; Z set when done
        st1     {v0.8b}, [x0], #8       // store them, dst += 8 (flags untouched)
        b.ne    .Lmemcpy_mul_8_loop
        ret
|
|
|
|
|
|
|
|
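///**
//*******************************************************************************
//*
//* @brief
//*     memcpy of a 1d array of arbitrary length
//*
//* @par Description:
//*     Copies num_bytes bytes from source to destination, 8 bytes at a time,
//*     then copies any remaining bytes one at a time
//*
//*******************************************************************************
//*/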
|
|
//void ihevc_memcpy(UWORD8 *pu1_dst,
|
|
// UWORD8 *pu1_src,
|
|
// UWORD8 num_bytes)
|
|
//**************Variables Vs Registers*************************
|
|
// x0 => *pu1_dst
|
|
// x1 => *pu1_src
|
|
// x2 => num_bytes
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------
// ihevc_memcpy_av8
//   Copies num_bytes bytes from pu1_src to pu1_dst: 8 bytes at a time
//   while at least 8 remain, then the remaining 0..7 bytes one by one.
//   In:    x0 = pu1_dst, x1 = pu1_src, x2 = num_bytes (0 is allowed)
//   Out:   nothing is returned; x0/x1 end up advanced past the copied data
//   Clobb: x0, x1, x2, w3, v0, condition flags
//   NOTE(review): all 64 bits of x2 are used as a signed counter --
//   confirm that callers pass a zero-extended value.
//-----------------------------------------------------------------------
        .global ihevc_memcpy_av8
        .type   ihevc_memcpy_av8, %function

ihevc_memcpy_av8:
        subs    x2, x2, #8              // x2 = remaining - 8; negative => < 8 bytes
        b.lt    .Lmemcpy_tail

.Lmemcpy_neon_loop:
        ld1     {v0.8b}, [x1], #8       // copy one 8-byte chunk
        st1     {v0.8b}, [x0], #8
        subs    x2, x2, #8
        b.ge    .Lmemcpy_neon_loop      // another full chunk is available

.Lmemcpy_tail:
        adds    x2, x2, #8              // undo the bias: x2 = bytes left (0..7)
        b.eq    .Lmemcpy_done           // nothing left; also covers num_bytes == 0,
                                        // which previously entered the byte loop
                                        // with a zero counter and never terminated

.Lmemcpy_byte_loop:
        ldrb    w3, [x1], #1
        strb    w3, [x0], #1
        subs    x2, x2, #1
        b.ne    .Lmemcpy_byte_loop

.Lmemcpy_done:
        ret
|
|
|
|
|
|
|
|
|
|
//void ihevc_memset_mul_8(UWORD8 *pu1_dst,
|
|
// UWORD8 value,
|
|
// UWORD8 num_bytes)
|
|
//**************Variables Vs Registers*************************
|
|
// x0 => *pu1_dst
|
|
// x1 => value
|
|
// x2 => num_bytes
|
|
|
|
.text
|
|
.p2align 2
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------
// ihevc_memset_mul_8_av8
//   Fills num_bytes bytes at pu1_dst with the low byte of value,
//   8 bytes per iteration.
//   In:    x0 = pu1_dst, w1 = value, x2 = num_bytes
//   Out:   nothing is returned; x0 ends up advanced past the filled data
//   Clobb: x0, x2, v0, condition flags
//   Assumption: num_bytes is 8, 16 or 32. The loop exits only when the
//   counter reaches exactly zero, so it must be a non-zero multiple of 8.
//   NOTE(review): all 64 bits of x2 are used as the counter -- confirm
//   that callers pass a zero-extended value.
//-----------------------------------------------------------------------
        .global ihevc_memset_mul_8_av8
        .type   ihevc_memset_mul_8_av8, %function

ihevc_memset_mul_8_av8:
        dup     v0.8b, w1               // replicate the fill byte into 8 lanes

.Lmemset_mul_8_loop:
        subs    x2, x2, #8              // 8 fewer bytes to go; Z set when done
        st1     {v0.8b}, [x0], #8       // write 8 bytes, dst += 8 (flags untouched)
        b.ne    .Lmemset_mul_8_loop

        ret
|
|
|
|
|
|
|
|
|
|
//void ihevc_memset(UWORD8 *pu1_dst,
|
|
// UWORD8 value,
|
|
// UWORD8 num_bytes)
|
|
//**************Variables Vs Registers*************************
|
|
// x0 => *pu1_dst
|
|
// x1 => value
|
|
// x2 => num_bytes
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------
// ihevc_memset_av8
//   Fills num_bytes bytes at pu1_dst with the low byte of value: 8 bytes
//   at a time while at least 8 remain, then the remaining 0..7 bytes
//   one by one.
//   In:    x0 = pu1_dst, w1 = value, x2 = num_bytes (0 is allowed)
//   Out:   nothing is returned; x0 ends up advanced past the filled data
//   Clobb: x0, x2, v0, condition flags
//   NOTE(review): all 64 bits of x2 are used as a signed counter --
//   confirm that callers pass a zero-extended value.
//-----------------------------------------------------------------------
        .global ihevc_memset_av8
        .type   ihevc_memset_av8, %function

ihevc_memset_av8:
        subs    x2, x2, #8              // x2 = remaining - 8; negative => < 8 bytes
        b.lt    .Lmemset_tail
        dup     v0.8b, w1               // replicate the fill byte into 8 lanes

.Lmemset_neon_loop:
        st1     {v0.8b}, [x0], #8       // memset 8 bytes
        subs    x2, x2, #8
        b.ge    .Lmemset_neon_loop      // another full chunk is available

.Lmemset_tail:
        adds    x2, x2, #8              // undo the bias: x2 = bytes left (0..7)
        b.eq    .Lmemset_done           // nothing left; also covers num_bytes == 0,
                                        // which previously entered the byte loop
                                        // with a zero counter and never terminated

.Lmemset_byte_loop:
        strb    w1, [x0], #1
        subs    x2, x2, #1
        b.ne    .Lmemset_byte_loop

.Lmemset_done:
        ret
|
|
|
|
|
|
|
|
|
|
//void ihevc_memset_16bit_mul_8(UWORD16 *pu2_dst,
|
|
// UWORD16 value,
|
|
// UWORD8 num_words)
|
|
//**************Variables Vs Registers*************************
|
|
// x0 => *pu2_dst
|
|
// x1 => value
|
|
// x2 => num_words
|
|
|
|
.text
|
|
.p2align 2
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------
// ihevc_memset_16bit_mul_8_av8
//   Fills num_words 16-bit words at pu2_dst with the low halfword of
//   value, 8 words (16 bytes) per iteration.
//   In:    x0 = pu2_dst, w1 = value, x2 = num_words
//   Out:   nothing is returned; x0 ends up advanced past the filled data
//   Clobb: x0, x2, v0, condition flags
//   Assumption: num_words is 8, 16 or 32. The loop exits only when the
//   counter reaches exactly zero, so it must be a non-zero multiple of 8.
//   NOTE(review): all 64 bits of x2 are used as the counter -- confirm
//   that callers pass a zero-extended value.
//-----------------------------------------------------------------------
        .global ihevc_memset_16bit_mul_8_av8
        .type   ihevc_memset_16bit_mul_8_av8, %function

ihevc_memset_16bit_mul_8_av8:
        dup     v0.8h, w1               // replicate the fill halfword into 8 lanes

.Lmemset_16bit_mul_8_loop:
        subs    x2, x2, #8              // 8 fewer words to go; Z set when done
        st1     {v0.8h}, [x0], #16      // write 8 words, dst += 16 bytes (flags untouched)
        b.ne    .Lmemset_16bit_mul_8_loop

        ret
|
|
|
|
|
|
|
|
|
|
//void ihevc_memset_16bit(UWORD16 *pu2_dst,
|
|
// UWORD16 value,
|
|
// UWORD8 num_words)
|
|
//**************Variables Vs Registers*************************
|
|
// x0 => *pu2_dst
|
|
// x1 => value
|
|
// x2 => num_words
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------
// ihevc_memset_16bit_av8
//   Fills num_words 16-bit words at pu2_dst with the low halfword of
//   value: 8 words at a time while at least 8 remain, then the remaining
//   0..7 words one by one.
//   In:    x0 = pu2_dst, w1 = value, x2 = num_words (0 is allowed)
//   Out:   nothing is returned; x0 ends up advanced past the filled data
//   Clobb: x0, x2, v0, condition flags
//   NOTE(review): all 64 bits of x2 are used as a signed counter --
//   confirm that callers pass a zero-extended value.
//-----------------------------------------------------------------------
        .global ihevc_memset_16bit_av8
        .type   ihevc_memset_16bit_av8, %function

ihevc_memset_16bit_av8:
        subs    x2, x2, #8              // x2 = remaining - 8; negative => < 8 words
        b.lt    .Lmemset_16bit_tail
        dup     v0.8h, w1               // replicate the fill halfword into 8 lanes

.Lmemset_16bit_neon_loop:
        st1     {v0.8h}, [x0], #16      // memset 8 words (16 bytes)
        subs    x2, x2, #8
        b.ge    .Lmemset_16bit_neon_loop // another full chunk is available

.Lmemset_16bit_tail:
        adds    x2, x2, #8              // undo the bias: x2 = words left (0..7)
        b.eq    .Lmemset_16bit_done     // nothing left; also covers num_words == 0,
                                        // which previously entered the word loop
                                        // with a zero counter and never terminated

.Lmemset_16bit_word_loop:
        strh    w1, [x0], #2
        subs    x2, x2, #1
        b.ne    .Lmemset_16bit_word_loop

.Lmemset_16bit_done:
        ret
|
|
|
|
|
|
|
|
|
|
.section .note.GNU-stack,"",%progbits
|
|
|