Remove unused android_memset16()/android_memset32().
Pixelflinger is gone. Test: treehugger Change-Id: I6954ae6a860102be0f6d76f33e76ed3210e5c152
This commit is contained in:
parent
6188334d62
commit
7b8fcfea0e
|
@ -114,36 +114,17 @@ cc_library {
|
|||
},
|
||||
|
||||
android_arm: {
|
||||
srcs: ["arch-arm/memset32.S"],
|
||||
sanitize: {
|
||||
misc_undefined: ["integer"],
|
||||
},
|
||||
},
|
||||
android_arm64: {
|
||||
srcs: ["arch-arm64/android_memset.S"],
|
||||
sanitize: {
|
||||
misc_undefined: ["integer"],
|
||||
},
|
||||
},
|
||||
|
||||
android_mips: {
|
||||
srcs: ["arch-mips/android_memset.c"],
|
||||
sanitize: {
|
||||
misc_undefined: ["integer"],
|
||||
},
|
||||
},
|
||||
android_mips64: {
|
||||
srcs: ["arch-mips/android_memset.c"],
|
||||
sanitize: {
|
||||
misc_undefined: ["integer"],
|
||||
},
|
||||
},
|
||||
|
||||
android_x86: {
|
||||
srcs: [
|
||||
"arch-x86/android_memset16.S",
|
||||
"arch-x86/android_memset32.S",
|
||||
],
|
||||
// TODO: This is to work around b/29412086.
|
||||
// Remove once __mulodi4 is available and move the "sanitize" block
|
||||
// to the android target.
|
||||
|
@ -153,10 +134,6 @@ cc_library {
|
|||
},
|
||||
|
||||
android_x86_64: {
|
||||
srcs: [
|
||||
"arch-x86_64/android_memset16.S",
|
||||
"arch-x86_64/android_memset32.S",
|
||||
],
|
||||
sanitize: {
|
||||
misc_undefined: ["integer"],
|
||||
},
|
||||
|
@ -206,7 +183,6 @@ cc_defaults {
|
|||
"android_get_control_socket_test.cpp",
|
||||
"ashmem_test.cpp",
|
||||
"fs_config_test.cpp",
|
||||
"memset_test.cpp",
|
||||
"multiuser_test.cpp",
|
||||
"properties_test.cpp",
|
||||
"sched_policy_test.cpp",
|
||||
|
|
|
@ -1,100 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2006 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* memset32.S
|
||||
*
|
||||
*/
|
||||
|
||||
.syntax unified
|
||||
|
||||
.text
|
||||
.align
|
||||
|
||||
.global android_memset32
|
||||
.type android_memset32, %function
|
||||
.global android_memset16
|
||||
.type android_memset16, %function
|
||||
|
||||
/*
|
||||
* Optimized memset32 and memset16 for ARM.
|
||||
*
|
||||
* void android_memset16(uint16_t* dst, uint16_t value, size_t size);
|
||||
* void android_memset32(uint32_t* dst, uint32_t value, size_t size);
|
||||
*
|
||||
*/
|
||||
|
||||
/*
 * android_memset16: fill memory with a 16-bit pattern.
 * Register use as read from the code below: r0 = destination address,
 * r1 = value, r2 = size in bytes (reduced by 2 for each halfword stored).
 */
android_memset16:
|
||||
.fnstart
|
||||
/* Return immediately when size <= 1 (signed comparison). */
cmp r2, #1
|
||||
bxle lr
|
||||
|
||||
/* expand the data to 32 bits */
|
||||
/* r1 = (r1 << 16) | (r1 >> 16): the low halfword ends up in both halves. */
mov r1, r1, lsl #16
|
||||
orr r1, r1, r1, lsr #16
|
||||
|
||||
/* align to 32 bits */
|
||||
/* If bit 1 of the address is set, store one halfword and drop 2 bytes from the count. */
tst r0, #2
|
||||
strhne r1, [r0], #2
|
||||
subne r2, r2, #2
|
||||
/*
 * There is no return or branch here: execution continues into the code that
 * immediately follows this function (android_memset32), with r0/r1/r2
 * already holding the aligned address, the 32-bit pattern and the byte count.
 */
.fnend
|
||||
|
||||
/*
 * android_memset32: fill memory with a 32-bit pattern.
 * Register use as read from the code below: r0 = destination address
 * (post-incremented by every store), r1 = value, r2 = size in bytes.
 * r3, r12 and lr are used as additional copies of the value / scratch.
 */
android_memset32:
|
||||
.fnstart
|
||||
.cfi_startproc
|
||||
/* Save lr on the stack: it is reused below as a data register for the multi-register stores. */
str lr, [sp, #-4]!
|
||||
.cfi_def_cfa_offset 4
|
||||
.cfi_rel_offset lr, 0
|
||||
|
||||
/* align the destination to a cache-line */
|
||||
mov r12, r1
|
||||
mov lr, r1
|
||||
/* r3 = (-dst) & 0x1C: bytes (a multiple of 4) up to the next 32-byte boundary. */
rsb r3, r0, #0
|
||||
ands r3, r3, #0x1C
|
||||
beq .Laligned32
|
||||
/* If that is more than the size, use (size & 0x1C) instead. */
cmp r3, r2
|
||||
andhi r3, r2, #0x1C
|
||||
sub r2, r2, r3
|
||||
|
||||
/* conditionally writes 0 to 7 words (length in r3) */
|
||||
/* lsl #28 moves bit 4 of r3 into C and bit 3 into N: C -> 16 bytes, N (mi) -> 8 bytes. */
movs r3, r3, lsl #28
|
||||
stmiacs r0!, {r1, lr}
|
||||
stmiacs r0!, {r1, lr}
|
||||
stmiami r0!, {r1, lr}
|
||||
/* The further lsl #2 moves the original bit 2 into C: one more word. */
movs r3, r3, lsl #2
|
||||
strcs r1, [r0], #4
|
||||
|
||||
.Laligned32:
|
||||
mov r3, r1
|
||||
/* Main loop: 32 bytes per iteration while at least 32 bytes remain (hs after the subtract). */
1: subs r2, r2, #32
|
||||
stmiahs r0!, {r1,r3,r12,lr}
|
||||
stmiahs r0!, {r1,r3,r12,lr}
|
||||
bhs 1b
|
||||
/* Undo the final subtraction so r2 holds the remaining byte count again. */
add r2, r2, #32
|
||||
|
||||
/* conditionally stores 0 to 30 bytes */
|
||||
/* Same flag trick as above: C = bit 4 (16 bytes), N = bit 3 (8 bytes) ... */
movs r2, r2, lsl #28
|
||||
stmiacs r0!, {r1,r3,r12,lr}
|
||||
stmiami r0!, {r1,lr}
|
||||
/* ... then C = bit 2 (4 bytes), N = bit 1 (one halfword). */
movs r2, r2, lsl #2
|
||||
strcs r1, [r0], #4
|
||||
strhmi lr, [r0], #2
|
||||
|
||||
/* Restore the saved lr and return. */
ldr lr, [sp], #4
|
||||
.cfi_def_cfa_offset 0
|
||||
.cfi_restore lr
|
||||
bx lr
|
||||
.cfi_endproc
|
||||
.fnend
|
|
@ -1,211 +0,0 @@
|
|||
/* Copyright (c) 2012, Linaro Limited
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the Linaro nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Assumptions:
|
||||
*
|
||||
* ARMv8-a, AArch64
|
||||
* Unaligned accesses
|
||||
*
|
||||
*/
|
||||
|
||||
/* By default we assume that the DC instruction can be used to zero
|
||||
data blocks more efficiently. In some circumstances this might be
|
||||
unsafe, for example in an asymmetric multiprocessor environment with
|
||||
different DC clear lengths (neither the upper nor lower lengths are
|
||||
safe to use). */
|
||||
|
||||
/*
 * Symbolic names for the registers used by the routines in this file.
 * Where two names share a register number (tmp1/tmp1w, tmp2/tmp2w,
 * zva_len_x/zva_len, A_l/A_lw) the second is the 32-bit W view of the
 * 64-bit X register named by the first.
 */
#define dst x0
|
||||
#define count x2
|
||||
#define tmp1 x3
|
||||
#define tmp1w w3
|
||||
#define tmp2 x4
|
||||
#define tmp2w w4
|
||||
#define zva_len_x x5
|
||||
#define zva_len w5
|
||||
#define zva_bits_x x6
|
||||
|
||||
/* A_l / A_lw hold the fill value. */
#define A_l x1
|
||||
#define A_lw w1
|
||||
#define tmp3w w9
|
||||
|
||||
/*
 * ENTRY(f): switch to .text, export f as a global symbol of type %function,
 * emit the label f and open its CFI region.
 */
#define ENTRY(f) \
|
||||
.text; \
|
||||
.globl f; \
|
||||
.align 0; \
|
||||
.type f, %function; \
|
||||
f: \
|
||||
.cfi_startproc \
|
||||
|
||||
/*
 * END(f): close the CFI region opened by ENTRY(f) and record the symbol size
 * as the distance from the label f to this point.
 */
#define END(f) \
|
||||
.cfi_endproc; \
|
||||
.size f, .-f; \
|
||||
|
||||
/*
 * android_memset16: 16-bit pattern fill.  Only prepares the value and then
 * continues at labels that are defined inside android_memset32.
 */
ENTRY(android_memset16)
|
||||
/* Keep only the low 16 bits of the value; a zero pattern takes the zeroing path. */
ands A_lw, A_lw, #0xffff
|
||||
b.eq .Lzero_mem
|
||||
/* Replicate the halfword into both halves of the 32-bit register. */
orr A_lw, A_lw, A_lw, lsl #16
|
||||
/* Continue with the 32-bit pattern: widen to 64 bits and fill. */
b .Lexpand_to_64
|
||||
END(android_memset16)
|
||||
|
||||
/*
 * android_memset32: 32-bit pattern fill.
 * dst is the destination address, A_l/A_lw the fill value and count the
 * remaining length in bytes (see the register aliases defined earlier in
 * this file).  android_memset16 also branches to .Lzero_mem and
 * .Lexpand_to_64 below.
 */
ENTRY(android_memset32)
|
||||
/* A zero pattern takes the dedicated zeroing path. */
cmp A_lw, #0
|
||||
b.eq .Lzero_mem
|
||||
.Lexpand_to_64:
|
||||
/* Replicate the low 32 bits into the high 32 bits.
 * NOTE(review): when entered through android_memset32 nothing above writes
 * w1/x1, so this relies on bits 63:32 of x1 being zero on entry -- confirm
 * against the calling convention. */
orr A_l, A_l, A_l, lsl #32
|
||||
.Ltail_maybe_long:
|
||||
/* 64 bytes or more: use the aligned 64-byte loop. */
cmp count, #64
|
||||
b.ge .Lnot_short
|
||||
.Ltail_maybe_tiny:
|
||||
cmp count, #15
|
||||
b.le .Ltail15tiny
|
||||
.Ltail63:
|
||||
/* tmp1 = count & 0x30: the 16/32/48-byte part of the tail.  dst is advanced
 * past it and one, two or three 16-byte pairs are stored backwards from
 * the new dst. */
ands tmp1, count, #0x30
|
||||
b.eq .Ltail15
|
||||
add dst, dst, tmp1
|
||||
cmp tmp1w, #0x20
|
||||
b.eq 1f
|
||||
b.lt 2f
|
||||
stp A_l, A_l, [dst, #-48]
|
||||
1:
|
||||
stp A_l, A_l, [dst, #-32]
|
||||
2:
|
||||
stp A_l, A_l, [dst, #-16]
|
||||
|
||||
.Ltail15:
|
||||
/* Advance by the last (count & 15) bytes and store the 16 bytes that end there. */
and count, count, #15
|
||||
add dst, dst, count
|
||||
stp A_l, A_l, [dst, #-16] /* Repeat some/all of last store. */
|
||||
ret
|
||||
|
||||
.Ltail15tiny:
|
||||
/* Set up to 15 bytes. Does not assume earlier memory
|
||||
being set. */
|
||||
/* Bits 3, 2 and 1 of count select an 8-, 4- and 2-byte store respectively. */
tbz count, #3, 1f
|
||||
str A_l, [dst], #8
|
||||
1:
|
||||
tbz count, #2, 1f
|
||||
str A_lw, [dst], #4
|
||||
1:
|
||||
tbz count, #1, 1f
|
||||
strh A_lw, [dst], #2
|
||||
1:
|
||||
ret
|
||||
|
||||
/* Critical loop. Start at a new cache line boundary. Assuming
|
||||
* 64 bytes per line, this ensures the entire loop is in one line. */
|
||||
.p2align 6
|
||||
.Lnot_short:
|
||||
/* tmp2 = (-dst) & 15: bytes needed to reach 16-byte alignment. */
neg tmp2, dst
|
||||
ands tmp2, tmp2, #15
|
||||
b.eq 2f
|
||||
/* Bring DST to 128-bit (16-byte) alignment. We know that there's
|
||||
* more than that to set, so we simply store 16 bytes and advance by
|
||||
* the amount required to reach alignment. */
|
||||
sub count, count, tmp2
|
||||
stp A_l, A_l, [dst]
|
||||
add dst, dst, tmp2
|
||||
/* There may be less than 63 bytes to go now. */
|
||||
cmp count, #63
|
||||
b.le .Ltail63
|
||||
2:
|
||||
sub dst, dst, #16 /* Pre-bias. */
|
||||
sub count, count, #64
|
||||
1:
|
||||
/* 64 bytes per iteration; the last store writes back dst += 64. */
stp A_l, A_l, [dst, #16]
|
||||
stp A_l, A_l, [dst, #32]
|
||||
stp A_l, A_l, [dst, #48]
|
||||
stp A_l, A_l, [dst, #64]!
|
||||
subs count, count, #64
|
||||
b.ge 1b
|
||||
/* Low six bits of count give the leftover bytes; remove the pre-bias from dst. */
tst count, #0x3f
|
||||
add dst, dst, #16
|
||||
b.ne .Ltail63
|
||||
ret
|
||||
|
||||
/* For zeroing memory, check to see if we can use the ZVA feature to
|
||||
* zero entire 'cache' lines. */
|
||||
.Lzero_mem:
|
||||
mov A_l, #0
|
||||
cmp count, #63
|
||||
b.le .Ltail_maybe_tiny
|
||||
/* Same 16-byte alignment step as at .Lnot_short. */
neg tmp2, dst
|
||||
ands tmp2, tmp2, #15
|
||||
b.eq 1f
|
||||
sub count, count, tmp2
|
||||
stp A_l, A_l, [dst]
|
||||
add dst, dst, tmp2
|
||||
cmp count, #63
|
||||
b.le .Ltail63
|
||||
1:
|
||||
/* For zeroing small amounts of memory, it's not worth setting up
|
||||
* the line-clear code. */
|
||||
cmp count, #128
|
||||
b.lt .Lnot_short
|
||||
/* Read DCZID_EL0; if bit 4 is set, use the ordinary store loop instead of DC ZVA. */
mrs tmp1, dczid_el0
|
||||
tbnz tmp1, #4, .Lnot_short
|
||||
/* zva_len = 4 << (DCZID_EL0 & 15): the number of bytes stepped per DC ZVA below. */
mov tmp3w, #4
|
||||
and zva_len, tmp1w, #15 /* Safety: other bits reserved. */
|
||||
lsl zva_len, tmp3w, zva_len
|
||||
|
||||
.Lzero_by_line:
|
||||
/* Compute how far we need to go to become suitably aligned. We're
|
||||
* already at quad-word alignment. */
|
||||
cmp count, zva_len_x
|
||||
b.lt .Lnot_short /* Not enough to reach alignment. */
|
||||
/* zva_bits_x = zva_len - 1 is the alignment mask; tmp2 = bytes up to the next zva_len boundary. */
sub zva_bits_x, zva_len_x, #1
|
||||
neg tmp2, dst
|
||||
ands tmp2, tmp2, zva_bits_x
|
||||
b.eq 1f /* Already aligned. */
|
||||
/* Not aligned, check that there's enough to copy after alignment. */
|
||||
sub tmp1, count, tmp2
|
||||
/* Continue only if tmp1 >= 64 and tmp1 >= zva_len; when the first compare
 * fails, ccmp loads NZCV=0b1000 so that b.lt is taken. */
cmp tmp1, #64
|
||||
ccmp tmp1, zva_len_x, #8, ge /* NZCV=0b1000 */
|
||||
b.lt .Lnot_short
|
||||
/* We know that there's at least 64 bytes to zero and that it's safe
|
||||
* to overrun by 64 bytes. */
|
||||
mov count, tmp1
|
||||
2:
|
||||
stp A_l, A_l, [dst]
|
||||
stp A_l, A_l, [dst, #16]
|
||||
stp A_l, A_l, [dst, #32]
|
||||
subs tmp2, tmp2, #64
|
||||
stp A_l, A_l, [dst, #48]
|
||||
add dst, dst, #64
|
||||
b.ge 2b
|
||||
/* We've overrun a bit, so adjust dst downwards. */
|
||||
add dst, dst, tmp2
|
||||
1:
|
||||
sub count, count, zva_len_x
|
||||
3:
|
||||
/* One DC ZVA per zva_len bytes while at least zva_len bytes remain. */
dc zva, dst
|
||||
add dst, dst, zva_len_x
|
||||
subs count, count, zva_len_x
|
||||
b.ge 3b
|
||||
/* Leftover bytes (count modulo zva_len) are handled by the ordinary tail code. */
ands count, count, zva_bits_x
|
||||
b.ne .Ltail_maybe_long
|
||||
ret
|
||||
END(android_memset32)
|
|
@ -1,100 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2015 The Android Open Source Project
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* generic C version for any machine */
|
||||
|
||||
#include <cutils/memory.h>
#include <string.h>
|
||||
|
||||
/*
 * Fill a buffer with a repeated 16-bit value.
 *
 *   dst   - start of the buffer, written as uint16_t elements
 *   value - 16-bit pattern stored into every element
 *   size  - buffer length in bytes; an odd trailing byte is left untouched
 *
 * Behaves like
 *     size >>= 1;
 *     while (size--)
 *         *dst++ = value;
 * but hands the 32-bit-aligned middle of the buffer to android_memset32().
 */
#ifdef __clang__
__attribute__((no_sanitize("integer")))
#endif
void android_memset16(uint16_t* dst, uint16_t value, size_t size)
{
    size_t remaining = size >> 1; /* bytes -> number of 16-bit elements */

    /* Peel a leading element when the address has bit 1 set, so the 32-bit
     * fill below starts on a 4-byte boundary. */
    const int starts_mid_word = ((uintptr_t)dst & 2) != 0;
    if (starts_mid_word && remaining != 0) {
        dst[0] = value;
        dst += 1;
        remaining -= 1;
    }

    /* Two copies of the pattern side by side; android_memset32() takes a
     * byte count and rounds it down to whole 32-bit elements. */
    const uint32_t doubled = (((uint32_t)value) << 16) | ((uint32_t)value);
    android_memset32((uint32_t*)dst, doubled, remaining << 1);

    /* An odd element count leaves the final element to be written here. */
    if ((remaining & 1) != 0) {
        dst[remaining - 1] = value;
    }
}
|
||||
|
||||
|
||||
/*
 * Fill a buffer with a repeated 32-bit value.
 *
 *   dst   - start of the buffer, written as uint32_t elements
 *           (NOTE(review): presumably 4-byte aligned -- confirm with callers)
 *   value - 32-bit pattern stored into every element
 *   size  - buffer length in bytes; a trailing partial element
 *           (size % 4 bytes) is left untouched
 *
 * Behaves like
 *     size >>= 2;
 *     while (size--)
 *         *dst++ = value;
 * but writes the bulk of the buffer 64 bits at a time.
 */
#ifdef __clang__
__attribute__((no_sanitize("integer")))
#endif
void android_memset32(uint32_t* dst, uint32_t value, size_t size)
{
    size >>= 2; /* bytes -> number of 32-bit elements */

    if (((uintptr_t)dst & 4) && size) {
        /* Peel one element so that, for a 4-byte-aligned dst, the paired
         * stores below start on an 8-byte boundary. */
        *dst++ = value;
        size--;
    }

    /* The same pattern in both halves, so byte order does not matter. */
    const uint64_t value64 = (((uint64_t)value) << 32) | ((uint64_t)value);

    /*
     * Bulk fill: 12 elements (six 64-bit stores) per iteration.  The stores
     * go through memcpy() instead of a uint64_t* cast of dst, because writing
     * uint32_t objects through a uint64_t lvalue breaks C's effective-type
     * (strict aliasing) rules.  Fixed-size 8-byte memcpy() calls are normally
     * lowered to single stores by optimizing compilers.
     */
    while (size >= 12) {
        memcpy(dst + 0, &value64, sizeof(value64));
        memcpy(dst + 2, &value64, sizeof(value64));
        memcpy(dst + 4, &value64, sizeof(value64));
        memcpy(dst + 6, &value64, sizeof(value64));
        memcpy(dst + 8, &value64, sizeof(value64));
        memcpy(dst + 10, &value64, sizeof(value64));
        dst += 12;
        size -= 12;
    }

    /* Fewer than 12 elements remain: finish one element at a time. */
    while (size != 0) {
        size--;
        *dst++ = value;
    }
}
|
|
@ -1,719 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2010 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "cache.h"
|
||||
|
||||
#ifndef MEMSET
|
||||
# define MEMSET android_memset16
|
||||
#endif
|
||||
|
||||
#ifndef L
|
||||
# define L(label) .L##label
|
||||
#endif
|
||||
|
||||
#ifndef ALIGN
|
||||
# define ALIGN(n) .p2align n
|
||||
#endif
|
||||
|
||||
#ifndef cfi_startproc
|
||||
# define cfi_startproc .cfi_startproc
|
||||
#endif
|
||||
|
||||
#ifndef cfi_endproc
|
||||
# define cfi_endproc .cfi_endproc
|
||||
#endif
|
||||
|
||||
#ifndef cfi_rel_offset
|
||||
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
|
||||
#endif
|
||||
|
||||
#ifndef cfi_restore
|
||||
# define cfi_restore(reg) .cfi_restore reg
|
||||
#endif
|
||||
|
||||
#ifndef cfi_adjust_cfa_offset
|
||||
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
|
||||
#endif
|
||||
|
||||
#ifndef ENTRY
|
||||
# define ENTRY(name) \
|
||||
.type name, @function; \
|
||||
.globl name; \
|
||||
.p2align 4; \
|
||||
name: \
|
||||
cfi_startproc
|
||||
#endif
|
||||
|
||||
#ifndef END
|
||||
# define END(name) \
|
||||
cfi_endproc; \
|
||||
.size name, .-name
|
||||
#endif
|
||||
|
||||
/* CFI bookkeeping for a 4-byte push of REG: the CFA offset grows by 4 and
   REG is recorded as saved at offset 0. */
#define CFI_PUSH(REG) \
|
||||
cfi_adjust_cfa_offset (4); \
|
||||
cfi_rel_offset (REG, 0)
|
||||
|
||||
/* CFI bookkeeping for the matching pop: the CFA offset shrinks by 4 and REG
   is marked as restored. */
#define CFI_POP(REG) \
|
||||
cfi_adjust_cfa_offset (-4); \
|
||||
cfi_restore (REG)
|
||||
|
||||
/* pushl / popl combined with the corresponding CFI annotations above. */
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
|
||||
#define POP(REG) popl REG; CFI_POP (REG)
|
||||
|
||||
/*
 * Stack offsets of the arguments, used as DEST(%esp), CHR(%esp), LEN(%esp).
 * PARMS is the offset of the first argument.  With USE_AS_BZERO16 there is
 * no value argument (LEN directly follows DEST) and SETRTNVAL expands to
 * nothing; otherwise CHR sits between DEST and LEN and SETRTNVAL reloads
 * DEST into %eax.
 */
#ifdef USE_AS_BZERO16
|
||||
# define DEST PARMS
|
||||
# define LEN DEST+4
|
||||
# define SETRTNVAL
|
||||
#else
|
||||
# define DEST PARMS
|
||||
# define CHR DEST+4
|
||||
# define LEN CHR+4
|
||||
# define SETRTNVAL movl DEST(%esp), %eax
|
||||
#endif
|
||||
|
||||
/*
 * Entry/exit sequences and jump-table dispatch, in two flavours.
 * PIC/shared builds save %ebx on entry (so PARMS is 8 instead of 4), store
 * jump-table entries as offsets relative to the table, and use %ebx to turn
 * them into absolute addresses.  Other builds store absolute addresses and
 * jump through the table directly.
 */
#if (defined SHARED || defined __PIC__)
|
||||
# define ENTRANCE PUSH (%ebx);
|
||||
# define RETURN_END POP (%ebx); ret
|
||||
/* RETURN is used in the middle of the function: after the ret, CFI_PUSH
   re-records %ebx as pushed for the code that follows. */
# define RETURN RETURN_END; CFI_PUSH (%ebx)
|
||||
# define PARMS 8 /* Preserve EBX. */
|
||||
# define JMPTBL(I, B) I - B
|
||||
|
||||
/* Load an entry in a jump table into EBX and branch to it. TABLE is a
|
||||
jump table with relative offsets. */
|
||||
# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \
|
||||
/* We first load PC into EBX. */ \
|
||||
call __x86.get_pc_thunk.bx; \
|
||||
/* Get the address of the jump table. */ \
|
||||
add $(TABLE - .), %ebx; \
|
||||
/* Get the entry and convert the relative offset to the \
|
||||
absolute address. */ \
|
||||
add (%ebx,%ecx,4), %ebx; \
|
||||
/* We loaded the jump table and adjusted EDX. Go. */ \
|
||||
jmp *%ebx
|
||||
|
||||
/* PC thunk: copies its own return address from the top of the stack into %ebx. */
.section .gnu.linkonce.t.__x86.get_pc_thunk.bx,"ax",@progbits
|
||||
.globl __x86.get_pc_thunk.bx
|
||||
.hidden __x86.get_pc_thunk.bx
|
||||
ALIGN (4)
|
||||
.type __x86.get_pc_thunk.bx,@function
|
||||
__x86.get_pc_thunk.bx:
|
||||
movl (%esp), %ebx
|
||||
ret
|
||||
#else
|
||||
# define ENTRANCE
|
||||
# define RETURN_END ret
|
||||
# define RETURN RETURN_END
|
||||
# define PARMS 4
|
||||
# define JMPTBL(I, B) I
|
||||
|
||||
/* Branch to an entry in a jump table. TABLE is a jump table with
|
||||
absolute offsets. */
|
||||
# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \
|
||||
jmp *TABLE(,%ecx,4)
|
||||
#endif
|
||||
|
||||
.section .text.sse2,"ax",@progbits
|
||||
ALIGN (4)
|
||||
ENTRY (MEMSET)
|
||||
ENTRANCE
|
||||
|
||||
movl LEN(%esp), %ecx
|
||||
shr $1, %ecx
|
||||
#ifdef USE_AS_BZERO16
|
||||
xor %eax, %eax
|
||||
#else
|
||||
movzwl CHR(%esp), %eax
|
||||
mov %eax, %edx
|
||||
shl $16, %eax
|
||||
or %edx, %eax
|
||||
#endif
|
||||
movl DEST(%esp), %edx
|
||||
cmp $32, %ecx
|
||||
jae L(32wordsormore)
|
||||
|
||||
L(write_less32words):
|
||||
lea (%edx, %ecx, 2), %edx
|
||||
BRANCH_TO_JMPTBL_ENTRY (L(table_less32words))
|
||||
|
||||
|
||||
.pushsection .rodata.sse2,"a",@progbits
|
||||
ALIGN (2)
|
||||
L(table_less32words):
|
||||
.int JMPTBL (L(write_0words), L(table_less32words))
|
||||
.int JMPTBL (L(write_1words), L(table_less32words))
|
||||
.int JMPTBL (L(write_2words), L(table_less32words))
|
||||
.int JMPTBL (L(write_3words), L(table_less32words))
|
||||
.int JMPTBL (L(write_4words), L(table_less32words))
|
||||
.int JMPTBL (L(write_5words), L(table_less32words))
|
||||
.int JMPTBL (L(write_6words), L(table_less32words))
|
||||
.int JMPTBL (L(write_7words), L(table_less32words))
|
||||
.int JMPTBL (L(write_8words), L(table_less32words))
|
||||
.int JMPTBL (L(write_9words), L(table_less32words))
|
||||
.int JMPTBL (L(write_10words), L(table_less32words))
|
||||
.int JMPTBL (L(write_11words), L(table_less32words))
|
||||
.int JMPTBL (L(write_12words), L(table_less32words))
|
||||
.int JMPTBL (L(write_13words), L(table_less32words))
|
||||
.int JMPTBL (L(write_14words), L(table_less32words))
|
||||
.int JMPTBL (L(write_15words), L(table_less32words))
|
||||
.int JMPTBL (L(write_16words), L(table_less32words))
|
||||
.int JMPTBL (L(write_17words), L(table_less32words))
|
||||
.int JMPTBL (L(write_18words), L(table_less32words))
|
||||
.int JMPTBL (L(write_19words), L(table_less32words))
|
||||
.int JMPTBL (L(write_20words), L(table_less32words))
|
||||
.int JMPTBL (L(write_21words), L(table_less32words))
|
||||
.int JMPTBL (L(write_22words), L(table_less32words))
|
||||
.int JMPTBL (L(write_23words), L(table_less32words))
|
||||
.int JMPTBL (L(write_24words), L(table_less32words))
|
||||
.int JMPTBL (L(write_25words), L(table_less32words))
|
||||
.int JMPTBL (L(write_26words), L(table_less32words))
|
||||
.int JMPTBL (L(write_27words), L(table_less32words))
|
||||
.int JMPTBL (L(write_28words), L(table_less32words))
|
||||
.int JMPTBL (L(write_29words), L(table_less32words))
|
||||
.int JMPTBL (L(write_30words), L(table_less32words))
|
||||
.int JMPTBL (L(write_31words), L(table_less32words))
|
||||
.popsection
|
||||
|
||||
ALIGN (4)
|
||||
L(write_28words):
|
||||
movl %eax, -56(%edx)
|
||||
movl %eax, -52(%edx)
|
||||
L(write_24words):
|
||||
movl %eax, -48(%edx)
|
||||
movl %eax, -44(%edx)
|
||||
L(write_20words):
|
||||
movl %eax, -40(%edx)
|
||||
movl %eax, -36(%edx)
|
||||
L(write_16words):
|
||||
movl %eax, -32(%edx)
|
||||
movl %eax, -28(%edx)
|
||||
L(write_12words):
|
||||
movl %eax, -24(%edx)
|
||||
movl %eax, -20(%edx)
|
||||
L(write_8words):
|
||||
movl %eax, -16(%edx)
|
||||
movl %eax, -12(%edx)
|
||||
L(write_4words):
|
||||
movl %eax, -8(%edx)
|
||||
movl %eax, -4(%edx)
|
||||
L(write_0words):
|
||||
SETRTNVAL
|
||||
RETURN
|
||||
|
||||
ALIGN (4)
|
||||
L(write_29words):
|
||||
movl %eax, -58(%edx)
|
||||
movl %eax, -54(%edx)
|
||||
L(write_25words):
|
||||
movl %eax, -50(%edx)
|
||||
movl %eax, -46(%edx)
|
||||
L(write_21words):
|
||||
movl %eax, -42(%edx)
|
||||
movl %eax, -38(%edx)
|
||||
L(write_17words):
|
||||
movl %eax, -34(%edx)
|
||||
movl %eax, -30(%edx)
|
||||
L(write_13words):
|
||||
movl %eax, -26(%edx)
|
||||
movl %eax, -22(%edx)
|
||||
L(write_9words):
|
||||
movl %eax, -18(%edx)
|
||||
movl %eax, -14(%edx)
|
||||
L(write_5words):
|
||||
movl %eax, -10(%edx)
|
||||
movl %eax, -6(%edx)
|
||||
L(write_1words):
|
||||
mov %ax, -2(%edx)
|
||||
SETRTNVAL
|
||||
RETURN
|
||||
|
||||
ALIGN (4)
|
||||
L(write_30words):
|
||||
movl %eax, -60(%edx)
|
||||
movl %eax, -56(%edx)
|
||||
L(write_26words):
|
||||
movl %eax, -52(%edx)
|
||||
movl %eax, -48(%edx)
|
||||
L(write_22words):
|
||||
movl %eax, -44(%edx)
|
||||
movl %eax, -40(%edx)
|
||||
L(write_18words):
|
||||
movl %eax, -36(%edx)
|
||||
movl %eax, -32(%edx)
|
||||
L(write_14words):
|
||||
movl %eax, -28(%edx)
|
||||
movl %eax, -24(%edx)
|
||||
L(write_10words):
|
||||
movl %eax, -20(%edx)
|
||||
movl %eax, -16(%edx)
|
||||
L(write_6words):
|
||||
movl %eax, -12(%edx)
|
||||
movl %eax, -8(%edx)
|
||||
L(write_2words):
|
||||
movl %eax, -4(%edx)
|
||||
SETRTNVAL
|
||||
RETURN
|
||||
|
||||
ALIGN (4)
|
||||
L(write_31words):
|
||||
movl %eax, -62(%edx)
|
||||
movl %eax, -58(%edx)
|
||||
L(write_27words):
|
||||
movl %eax, -54(%edx)
|
||||
movl %eax, -50(%edx)
|
||||
L(write_23words):
|
||||
movl %eax, -46(%edx)
|
||||
movl %eax, -42(%edx)
|
||||
L(write_19words):
|
||||
movl %eax, -38(%edx)
|
||||
movl %eax, -34(%edx)
|
||||
L(write_15words):
|
||||
movl %eax, -30(%edx)
|
||||
movl %eax, -26(%edx)
|
||||
L(write_11words):
|
||||
movl %eax, -22(%edx)
|
||||
movl %eax, -18(%edx)
|
||||
L(write_7words):
|
||||
movl %eax, -14(%edx)
|
||||
movl %eax, -10(%edx)
|
||||
L(write_3words):
|
||||
movl %eax, -6(%edx)
|
||||
movw %ax, -2(%edx)
|
||||
SETRTNVAL
|
||||
RETURN
|
||||
|
||||
ALIGN (4)
|
||||
|
||||
L(32wordsormore):
|
||||
shl $1, %ecx
|
||||
test $0x01, %edx
|
||||
jz L(aligned2bytes)
|
||||
mov %eax, (%edx)
|
||||
mov %eax, -4(%edx, %ecx)
|
||||
sub $2, %ecx
|
||||
add $1, %edx
|
||||
rol $8, %eax
|
||||
L(aligned2bytes):
|
||||
#ifdef USE_AS_BZERO16
|
||||
pxor %xmm0, %xmm0
|
||||
#else
|
||||
movd %eax, %xmm0
|
||||
pshufd $0, %xmm0, %xmm0
|
||||
#endif
|
||||
testl $0xf, %edx
|
||||
jz L(aligned_16)
|
||||
/* ECX > 32 and EDX is not 16 byte aligned. */
|
||||
L(not_aligned_16):
|
||||
movdqu %xmm0, (%edx)
|
||||
movl %edx, %eax
|
||||
and $-16, %edx
|
||||
add $16, %edx
|
||||
sub %edx, %eax
|
||||
add %eax, %ecx
|
||||
movd %xmm0, %eax
|
||||
|
||||
ALIGN (4)
|
||||
L(aligned_16):
|
||||
cmp $128, %ecx
|
||||
jae L(128bytesormore)
|
||||
|
||||
L(aligned_16_less128bytes):
|
||||
add %ecx, %edx
|
||||
shr $1, %ecx
|
||||
BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
|
||||
|
||||
ALIGN (4)
|
||||
L(128bytesormore):
|
||||
#ifdef SHARED_CACHE_SIZE
|
||||
PUSH (%ebx)
|
||||
mov $SHARED_CACHE_SIZE, %ebx
|
||||
#else
|
||||
# if (defined SHARED || defined __PIC__)
|
||||
call __x86.get_pc_thunk.bx
|
||||
add $_GLOBAL_OFFSET_TABLE_, %ebx
|
||||
mov __x86_shared_cache_size@GOTOFF(%ebx), %ebx
|
||||
# else
|
||||
PUSH (%ebx)
|
||||
mov __x86_shared_cache_size, %ebx
|
||||
# endif
|
||||
#endif
|
||||
cmp %ebx, %ecx
|
||||
jae L(128bytesormore_nt_start)
|
||||
|
||||
|
||||
#ifdef DATA_CACHE_SIZE
|
||||
POP (%ebx)
|
||||
# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
|
||||
cmp $DATA_CACHE_SIZE, %ecx
|
||||
#else
|
||||
# if (defined SHARED || defined __PIC__)
|
||||
# define RESTORE_EBX_STATE
|
||||
call __x86.get_pc_thunk.bx
|
||||
add $_GLOBAL_OFFSET_TABLE_, %ebx
|
||||
cmp __x86_data_cache_size@GOTOFF(%ebx), %ecx
|
||||
# else
|
||||
POP (%ebx)
|
||||
# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
|
||||
cmp __x86_data_cache_size, %ecx
|
||||
# endif
|
||||
#endif
|
||||
|
||||
jae L(128bytes_L2_normal)
|
||||
subl $128, %ecx
|
||||
L(128bytesormore_normal):
|
||||
sub $128, %ecx
|
||||
movdqa %xmm0, (%edx)
|
||||
movdqa %xmm0, 0x10(%edx)
|
||||
movdqa %xmm0, 0x20(%edx)
|
||||
movdqa %xmm0, 0x30(%edx)
|
||||
movdqa %xmm0, 0x40(%edx)
|
||||
movdqa %xmm0, 0x50(%edx)
|
||||
movdqa %xmm0, 0x60(%edx)
|
||||
movdqa %xmm0, 0x70(%edx)
|
||||
lea 128(%edx), %edx
|
||||
jb L(128bytesless_normal)
|
||||
|
||||
|
||||
sub $128, %ecx
|
||||
movdqa %xmm0, (%edx)
|
||||
movdqa %xmm0, 0x10(%edx)
|
||||
movdqa %xmm0, 0x20(%edx)
|
||||
movdqa %xmm0, 0x30(%edx)
|
||||
movdqa %xmm0, 0x40(%edx)
|
||||
movdqa %xmm0, 0x50(%edx)
|
||||
movdqa %xmm0, 0x60(%edx)
|
||||
movdqa %xmm0, 0x70(%edx)
|
||||
lea 128(%edx), %edx
|
||||
jae L(128bytesormore_normal)
|
||||
|
||||
L(128bytesless_normal):
|
||||
lea 128(%ecx), %ecx
|
||||
add %ecx, %edx
|
||||
shr $1, %ecx
|
||||
BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
|
||||
|
||||
ALIGN (4)
|
||||
L(128bytes_L2_normal):
|
||||
prefetcht0 0x380(%edx)
|
||||
prefetcht0 0x3c0(%edx)
|
||||
sub $128, %ecx
|
||||
movdqa %xmm0, (%edx)
|
||||
movaps %xmm0, 0x10(%edx)
|
||||
movaps %xmm0, 0x20(%edx)
|
||||
movaps %xmm0, 0x30(%edx)
|
||||
movaps %xmm0, 0x40(%edx)
|
||||
movaps %xmm0, 0x50(%edx)
|
||||
movaps %xmm0, 0x60(%edx)
|
||||
movaps %xmm0, 0x70(%edx)
|
||||
add $128, %edx
|
||||
cmp $128, %ecx
|
||||
jae L(128bytes_L2_normal)
|
||||
|
||||
L(128bytesless_L2_normal):
|
||||
add %ecx, %edx
|
||||
shr $1, %ecx
|
||||
BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
|
||||
|
||||
RESTORE_EBX_STATE
|
||||
L(128bytesormore_nt_start):
|
||||
sub %ebx, %ecx
|
||||
mov %ebx, %eax
|
||||
and $0x7f, %eax
|
||||
add %eax, %ecx
|
||||
movd %xmm0, %eax
|
||||
ALIGN (4)
|
||||
L(128bytesormore_shared_cache_loop):
|
||||
prefetcht0 0x3c0(%edx)
|
||||
prefetcht0 0x380(%edx)
|
||||
sub $0x80, %ebx
|
||||
movdqa %xmm0, (%edx)
|
||||
movdqa %xmm0, 0x10(%edx)
|
||||
movdqa %xmm0, 0x20(%edx)
|
||||
movdqa %xmm0, 0x30(%edx)
|
||||
movdqa %xmm0, 0x40(%edx)
|
||||
movdqa %xmm0, 0x50(%edx)
|
||||
movdqa %xmm0, 0x60(%edx)
|
||||
movdqa %xmm0, 0x70(%edx)
|
||||
add $0x80, %edx
|
||||
cmp $0x80, %ebx
|
||||
jae L(128bytesormore_shared_cache_loop)
|
||||
cmp $0x80, %ecx
|
||||
jb L(shared_cache_loop_end)
|
||||
ALIGN (4)
|
||||
L(128bytesormore_nt):
|
||||
sub $0x80, %ecx
|
||||
movntdq %xmm0, (%edx)
|
||||
movntdq %xmm0, 0x10(%edx)
|
||||
movntdq %xmm0, 0x20(%edx)
|
||||
movntdq %xmm0, 0x30(%edx)
|
||||
movntdq %xmm0, 0x40(%edx)
|
||||
movntdq %xmm0, 0x50(%edx)
|
||||
movntdq %xmm0, 0x60(%edx)
|
||||
movntdq %xmm0, 0x70(%edx)
|
||||
add $0x80, %edx
|
||||
cmp $0x80, %ecx
|
||||
jae L(128bytesormore_nt)
|
||||
sfence
|
||||
L(shared_cache_loop_end):
|
||||
#if defined DATA_CACHE_SIZE || !(defined SHARED || defined __PIC__)
|
||||
POP (%ebx)
|
||||
#endif
|
||||
add %ecx, %edx
|
||||
shr $1, %ecx
|
||||
BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
|
||||
|
||||
|
||||
.pushsection .rodata.sse2,"a",@progbits
|
||||
ALIGN (2)
|
||||
L(table_16_128bytes):
|
||||
.int JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
|
||||
.popsection
|
||||
|
||||
|
||||
ALIGN (4)
|
||||
L(aligned_16_112bytes):
|
||||
movdqa %xmm0, -112(%edx)
|
||||
L(aligned_16_96bytes):
|
||||
movdqa %xmm0, -96(%edx)
|
||||
L(aligned_16_80bytes):
|
||||
movdqa %xmm0, -80(%edx)
|
||||
L(aligned_16_64bytes):
|
||||
movdqa %xmm0, -64(%edx)
|
||||
L(aligned_16_48bytes):
|
||||
movdqa %xmm0, -48(%edx)
|
||||
L(aligned_16_32bytes):
|
||||
movdqa %xmm0, -32(%edx)
|
||||
L(aligned_16_16bytes):
|
||||
movdqa %xmm0, -16(%edx)
|
||||
L(aligned_16_0bytes):
|
||||
SETRTNVAL
|
||||
RETURN
|
||||
|
||||
|
||||
ALIGN (4)
|
||||
L(aligned_16_114bytes):
|
||||
movdqa %xmm0, -114(%edx)
|
||||
L(aligned_16_98bytes):
|
||||
movdqa %xmm0, -98(%edx)
|
||||
L(aligned_16_82bytes):
|
||||
movdqa %xmm0, -82(%edx)
|
||||
L(aligned_16_66bytes):
|
||||
movdqa %xmm0, -66(%edx)
|
||||
L(aligned_16_50bytes):
|
||||
movdqa %xmm0, -50(%edx)
|
||||
L(aligned_16_34bytes):
|
||||
movdqa %xmm0, -34(%edx)
|
||||
L(aligned_16_18bytes):
|
||||
movdqa %xmm0, -18(%edx)
|
||||
L(aligned_16_2bytes):
|
||||
movw %ax, -2(%edx)
|
||||
SETRTNVAL
|
||||
RETURN
|
||||
|
||||
ALIGN (4)
|
||||
L(aligned_16_116bytes):
|
||||
movdqa %xmm0, -116(%edx)
|
||||
L(aligned_16_100bytes):
|
||||
movdqa %xmm0, -100(%edx)
|
||||
L(aligned_16_84bytes):
|
||||
movdqa %xmm0, -84(%edx)
|
||||
L(aligned_16_68bytes):
|
||||
movdqa %xmm0, -68(%edx)
|
||||
L(aligned_16_52bytes):
|
||||
movdqa %xmm0, -52(%edx)
|
||||
L(aligned_16_36bytes):
|
||||
movdqa %xmm0, -36(%edx)
|
||||
L(aligned_16_20bytes):
|
||||
movdqa %xmm0, -20(%edx)
|
||||
L(aligned_16_4bytes):
|
||||
movl %eax, -4(%edx)
|
||||
SETRTNVAL
|
||||
RETURN
|
||||
|
||||
|
||||
ALIGN (4)
|
||||
L(aligned_16_118bytes):
|
||||
movdqa %xmm0, -118(%edx)
|
||||
L(aligned_16_102bytes):
|
||||
movdqa %xmm0, -102(%edx)
|
||||
L(aligned_16_86bytes):
|
||||
movdqa %xmm0, -86(%edx)
|
||||
L(aligned_16_70bytes):
|
||||
movdqa %xmm0, -70(%edx)
|
||||
L(aligned_16_54bytes):
|
||||
movdqa %xmm0, -54(%edx)
|
||||
L(aligned_16_38bytes):
|
||||
movdqa %xmm0, -38(%edx)
|
||||
L(aligned_16_22bytes):
|
||||
movdqa %xmm0, -22(%edx)
|
||||
L(aligned_16_6bytes):
|
||||
movl %eax, -6(%edx)
|
||||
movw %ax, -2(%edx)
|
||||
SETRTNVAL
|
||||
RETURN
|
||||
|
||||
|
||||
ALIGN (4)
|
||||
L(aligned_16_120bytes):
|
||||
movdqa %xmm0, -120(%edx)
|
||||
L(aligned_16_104bytes):
|
||||
movdqa %xmm0, -104(%edx)
|
||||
L(aligned_16_88bytes):
|
||||
movdqa %xmm0, -88(%edx)
|
||||
L(aligned_16_72bytes):
|
||||
movdqa %xmm0, -72(%edx)
|
||||
L(aligned_16_56bytes):
|
||||
movdqa %xmm0, -56(%edx)
|
||||
L(aligned_16_40bytes):
|
||||
movdqa %xmm0, -40(%edx)
|
||||
L(aligned_16_24bytes):
|
||||
movdqa %xmm0, -24(%edx)
|
||||
L(aligned_16_8bytes):
|
||||
movq %xmm0, -8(%edx)
|
||||
SETRTNVAL
|
||||
RETURN
|
||||
|
||||
|
||||
ALIGN (4)
|
||||
L(aligned_16_122bytes):
|
||||
movdqa %xmm0, -122(%edx)
|
||||
L(aligned_16_106bytes):
|
||||
movdqa %xmm0, -106(%edx)
|
||||
L(aligned_16_90bytes):
|
||||
movdqa %xmm0, -90(%edx)
|
||||
L(aligned_16_74bytes):
|
||||
movdqa %xmm0, -74(%edx)
|
||||
L(aligned_16_58bytes):
|
||||
movdqa %xmm0, -58(%edx)
|
||||
L(aligned_16_42bytes):
|
||||
movdqa %xmm0, -42(%edx)
|
||||
L(aligned_16_26bytes):
|
||||
movdqa %xmm0, -26(%edx)
|
||||
L(aligned_16_10bytes):
|
||||
movq %xmm0, -10(%edx)
|
||||
movw %ax, -2(%edx)
|
||||
SETRTNVAL
|
||||
RETURN
|
||||
|
||||
|
||||
ALIGN (4)
|
||||
L(aligned_16_124bytes):
|
||||
movdqa %xmm0, -124(%edx)
|
||||
L(aligned_16_108bytes):
|
||||
movdqa %xmm0, -108(%edx)
|
||||
L(aligned_16_92bytes):
|
||||
movdqa %xmm0, -92(%edx)
|
||||
L(aligned_16_76bytes):
|
||||
movdqa %xmm0, -76(%edx)
|
||||
L(aligned_16_60bytes):
|
||||
movdqa %xmm0, -60(%edx)
|
||||
L(aligned_16_44bytes):
|
||||
movdqa %xmm0, -44(%edx)
|
||||
L(aligned_16_28bytes):
|
||||
movdqa %xmm0, -28(%edx)
|
||||
L(aligned_16_12bytes):
|
||||
movq %xmm0, -12(%edx)
|
||||
movl %eax, -4(%edx)
|
||||
SETRTNVAL
|
||||
RETURN
|
||||
|
||||
|
||||
ALIGN (4)
|
||||
L(aligned_16_126bytes):
|
||||
movdqa %xmm0, -126(%edx)
|
||||
L(aligned_16_110bytes):
|
||||
movdqa %xmm0, -110(%edx)
|
||||
L(aligned_16_94bytes):
|
||||
movdqa %xmm0, -94(%edx)
|
||||
L(aligned_16_78bytes):
|
||||
movdqa %xmm0, -78(%edx)
|
||||
L(aligned_16_62bytes):
|
||||
movdqa %xmm0, -62(%edx)
|
||||
L(aligned_16_46bytes):
|
||||
movdqa %xmm0, -46(%edx)
|
||||
L(aligned_16_30bytes):
|
||||
movdqa %xmm0, -30(%edx)
|
||||
L(aligned_16_14bytes):
|
||||
movq %xmm0, -14(%edx)
|
||||
movl %eax, -6(%edx)
|
||||
movw %ax, -2(%edx)
|
||||
SETRTNVAL
|
||||
RETURN
|
||||
|
||||
END (MEMSET)
|
|
@ -1,510 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2010 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "cache.h"
|
||||
|
||||
/* Symbol name given to the routine assembled from this file.  Defaults
   to android_memset32 unless MEMSET was already defined. */
#ifndef MEMSET
|
||||
# define MEMSET android_memset32
|
||||
#endif
|
||||
|
||||
/* L(label) pastes a ".L" prefix onto label, e.g. L(foo) -> .Lfoo. */
#ifndef L
|
||||
# define L(label) .L##label
|
||||
#endif
|
||||
|
||||
/* ALIGN(n) expands to ".p2align n" (alignment given as a power of two). */
#ifndef ALIGN
|
||||
# define ALIGN(n) .p2align n
|
||||
#endif
|
||||
|
||||
/* Thin wrappers that map the cfi_* names used in this file onto the
   assembler's .cfi_* directives.  Each one is only defined if the name
   is not already defined. */
#ifndef cfi_startproc
|
||||
# define cfi_startproc .cfi_startproc
|
||||
#endif
|
||||
|
||||
#ifndef cfi_endproc
|
||||
# define cfi_endproc .cfi_endproc
|
||||
#endif
|
||||
|
||||
#ifndef cfi_rel_offset
|
||||
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
|
||||
#endif
|
||||
|
||||
#ifndef cfi_restore
|
||||
# define cfi_restore(reg) .cfi_restore reg
|
||||
#endif
|
||||
|
||||
#ifndef cfi_adjust_cfa_offset
|
||||
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
|
||||
#endif
|
||||
|
||||
/* ENTRY(name): marks name as a function symbol, exports it with .globl,
   aligns it with .p2align 4, emits the label and opens a CFI region. */
#ifndef ENTRY
|
||||
# define ENTRY(name) \
|
||||
.type name, @function; \
|
||||
.globl name; \
|
||||
.p2align 4; \
|
||||
name: \
|
||||
cfi_startproc
|
||||
#endif
|
||||
|
||||
/* END(name): closes the CFI region opened by ENTRY and records the
   symbol size as the distance from name to the current location. */
#ifndef END
|
||||
# define END(name) \
|
||||
cfi_endproc; \
|
||||
.size name, .-name
|
||||
#endif
|
||||
|
||||
/* CFI_PUSH(REG): unwind bookkeeping for a 4-byte push of REG -- bumps
   the CFA offset by 4 and records REG as saved at relative offset 0. */
#define CFI_PUSH(REG) \
|
||||
cfi_adjust_cfa_offset (4); \
|
||||
cfi_rel_offset (REG, 0)
|
||||
|
||||
/* CFI_POP(REG): the inverse bookkeeping for a 4-byte pop of REG. */
#define CFI_POP(REG) \
|
||||
cfi_adjust_cfa_offset (-4); \
|
||||
cfi_restore (REG)
|
||||
|
||||
/* PUSH/POP: the actual pushl/popl paired with the matching CFI update. */
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
|
||||
#define POP(REG) popl REG; CFI_POP (REG)
|
||||
|
||||
/* Offsets of the stack arguments relative to %esp, built on PARMS (the
   offset of the first argument, defined in the PIC/non-PIC block below).
   With USE_AS_BZERO32 the arguments are (DEST, LEN) and SETRTNVAL is
   empty; otherwise they are (DEST, DWDS, LEN), where DWDS is the 32-bit
   fill value, and SETRTNVAL reloads DEST into %eax before returning.
   The entry code shifts LEN right by 2 to get a dword count, so LEN is
   presumably a byte count. */
#ifdef USE_AS_BZERO32
|
||||
# define DEST PARMS
|
||||
# define LEN DEST+4
|
||||
# define SETRTNVAL
|
||||
#else
|
||||
# define DEST PARMS
|
||||
# define DWDS DEST+4
|
||||
# define LEN DWDS+4
|
||||
# define SETRTNVAL movl DEST(%esp), %eax
|
||||
#endif
|
||||
|
||||
/* Build configuration for position-independent vs. absolute code.
   SHARED/PIC builds push EBX on entry (so PARMS is 8), store jump-table
   entries as offsets relative to the table, and use EBX to turn them
   into absolute targets.  Other builds leave EBX alone (PARMS is 4) and
   jump through absolute table entries. */
#if (defined SHARED || defined __PIC__)
|
||||
# define ENTRANCE PUSH (%ebx);
|
||||
# define RETURN_END POP (%ebx); ret
|
||||
/* RETURN re-issues CFI_PUSH after the ret; presumably this keeps the
   unwind state correct for code that follows a mid-function return. */
# define RETURN RETURN_END; CFI_PUSH (%ebx)
|
||||
# define PARMS 8 /* Preserve EBX. */
|
||||
# define JMPTBL(I, B) I - B
|
||||
|
||||
/* Load an entry in a jump table into EBX and branch to it. TABLE is a
|
||||
jump table with relative offsets. */
|
||||
# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \
|
||||
/* We first load PC into EBX. */ \
|
||||
call __x86.get_pc_thunk.bx; \
|
||||
/* Get the address of the jump table. */ \
|
||||
add $(TABLE - .), %ebx; \
|
||||
/* Get the entry and convert the relative offset to the \
|
||||
absolute address. */ \
|
||||
add (%ebx,%ecx,4), %ebx; \
|
||||
/* We loaded the jump table and adjusted EDX. Go. */ \
|
||||
jmp *%ebx
|
||||
|
||||
/* PC thunk: copies its own return address (the caller's next
   instruction) from the top of the stack into EBX.  Emitted as a hidden
   global in its own linkonce section. */
.section .gnu.linkonce.t.__x86.get_pc_thunk.bx,"ax",@progbits
|
||||
.globl __x86.get_pc_thunk.bx
|
||||
.hidden __x86.get_pc_thunk.bx
|
||||
ALIGN (4)
|
||||
.type __x86.get_pc_thunk.bx,@function
|
||||
__x86.get_pc_thunk.bx:
|
||||
movl (%esp), %ebx
|
||||
ret
|
||||
#else
|
||||
# define ENTRANCE
|
||||
# define RETURN_END ret
|
||||
# define RETURN RETURN_END
|
||||
# define PARMS 4
|
||||
# define JMPTBL(I, B) I
|
||||
|
||||
/* Branch to an entry in a jump table. TABLE is a jump table with
|
||||
absolute offsets. */
|
||||
# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \
|
||||
jmp *TABLE(,%ecx,4)
|
||||
#endif
|
||||
|
||||
/* Entry point of MEMSET (android_memset32 by default).  Register roles
   set up here: ECX = LEN >> 2 (dword count), EAX = fill value (zero when
   built with USE_AS_BZERO32), EDX = DEST.  Counts of 16 dwords or more
   go to L(16dbwordsormore); smaller counts are dispatched through
   L(table_less16dbwords). */
.section .text.sse2,"ax",@progbits
|
||||
ALIGN (4)
|
||||
ENTRY (MEMSET)
|
||||
ENTRANCE
|
||||
|
||||
movl LEN(%esp), %ecx
|
||||
shr $2, %ecx
|
||||
#ifdef USE_AS_BZERO32
|
||||
xor %eax, %eax
|
||||
#else
|
||||
mov DWDS(%esp), %eax
|
||||
/* NOTE(review): EDX is overwritten by the DEST load just below, so this
   copy looks like a dead store. */
mov %eax, %edx
|
||||
#endif
|
||||
movl DEST(%esp), %edx
|
||||
cmp $16, %ecx
|
||||
jae L(16dbwordsormore)
|
||||
|
||||
/* Fewer than 16 dwords: point EDX one past the end of the region and
   dispatch on the dword count in ECX. */
L(write_less16dbwords):
|
||||
lea (%edx, %ecx, 4), %edx
|
||||
BRANCH_TO_JMPTBL_ENTRY (L(table_less16dbwords))
|
||||
|
||||
/* Jump table for fills of 0..15 dwords, indexed by the dword count.
   Entry N targets L(write_Ndbwords); JMPTBL makes each entry either
   table-relative or absolute depending on the build. */
.pushsection .rodata.sse2,"a",@progbits
|
||||
ALIGN (2)
|
||||
L(table_less16dbwords):
|
||||
.int JMPTBL (L(write_0dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_1dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_2dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_3dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_4dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_5dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_6dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_7dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_8dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_9dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_10dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_11dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_12dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_13dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_14dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_15dbwords), L(table_less16dbwords))
|
||||
.popsection
|
||||
|
||||
/* Fall-through store chain for small fills.  EDX points one past the
   end of the region; entering at L(write_Ndbwords) stores N dwords from
   EAX at offsets -4*N .. -4, then sets the return value and returns. */
ALIGN (4)
|
||||
L(write_15dbwords):
|
||||
movl %eax, -60(%edx)
|
||||
L(write_14dbwords):
|
||||
movl %eax, -56(%edx)
|
||||
L(write_13dbwords):
|
||||
movl %eax, -52(%edx)
|
||||
L(write_12dbwords):
|
||||
movl %eax, -48(%edx)
|
||||
L(write_11dbwords):
|
||||
movl %eax, -44(%edx)
|
||||
L(write_10dbwords):
|
||||
movl %eax, -40(%edx)
|
||||
L(write_9dbwords):
|
||||
movl %eax, -36(%edx)
|
||||
L(write_8dbwords):
|
||||
movl %eax, -32(%edx)
|
||||
L(write_7dbwords):
|
||||
movl %eax, -28(%edx)
|
||||
L(write_6dbwords):
|
||||
movl %eax, -24(%edx)
|
||||
L(write_5dbwords):
|
||||
movl %eax, -20(%edx)
|
||||
L(write_4dbwords):
|
||||
movl %eax, -16(%edx)
|
||||
L(write_3dbwords):
|
||||
movl %eax, -12(%edx)
|
||||
L(write_2dbwords):
|
||||
movl %eax, -8(%edx)
|
||||
L(write_1dbwords):
|
||||
movl %eax, -4(%edx)
|
||||
L(write_0dbwords):
|
||||
SETRTNVAL
|
||||
RETURN
|
||||
|
||||
/* 16 or more dwords.  If EDX is already 4-byte aligned, go straight to
   L(aligned4bytes).  Otherwise store the first and the last dword
   unaligned, drop one dword from the count, and advance EDX one byte at
   a time (at most three steps) until it is 4-byte aligned.  Each step
   rotates EAX right by 8 bits (the first one is written as rol $24), so
   the later aligned stores continue the same byte sequence.  Falls
   through into L(aligned4bytes). */
ALIGN (4)
|
||||
L(16dbwordsormore):
|
||||
test $3, %edx
|
||||
jz L(aligned4bytes)
|
||||
mov %eax, (%edx)
|
||||
mov %eax, -4(%edx, %ecx, 4)
|
||||
sub $1, %ecx
|
||||
rol $24, %eax
|
||||
add $1, %edx
|
||||
test $3, %edx
|
||||
jz L(aligned4bytes)
|
||||
ror $8, %eax
|
||||
add $1, %edx
|
||||
test $3, %edx
|
||||
jz L(aligned4bytes)
|
||||
ror $8, %eax
|
||||
add $1, %edx
|
||||
/* EDX is 4-byte aligned here.  Convert ECX from dwords back to bytes and
   broadcast the pattern into all four lanes of XMM0 (or zero XMM0 for
   USE_AS_BZERO32).  If EDX is not yet 16-byte aligned, write one
   unaligned 16-byte store, round EDX up to the next 16-byte boundary and
   reduce ECX by the number of bytes skipped.  EAX is used as scratch for
   that adjustment and then reloaded from XMM0. */
L(aligned4bytes):
|
||||
shl $2, %ecx
|
||||
|
||||
#ifdef USE_AS_BZERO32
|
||||
pxor %xmm0, %xmm0
|
||||
#else
|
||||
movd %eax, %xmm0
|
||||
pshufd $0, %xmm0, %xmm0
|
||||
#endif
|
||||
testl $0xf, %edx
|
||||
jz L(aligned_16)
|
||||
/* ECX > 32 and EDX is not 16 byte aligned. */
|
||||
L(not_aligned_16):
|
||||
movdqu %xmm0, (%edx)
|
||||
movl %edx, %eax
|
||||
and $-16, %edx
|
||||
add $16, %edx
|
||||
sub %edx, %eax
|
||||
add %eax, %ecx
|
||||
movd %xmm0, %eax
|
||||
/* EDX is 16-byte aligned and ECX holds the remaining byte count.
   128 bytes or more go to the bulk loops; otherwise EDX is moved to the
   end of the region and the tail is dispatched through
   L(table_16_128bytes), indexed by the remaining dword count. */
ALIGN (4)
|
||||
L(aligned_16):
|
||||
cmp $128, %ecx
|
||||
jae L(128bytesormore)
|
||||
|
||||
L(aligned_16_less128bytes):
|
||||
add %ecx, %edx
|
||||
shr $2, %ecx
|
||||
BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
|
||||
|
||||
/* Bulk path selection for 128 bytes or more.
   Step 1: load the shared-cache size into EBX -- an immediate when
   SHARED_CACHE_SIZE is defined, otherwise the __x86_shared_cache_size
   variable (GOT-relative in SHARED/PIC builds).  The SHARED_CACHE_SIZE
   and non-PIC paths push EBX first.  If ECX >= that size, go to
   L(128bytesormore_nt_start) with EBX still holding the size.
   Step 2: compare ECX with the data-cache size (DATA_CACHE_SIZE or
   __x86_data_cache_size); at or above it, use L(128bytes_L2_normal),
   otherwise fall through to the plain loop that follows.
   RESTORE_EBX_STATE is defined here per configuration and expanded just
   before L(128bytesormore_nt_start). */
ALIGN (4)
|
||||
L(128bytesormore):
|
||||
#ifdef SHARED_CACHE_SIZE
|
||||
PUSH (%ebx)
|
||||
mov $SHARED_CACHE_SIZE, %ebx
|
||||
#else
|
||||
# if (defined SHARED || defined __PIC__)
|
||||
call __x86.get_pc_thunk.bx
|
||||
add $_GLOBAL_OFFSET_TABLE_, %ebx
|
||||
mov __x86_shared_cache_size@GOTOFF(%ebx), %ebx
|
||||
# else
|
||||
PUSH (%ebx)
|
||||
mov __x86_shared_cache_size, %ebx
|
||||
# endif
|
||||
#endif
|
||||
cmp %ebx, %ecx
|
||||
jae L(128bytesormore_nt_start)
|
||||
|
||||
#ifdef DATA_CACHE_SIZE
|
||||
POP (%ebx)
|
||||
# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
|
||||
cmp $DATA_CACHE_SIZE, %ecx
|
||||
#else
|
||||
# if (defined SHARED || defined __PIC__)
|
||||
# define RESTORE_EBX_STATE
|
||||
call __x86.get_pc_thunk.bx
|
||||
add $_GLOBAL_OFFSET_TABLE_, %ebx
|
||||
cmp __x86_data_cache_size@GOTOFF(%ebx), %ecx
|
||||
# else
|
||||
POP (%ebx)
|
||||
# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
|
||||
cmp __x86_data_cache_size, %ecx
|
||||
# endif
|
||||
#endif
|
||||
|
||||
jae L(128bytes_L2_normal)
|
||||
/* Plain aligned-store loop, unrolled twice (two 128-byte chunks per
   iteration).  ECX is biased by -128 up front so that the borrow from
   each "sub $128" means fewer than 128 bytes remain after the current
   chunk.  EDX is advanced with lea so the flags from sub are still
   intact at the jb/jae that follows. */
subl $128, %ecx
|
||||
L(128bytesormore_normal):
|
||||
sub $128, %ecx
|
||||
movdqa %xmm0, (%edx)
|
||||
movdqa %xmm0, 0x10(%edx)
|
||||
movdqa %xmm0, 0x20(%edx)
|
||||
movdqa %xmm0, 0x30(%edx)
|
||||
movdqa %xmm0, 0x40(%edx)
|
||||
movdqa %xmm0, 0x50(%edx)
|
||||
movdqa %xmm0, 0x60(%edx)
|
||||
movdqa %xmm0, 0x70(%edx)
|
||||
lea 128(%edx), %edx
|
||||
jb L(128bytesless_normal)
|
||||
|
||||
|
||||
sub $128, %ecx
|
||||
movdqa %xmm0, (%edx)
|
||||
movdqa %xmm0, 0x10(%edx)
|
||||
movdqa %xmm0, 0x20(%edx)
|
||||
movdqa %xmm0, 0x30(%edx)
|
||||
movdqa %xmm0, 0x40(%edx)
|
||||
movdqa %xmm0, 0x50(%edx)
|
||||
movdqa %xmm0, 0x60(%edx)
|
||||
movdqa %xmm0, 0x70(%edx)
|
||||
lea 128(%edx), %edx
|
||||
jae L(128bytesormore_normal)
|
||||
|
||||
/* Undo the -128 bias to get the true remainder, move EDX to the end of
   the region and dispatch the tail on the remaining dword count. */
L(128bytesless_normal):
|
||||
lea 128(%ecx), %ecx
|
||||
add %ecx, %edx
|
||||
shr $2, %ecx
|
||||
BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
|
||||
|
||||
/* Bulk loop taken when ECX is at or above the data-cache size but below
   the shared-cache size.  Each iteration issues prefetcht0 for the lines
   at EDX+0x380 and EDX+0x3c0, stores 128 aligned bytes, and repeats
   while at least 128 bytes remain.  The remainder is then dispatched
   through L(table_16_128bytes). */
ALIGN (4)
|
||||
L(128bytes_L2_normal):
|
||||
prefetcht0 0x380(%edx)
|
||||
prefetcht0 0x3c0(%edx)
|
||||
sub $128, %ecx
|
||||
movdqa %xmm0, (%edx)
|
||||
movaps %xmm0, 0x10(%edx)
|
||||
movaps %xmm0, 0x20(%edx)
|
||||
movaps %xmm0, 0x30(%edx)
|
||||
movaps %xmm0, 0x40(%edx)
|
||||
movaps %xmm0, 0x50(%edx)
|
||||
movaps %xmm0, 0x60(%edx)
|
||||
movaps %xmm0, 0x70(%edx)
|
||||
add $128, %edx
|
||||
cmp $128, %ecx
|
||||
jae L(128bytes_L2_normal)
|
||||
|
||||
L(128bytesless_L2_normal):
|
||||
add %ecx, %edx
|
||||
shr $2, %ecx
|
||||
BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
|
||||
|
||||
/* Path for fills at least as large as the shared-cache size (EBX).
   RESTORE_EBX_STATE emits only CFI bookkeeping (or nothing) so the
   unwind state matches the jump into this label.
   ECX becomes the byte count left after the cached loop below, i.e.
   ECX - EBX + (EBX & 0x7f), since that loop writes EBX rounded down to a
   multiple of 128.  EAX is scratch for this and is reloaded from XMM0. */
RESTORE_EBX_STATE
|
||||
L(128bytesormore_nt_start):
|
||||
sub %ebx, %ecx
|
||||
mov %ebx, %eax
|
||||
and $0x7f, %eax
|
||||
add %eax, %ecx
|
||||
movd %xmm0, %eax
|
||||
ALIGN (4)
|
||||
/* Cached (movdqa) stores with prefetch, counted down in EBX. */
L(128bytesormore_shared_cache_loop):
|
||||
prefetcht0 0x3c0(%edx)
|
||||
prefetcht0 0x380(%edx)
|
||||
sub $0x80, %ebx
|
||||
movdqa %xmm0, (%edx)
|
||||
movdqa %xmm0, 0x10(%edx)
|
||||
movdqa %xmm0, 0x20(%edx)
|
||||
movdqa %xmm0, 0x30(%edx)
|
||||
movdqa %xmm0, 0x40(%edx)
|
||||
movdqa %xmm0, 0x50(%edx)
|
||||
movdqa %xmm0, 0x60(%edx)
|
||||
movdqa %xmm0, 0x70(%edx)
|
||||
add $0x80, %edx
|
||||
cmp $0x80, %ebx
|
||||
jae L(128bytesormore_shared_cache_loop)
|
||||
/* Skip the non-temporal loop if fewer than 128 bytes are left. */
cmp $0x80, %ecx
|
||||
jb L(shared_cache_loop_end)
|
||||
|
||||
/* Non-temporal store loop: writes 128 bytes per iteration with movntdq
   while at least 128 bytes remain, then issues sfence to order those
   stores before continuing. */
ALIGN (4)
|
||||
L(128bytesormore_nt):
|
||||
sub $0x80, %ecx
|
||||
movntdq %xmm0, (%edx)
|
||||
movntdq %xmm0, 0x10(%edx)
|
||||
movntdq %xmm0, 0x20(%edx)
|
||||
movntdq %xmm0, 0x30(%edx)
|
||||
movntdq %xmm0, 0x40(%edx)
|
||||
movntdq %xmm0, 0x50(%edx)
|
||||
movntdq %xmm0, 0x60(%edx)
|
||||
movntdq %xmm0, 0x70(%edx)
|
||||
add $0x80, %edx
|
||||
cmp $0x80, %ecx
|
||||
jae L(128bytesormore_nt)
|
||||
sfence
|
||||
/* Common exit of the large-fill path: pop EBX in the configurations
   selected by the #if below, then dispatch the sub-128-byte tail. */
L(shared_cache_loop_end):
|
||||
#if defined DATA_CACHE_SIZE || !(defined SHARED || defined __PIC__)
|
||||
POP (%ebx)
|
||||
#endif
|
||||
add %ecx, %edx
|
||||
shr $2, %ecx
|
||||
BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
|
||||
|
||||
/* Jump table for tails of 0..124 bytes in 4-byte steps, indexed by the
   remaining dword count (0..31).  Entry k targets
   L(aligned_16_<4k>bytes). */
.pushsection .rodata.sse2,"a",@progbits
|
||||
ALIGN (2)
|
||||
L(table_16_128bytes):
|
||||
.int JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
|
||||
.popsection
|
||||
|
||||
/* Tail chain for remainders that are a multiple of 16 bytes (0..112).
   EDX points one past the end of the region; entering at the label for
   N bytes stores N/16 aligned 16-byte vectors at negative offsets and
   falls through to the return. */
ALIGN (4)
|
||||
L(aligned_16_112bytes):
|
||||
movdqa %xmm0, -112(%edx)
|
||||
L(aligned_16_96bytes):
|
||||
movdqa %xmm0, -96(%edx)
|
||||
L(aligned_16_80bytes):
|
||||
movdqa %xmm0, -80(%edx)
|
||||
L(aligned_16_64bytes):
|
||||
movdqa %xmm0, -64(%edx)
|
||||
L(aligned_16_48bytes):
|
||||
movdqa %xmm0, -48(%edx)
|
||||
L(aligned_16_32bytes):
|
||||
movdqa %xmm0, -32(%edx)
|
||||
L(aligned_16_16bytes):
|
||||
movdqa %xmm0, -16(%edx)
|
||||
L(aligned_16_0bytes):
|
||||
SETRTNVAL
|
||||
RETURN
|
||||
|
||||
/* Tail chain for remainders of 16*k + 4 bytes (4..116): the 16-byte
   vector stores fall through to a final 4-byte store of EAX at
   -4(%edx), then return. */
ALIGN (4)
|
||||
L(aligned_16_116bytes):
|
||||
movdqa %xmm0, -116(%edx)
|
||||
L(aligned_16_100bytes):
|
||||
movdqa %xmm0, -100(%edx)
|
||||
L(aligned_16_84bytes):
|
||||
movdqa %xmm0, -84(%edx)
|
||||
L(aligned_16_68bytes):
|
||||
movdqa %xmm0, -68(%edx)
|
||||
L(aligned_16_52bytes):
|
||||
movdqa %xmm0, -52(%edx)
|
||||
L(aligned_16_36bytes):
|
||||
movdqa %xmm0, -36(%edx)
|
||||
L(aligned_16_20bytes):
|
||||
movdqa %xmm0, -20(%edx)
|
||||
L(aligned_16_4bytes):
|
||||
movl %eax, -4(%edx)
|
||||
SETRTNVAL
|
||||
RETURN
|
||||
|
||||
/* Tail chain for remainders of 16*k + 8 bytes (8..120): the 16-byte
   vector stores fall through to a final 8-byte store (low half of XMM0)
   at -8(%edx), then return. */
ALIGN (4)
|
||||
L(aligned_16_120bytes):
|
||||
movdqa %xmm0, -120(%edx)
|
||||
L(aligned_16_104bytes):
|
||||
movdqa %xmm0, -104(%edx)
|
||||
L(aligned_16_88bytes):
|
||||
movdqa %xmm0, -88(%edx)
|
||||
L(aligned_16_72bytes):
|
||||
movdqa %xmm0, -72(%edx)
|
||||
L(aligned_16_56bytes):
|
||||
movdqa %xmm0, -56(%edx)
|
||||
L(aligned_16_40bytes):
|
||||
movdqa %xmm0, -40(%edx)
|
||||
L(aligned_16_24bytes):
|
||||
movdqa %xmm0, -24(%edx)
|
||||
L(aligned_16_8bytes):
|
||||
movq %xmm0, -8(%edx)
|
||||
SETRTNVAL
|
||||
RETURN
|
||||
|
||||
/* Tail chain for remainders of 16*k + 12 bytes (12..124): the 16-byte
   vector stores fall through to an 8-byte store at -12(%edx) and a
   4-byte store at -4(%edx), then return.  END closes the routine. */
ALIGN (4)
|
||||
L(aligned_16_124bytes):
|
||||
movdqa %xmm0, -124(%edx)
|
||||
L(aligned_16_108bytes):
|
||||
movdqa %xmm0, -108(%edx)
|
||||
L(aligned_16_92bytes):
|
||||
movdqa %xmm0, -92(%edx)
|
||||
L(aligned_16_76bytes):
|
||||
movdqa %xmm0, -76(%edx)
|
||||
L(aligned_16_60bytes):
|
||||
movdqa %xmm0, -60(%edx)
|
||||
L(aligned_16_44bytes):
|
||||
movdqa %xmm0, -44(%edx)
|
||||
L(aligned_16_28bytes):
|
||||
movdqa %xmm0, -28(%edx)
|
||||
L(aligned_16_12bytes):
|
||||
movq %xmm0, -12(%edx)
|
||||
movl %eax, -4(%edx)
|
||||
SETRTNVAL
|
||||
RETURN
|
||||
|
||||
END (MEMSET)
|
|
@ -1,565 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2014 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "cache.h"
|
||||
|
||||
/* Symbol name given to the routine assembled from this file.  Defaults
   to android_memset16 unless MEMSET was already defined. */
#ifndef MEMSET
|
||||
# define MEMSET android_memset16
|
||||
#endif
|
||||
|
||||
/* L(label) pastes a ".L" prefix onto label, e.g. L(foo) -> .Lfoo. */
#ifndef L
|
||||
# define L(label) .L##label
|
||||
#endif
|
||||
|
||||
/* ALIGN(n) expands to ".p2align n" (alignment given as a power of two). */
#ifndef ALIGN
|
||||
# define ALIGN(n) .p2align n
|
||||
#endif
|
||||
|
||||
/* Wrappers mapping cfi_startproc / cfi_endproc onto the assembler's
   .cfi_* directives; each is only defined if not already defined. */
#ifndef cfi_startproc
|
||||
# define cfi_startproc .cfi_startproc
|
||||
#endif
|
||||
|
||||
#ifndef cfi_endproc
|
||||
# define cfi_endproc .cfi_endproc
|
||||
#endif
|
||||
|
||||
/* ENTRY(name): marks name as a function symbol, exports it with .globl,
   aligns it with .p2align 4, emits the label and opens a CFI region. */
#ifndef ENTRY
|
||||
# define ENTRY(name) \
|
||||
.type name, @function; \
|
||||
.globl name; \
|
||||
.p2align 4; \
|
||||
name: \
|
||||
cfi_startproc
|
||||
#endif
|
||||
|
||||
/* END(name): closes the CFI region opened by ENTRY and records the
   symbol size as the distance from name to the current location. */
#ifndef END
|
||||
# define END(name) \
|
||||
cfi_endproc; \
|
||||
.size name, .-name
|
||||
#endif
|
||||
|
||||
/* JMPTBL(I, B): jump-table entry for target I, stored as an offset
   relative to the table base B. */
#define JMPTBL(I, B) I - B
|
||||
|
||||
/* Branch to an entry in a jump table. TABLE is a jump table with
|
||||
relative offsets. INDEX is a register containing the index into the
|
||||
jump table. SCALE is the scale of INDEX. */
|
||||
/* Overwrites %r11 (table address) and INDEX (ends up holding the
   absolute branch target: table address plus the sign-extended entry). */
#define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
|
||||
lea TABLE(%rip), %r11; \
|
||||
movslq (%r11, INDEX, SCALE), INDEX; \
|
||||
lea (%r11, INDEX), INDEX; \
|
||||
jmp *INDEX
|
||||
|
||||
.section .text.sse2,"ax",@progbits
|
||||
ALIGN (4)
|
||||
ENTRY (MEMSET) // Address in rdi
|
||||
shr $1, %rdx // Count in rdx
|
||||
movzwl %si, %ecx
|
||||
/* Fill the whole ECX with pattern. */
|
||||
shl $16, %esi
|
||||
or %esi, %ecx // Pattern in ecx
|
||||
|
||||
cmp $32, %rdx
|
||||
jae L(32wordsormore)
|
||||
|
||||
L(write_less32words):
|
||||
lea (%rdi, %rdx, 2), %rdi
|
||||
BRANCH_TO_JMPTBL_ENTRY (L(table_less32words), %rdx, 4)
|
||||
|
||||
.pushsection .rodata.sse2,"a",@progbits
|
||||
ALIGN (2)
|
||||
L(table_less32words):
|
||||
.int JMPTBL (L(write_0words), L(table_less32words))
|
||||
.int JMPTBL (L(write_1words), L(table_less32words))
|
||||
.int JMPTBL (L(write_2words), L(table_less32words))
|
||||
.int JMPTBL (L(write_3words), L(table_less32words))
|
||||
.int JMPTBL (L(write_4words), L(table_less32words))
|
||||
.int JMPTBL (L(write_5words), L(table_less32words))
|
||||
.int JMPTBL (L(write_6words), L(table_less32words))
|
||||
.int JMPTBL (L(write_7words), L(table_less32words))
|
||||
.int JMPTBL (L(write_8words), L(table_less32words))
|
||||
.int JMPTBL (L(write_9words), L(table_less32words))
|
||||
.int JMPTBL (L(write_10words), L(table_less32words))
|
||||
.int JMPTBL (L(write_11words), L(table_less32words))
|
||||
.int JMPTBL (L(write_12words), L(table_less32words))
|
||||
.int JMPTBL (L(write_13words), L(table_less32words))
|
||||
.int JMPTBL (L(write_14words), L(table_less32words))
|
||||
.int JMPTBL (L(write_15words), L(table_less32words))
|
||||
.int JMPTBL (L(write_16words), L(table_less32words))
|
||||
.int JMPTBL (L(write_17words), L(table_less32words))
|
||||
.int JMPTBL (L(write_18words), L(table_less32words))
|
||||
.int JMPTBL (L(write_19words), L(table_less32words))
|
||||
.int JMPTBL (L(write_20words), L(table_less32words))
|
||||
.int JMPTBL (L(write_21words), L(table_less32words))
|
||||
.int JMPTBL (L(write_22words), L(table_less32words))
|
||||
.int JMPTBL (L(write_23words), L(table_less32words))
|
||||
.int JMPTBL (L(write_24words), L(table_less32words))
|
||||
.int JMPTBL (L(write_25words), L(table_less32words))
|
||||
.int JMPTBL (L(write_26words), L(table_less32words))
|
||||
.int JMPTBL (L(write_27words), L(table_less32words))
|
||||
.int JMPTBL (L(write_28words), L(table_less32words))
|
||||
.int JMPTBL (L(write_29words), L(table_less32words))
|
||||
.int JMPTBL (L(write_30words), L(table_less32words))
|
||||
.int JMPTBL (L(write_31words), L(table_less32words))
|
||||
.popsection
|
||||
|
||||
ALIGN (4)
|
||||
L(write_28words):
|
||||
movl %ecx, -56(%rdi)
|
||||
movl %ecx, -52(%rdi)
|
||||
L(write_24words):
|
||||
movl %ecx, -48(%rdi)
|
||||
movl %ecx, -44(%rdi)
|
||||
L(write_20words):
|
||||
movl %ecx, -40(%rdi)
|
||||
movl %ecx, -36(%rdi)
|
||||
L(write_16words):
|
||||
movl %ecx, -32(%rdi)
|
||||
movl %ecx, -28(%rdi)
|
||||
L(write_12words):
|
||||
movl %ecx, -24(%rdi)
|
||||
movl %ecx, -20(%rdi)
|
||||
L(write_8words):
|
||||
movl %ecx, -16(%rdi)
|
||||
movl %ecx, -12(%rdi)
|
||||
L(write_4words):
|
||||
movl %ecx, -8(%rdi)
|
||||
movl %ecx, -4(%rdi)
|
||||
L(write_0words):
|
||||
ret
|
||||
|
||||
ALIGN (4)
|
||||
L(write_29words):
|
||||
movl %ecx, -58(%rdi)
|
||||
movl %ecx, -54(%rdi)
|
||||
L(write_25words):
|
||||
movl %ecx, -50(%rdi)
|
||||
movl %ecx, -46(%rdi)
|
||||
L(write_21words):
|
||||
movl %ecx, -42(%rdi)
|
||||
movl %ecx, -38(%rdi)
|
||||
L(write_17words):
|
||||
movl %ecx, -34(%rdi)
|
||||
movl %ecx, -30(%rdi)
|
||||
L(write_13words):
|
||||
movl %ecx, -26(%rdi)
|
||||
movl %ecx, -22(%rdi)
|
||||
L(write_9words):
|
||||
movl %ecx, -18(%rdi)
|
||||
movl %ecx, -14(%rdi)
|
||||
L(write_5words):
|
||||
movl %ecx, -10(%rdi)
|
||||
movl %ecx, -6(%rdi)
|
||||
L(write_1words):
|
||||
mov %cx, -2(%rdi)
|
||||
ret
|
||||
|
||||
ALIGN (4)
|
||||
L(write_30words):
|
||||
movl %ecx, -60(%rdi)
|
||||
movl %ecx, -56(%rdi)
|
||||
L(write_26words):
|
||||
movl %ecx, -52(%rdi)
|
||||
movl %ecx, -48(%rdi)
|
||||
L(write_22words):
|
||||
movl %ecx, -44(%rdi)
|
||||
movl %ecx, -40(%rdi)
|
||||
L(write_18words):
|
||||
movl %ecx, -36(%rdi)
|
||||
movl %ecx, -32(%rdi)
|
||||
L(write_14words):
|
||||
movl %ecx, -28(%rdi)
|
||||
movl %ecx, -24(%rdi)
|
||||
L(write_10words):
|
||||
movl %ecx, -20(%rdi)
|
||||
movl %ecx, -16(%rdi)
|
||||
L(write_6words):
|
||||
movl %ecx, -12(%rdi)
|
||||
movl %ecx, -8(%rdi)
|
||||
L(write_2words):
|
||||
movl %ecx, -4(%rdi)
|
||||
ret
|
||||
|
||||
ALIGN (4)
|
||||
L(write_31words):
|
||||
movl %ecx, -62(%rdi)
|
||||
movl %ecx, -58(%rdi)
|
||||
L(write_27words):
|
||||
movl %ecx, -54(%rdi)
|
||||
movl %ecx, -50(%rdi)
|
||||
L(write_23words):
|
||||
movl %ecx, -46(%rdi)
|
||||
movl %ecx, -42(%rdi)
|
||||
L(write_19words):
|
||||
movl %ecx, -38(%rdi)
|
||||
movl %ecx, -34(%rdi)
|
||||
L(write_15words):
|
||||
movl %ecx, -30(%rdi)
|
||||
movl %ecx, -26(%rdi)
|
||||
L(write_11words):
|
||||
movl %ecx, -22(%rdi)
|
||||
movl %ecx, -18(%rdi)
|
||||
L(write_7words):
|
||||
movl %ecx, -14(%rdi)
|
||||
movl %ecx, -10(%rdi)
|
||||
L(write_3words):
|
||||
movl %ecx, -6(%rdi)
|
||||
movw %cx, -2(%rdi)
|
||||
ret
|
||||
|
||||
ALIGN (4)
|
||||
// Large-fill path: handle an odd destination address before the SSE stores.
L(32wordsormore):
|
||||
// RDX was halved on entry to count 16-bit words; turn it back into bytes.
shl $1, %rdx
|
||||
// Bit 0 of the destination address set means the address is odd.
test $0x01, %edi
|
||||
jz L(aligned2bytes)
|
||||
// Odd address: store the 32-bit pattern over the first four bytes ...
mov %ecx, (%rdi)
|
||||
// ... and over the last four bytes of the buffer (RDI + RDX - 4).
mov %ecx, -4(%rdi, %rdx)
|
||||
// Shrink the remaining range by two bytes and start one byte later,
// which makes the start address even.
sub $2, %rdx
|
||||
add $1, %rdi
|
||||
// Rotate the pattern by one byte so that stores issued from the new start
// address continue the byte sequence already written at the old one.
rol $8, %ecx
|
||||
L(aligned2bytes):
|
||||
/* Fill xmm0 with the pattern. */
|
||||
movd %ecx, %xmm0
|
||||
pshufd $0, %xmm0, %xmm0
|
||||
|
||||
testl $0xf, %edi
|
||||
jz L(aligned_16)
|
||||
/* RDX > 32 and RDI is not 16 byte aligned. */
|
||||
movdqu %xmm0, (%rdi)
|
||||
mov %rdi, %rsi
|
||||
and $-16, %rdi
|
||||
add $16, %rdi
|
||||
sub %rdi, %rsi
|
||||
add %rsi, %rdx
|
||||
|
||||
ALIGN (4)
|
||||
L(aligned_16):
|
||||
cmp $128, %rdx
|
||||
jge L(128bytesormore)
|
||||
|
||||
L(aligned_16_less128bytes):
|
||||
add %rdx, %rdi
|
||||
shr $1, %rdx
|
||||
BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes), %rdx, 4)
|
||||
|
||||
ALIGN (4)
|
||||
L(128bytesormore):
|
||||
cmp $SHARED_CACHE_SIZE, %rdx
|
||||
jg L(128bytesormore_nt)
|
||||
|
||||
L(128bytesormore_normal):
|
||||
sub $128, %rdx
|
||||
movdqa %xmm0, (%rdi)
|
||||
movdqa %xmm0, 0x10(%rdi)
|
||||
movdqa %xmm0, 0x20(%rdi)
|
||||
movdqa %xmm0, 0x30(%rdi)
|
||||
movdqa %xmm0, 0x40(%rdi)
|
||||
movdqa %xmm0, 0x50(%rdi)
|
||||
movdqa %xmm0, 0x60(%rdi)
|
||||
movdqa %xmm0, 0x70(%rdi)
|
||||
lea 128(%rdi), %rdi
|
||||
cmp $128, %rdx
|
||||
jl L(128bytesless_normal)
|
||||
|
||||
sub $128, %rdx
|
||||
movdqa %xmm0, (%rdi)
|
||||
movdqa %xmm0, 0x10(%rdi)
|
||||
movdqa %xmm0, 0x20(%rdi)
|
||||
movdqa %xmm0, 0x30(%rdi)
|
||||
movdqa %xmm0, 0x40(%rdi)
|
||||
movdqa %xmm0, 0x50(%rdi)
|
||||
movdqa %xmm0, 0x60(%rdi)
|
||||
movdqa %xmm0, 0x70(%rdi)
|
||||
lea 128(%rdi), %rdi
|
||||
cmp $128, %rdx
|
||||
jl L(128bytesless_normal)
|
||||
|
||||
sub $128, %rdx
|
||||
movdqa %xmm0, (%rdi)
|
||||
movdqa %xmm0, 0x10(%rdi)
|
||||
movdqa %xmm0, 0x20(%rdi)
|
||||
movdqa %xmm0, 0x30(%rdi)
|
||||
movdqa %xmm0, 0x40(%rdi)
|
||||
movdqa %xmm0, 0x50(%rdi)
|
||||
movdqa %xmm0, 0x60(%rdi)
|
||||
movdqa %xmm0, 0x70(%rdi)
|
||||
lea 128(%rdi), %rdi
|
||||
cmp $128, %rdx
|
||||
jl L(128bytesless_normal)
|
||||
|
||||
sub $128, %rdx
|
||||
movdqa %xmm0, (%rdi)
|
||||
movdqa %xmm0, 0x10(%rdi)
|
||||
movdqa %xmm0, 0x20(%rdi)
|
||||
movdqa %xmm0, 0x30(%rdi)
|
||||
movdqa %xmm0, 0x40(%rdi)
|
||||
movdqa %xmm0, 0x50(%rdi)
|
||||
movdqa %xmm0, 0x60(%rdi)
|
||||
movdqa %xmm0, 0x70(%rdi)
|
||||
lea 128(%rdi), %rdi
|
||||
cmp $128, %rdx
|
||||
jge L(128bytesormore_normal)
|
||||
|
||||
L(128bytesless_normal):
|
||||
add %rdx, %rdi
|
||||
shr $1, %rdx
|
||||
BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes), %rdx, 4)
|
||||
|
||||
ALIGN (4)
|
||||
L(128bytesormore_nt):
|
||||
sub $128, %rdx
|
||||
movntdq %xmm0, (%rdi)
|
||||
movntdq %xmm0, 0x10(%rdi)
|
||||
movntdq %xmm0, 0x20(%rdi)
|
||||
movntdq %xmm0, 0x30(%rdi)
|
||||
movntdq %xmm0, 0x40(%rdi)
|
||||
movntdq %xmm0, 0x50(%rdi)
|
||||
movntdq %xmm0, 0x60(%rdi)
|
||||
movntdq %xmm0, 0x70(%rdi)
|
||||
lea 128(%rdi), %rdi
|
||||
cmp $128, %rdx
|
||||
jge L(128bytesormore_nt)
|
||||
|
||||
sfence
|
||||
add %rdx, %rdi
|
||||
shr $1, %rdx
|
||||
BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes), %rdx, 4)
|
||||
|
||||
.pushsection .rodata.sse2,"a",@progbits
|
||||
ALIGN (2)
|
||||
L(table_16_128bytes):
|
||||
.int JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
|
||||
.popsection
|
||||
|
||||
ALIGN (4)
|
||||
L(aligned_16_112bytes):
|
||||
movdqa %xmm0, -112(%rdi)
|
||||
L(aligned_16_96bytes):
|
||||
movdqa %xmm0, -96(%rdi)
|
||||
L(aligned_16_80bytes):
|
||||
movdqa %xmm0, -80(%rdi)
|
||||
L(aligned_16_64bytes):
|
||||
movdqa %xmm0, -64(%rdi)
|
||||
L(aligned_16_48bytes):
|
||||
movdqa %xmm0, -48(%rdi)
|
||||
L(aligned_16_32bytes):
|
||||
movdqa %xmm0, -32(%rdi)
|
||||
L(aligned_16_16bytes):
|
||||
movdqa %xmm0, -16(%rdi)
|
||||
L(aligned_16_0bytes):
|
||||
ret
|
||||
|
||||
ALIGN (4)
|
||||
L(aligned_16_114bytes):
|
||||
movdqa %xmm0, -114(%rdi)
|
||||
L(aligned_16_98bytes):
|
||||
movdqa %xmm0, -98(%rdi)
|
||||
L(aligned_16_82bytes):
|
||||
movdqa %xmm0, -82(%rdi)
|
||||
L(aligned_16_66bytes):
|
||||
movdqa %xmm0, -66(%rdi)
|
||||
L(aligned_16_50bytes):
|
||||
movdqa %xmm0, -50(%rdi)
|
||||
L(aligned_16_34bytes):
|
||||
movdqa %xmm0, -34(%rdi)
|
||||
L(aligned_16_18bytes):
|
||||
movdqa %xmm0, -18(%rdi)
|
||||
L(aligned_16_2bytes):
|
||||
movw %cx, -2(%rdi)
|
||||
ret
|
||||
|
||||
ALIGN (4)
|
||||
L(aligned_16_116bytes):
|
||||
movdqa %xmm0, -116(%rdi)
|
||||
L(aligned_16_100bytes):
|
||||
movdqa %xmm0, -100(%rdi)
|
||||
L(aligned_16_84bytes):
|
||||
movdqa %xmm0, -84(%rdi)
|
||||
L(aligned_16_68bytes):
|
||||
movdqa %xmm0, -68(%rdi)
|
||||
L(aligned_16_52bytes):
|
||||
movdqa %xmm0, -52(%rdi)
|
||||
L(aligned_16_36bytes):
|
||||
movdqa %xmm0, -36(%rdi)
|
||||
L(aligned_16_20bytes):
|
||||
movdqa %xmm0, -20(%rdi)
|
||||
L(aligned_16_4bytes):
|
||||
movl %ecx, -4(%rdi)
|
||||
ret
|
||||
|
||||
ALIGN (4)
|
||||
L(aligned_16_118bytes):
|
||||
movdqa %xmm0, -118(%rdi)
|
||||
L(aligned_16_102bytes):
|
||||
movdqa %xmm0, -102(%rdi)
|
||||
L(aligned_16_86bytes):
|
||||
movdqa %xmm0, -86(%rdi)
|
||||
L(aligned_16_70bytes):
|
||||
movdqa %xmm0, -70(%rdi)
|
||||
L(aligned_16_54bytes):
|
||||
movdqa %xmm0, -54(%rdi)
|
||||
L(aligned_16_38bytes):
|
||||
movdqa %xmm0, -38(%rdi)
|
||||
L(aligned_16_22bytes):
|
||||
movdqa %xmm0, -22(%rdi)
|
||||
L(aligned_16_6bytes):
|
||||
movl %ecx, -6(%rdi)
|
||||
movw %cx, -2(%rdi)
|
||||
ret
|
||||
|
||||
ALIGN (4)
|
||||
L(aligned_16_120bytes):
|
||||
movdqa %xmm0, -120(%rdi)
|
||||
L(aligned_16_104bytes):
|
||||
movdqa %xmm0, -104(%rdi)
|
||||
L(aligned_16_88bytes):
|
||||
movdqa %xmm0, -88(%rdi)
|
||||
L(aligned_16_72bytes):
|
||||
movdqa %xmm0, -72(%rdi)
|
||||
L(aligned_16_56bytes):
|
||||
movdqa %xmm0, -56(%rdi)
|
||||
L(aligned_16_40bytes):
|
||||
movdqa %xmm0, -40(%rdi)
|
||||
L(aligned_16_24bytes):
|
||||
movdqa %xmm0, -24(%rdi)
|
||||
L(aligned_16_8bytes):
|
||||
movq %xmm0, -8(%rdi)
|
||||
ret
|
||||
|
||||
ALIGN (4)
|
||||
L(aligned_16_122bytes):
|
||||
movdqa %xmm0, -122(%rdi)
|
||||
L(aligned_16_106bytes):
|
||||
movdqa %xmm0, -106(%rdi)
|
||||
L(aligned_16_90bytes):
|
||||
movdqa %xmm0, -90(%rdi)
|
||||
L(aligned_16_74bytes):
|
||||
movdqa %xmm0, -74(%rdi)
|
||||
L(aligned_16_58bytes):
|
||||
movdqa %xmm0, -58(%rdi)
|
||||
L(aligned_16_42bytes):
|
||||
movdqa %xmm0, -42(%rdi)
|
||||
L(aligned_16_26bytes):
|
||||
movdqa %xmm0, -26(%rdi)
|
||||
L(aligned_16_10bytes):
|
||||
movq %xmm0, -10(%rdi)
|
||||
movw %cx, -2(%rdi)
|
||||
ret
|
||||
|
||||
ALIGN (4)
|
||||
L(aligned_16_124bytes):
|
||||
movdqa %xmm0, -124(%rdi)
|
||||
L(aligned_16_108bytes):
|
||||
movdqa %xmm0, -108(%rdi)
|
||||
L(aligned_16_92bytes):
|
||||
movdqa %xmm0, -92(%rdi)
|
||||
L(aligned_16_76bytes):
|
||||
movdqa %xmm0, -76(%rdi)
|
||||
L(aligned_16_60bytes):
|
||||
movdqa %xmm0, -60(%rdi)
|
||||
L(aligned_16_44bytes):
|
||||
movdqa %xmm0, -44(%rdi)
|
||||
L(aligned_16_28bytes):
|
||||
movdqa %xmm0, -28(%rdi)
|
||||
L(aligned_16_12bytes):
|
||||
movq %xmm0, -12(%rdi)
|
||||
movl %ecx, -4(%rdi)
|
||||
ret
|
||||
|
||||
ALIGN (4)
|
||||
L(aligned_16_126bytes):
|
||||
movdqa %xmm0, -126(%rdi)
|
||||
L(aligned_16_110bytes):
|
||||
movdqa %xmm0, -110(%rdi)
|
||||
L(aligned_16_94bytes):
|
||||
movdqa %xmm0, -94(%rdi)
|
||||
L(aligned_16_78bytes):
|
||||
movdqa %xmm0, -78(%rdi)
|
||||
L(aligned_16_62bytes):
|
||||
movdqa %xmm0, -62(%rdi)
|
||||
L(aligned_16_46bytes):
|
||||
movdqa %xmm0, -46(%rdi)
|
||||
L(aligned_16_30bytes):
|
||||
movdqa %xmm0, -30(%rdi)
|
||||
L(aligned_16_14bytes):
|
||||
movq %xmm0, -14(%rdi)
|
||||
movl %ecx, -6(%rdi)
|
||||
movw %cx, -2(%rdi)
|
||||
ret
|
||||
|
||||
END (MEMSET)
|
|
@ -1,373 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2014 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "cache.h"
|
||||
|
||||
#ifndef MEMSET
|
||||
# define MEMSET android_memset32
|
||||
#endif
|
||||
|
||||
#ifndef L
|
||||
# define L(label) .L##label
|
||||
#endif
|
||||
|
||||
#ifndef ALIGN
|
||||
# define ALIGN(n) .p2align n
|
||||
#endif
|
||||
|
||||
#ifndef cfi_startproc
|
||||
# define cfi_startproc .cfi_startproc
|
||||
#endif
|
||||
|
||||
#ifndef cfi_endproc
|
||||
# define cfi_endproc .cfi_endproc
|
||||
#endif
|
||||
|
||||
#ifndef ENTRY
|
||||
# define ENTRY(name) \
|
||||
.type name, @function; \
|
||||
.globl name; \
|
||||
.p2align 4; \
|
||||
name: \
|
||||
cfi_startproc
|
||||
#endif
|
||||
|
||||
#ifndef END
|
||||
# define END(name) \
|
||||
cfi_endproc; \
|
||||
.size name, .-name
|
||||
#endif
|
||||
|
||||
#define JMPTBL(I, B) I - B
|
||||
|
||||
/* Branch to an entry in a jump table. TABLE is a jump table with
|
||||
relative offsets. INDEX is a register that contains the index into the
|
||||
jump table. SCALE is the scale of INDEX. */
|
||||
#define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
|
||||
lea TABLE(%rip), %r11; \
|
||||
movslq (%r11, INDEX, SCALE), INDEX; \
|
||||
lea (%r11, INDEX), INDEX; \
|
||||
jmp *INDEX
|
||||
|
||||
.section .text.sse2,"ax",@progbits
|
||||
ALIGN (4)
|
||||
ENTRY (MEMSET) // Address in rdi
|
||||
shr $2, %rdx // Count in rdx
|
||||
movl %esi, %ecx // Pattern in ecx
|
||||
|
||||
cmp $16, %rdx
|
||||
jae L(16dbwordsormore)
|
||||
|
||||
L(write_less16dbwords):
|
||||
lea (%rdi, %rdx, 4), %rdi
|
||||
BRANCH_TO_JMPTBL_ENTRY (L(table_less16dbwords), %rdx, 4)
|
||||
|
||||
.pushsection .rodata.sse2,"a",@progbits
|
||||
ALIGN (2)
|
||||
L(table_less16dbwords):
|
||||
.int JMPTBL (L(write_0dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_1dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_2dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_3dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_4dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_5dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_6dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_7dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_8dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_9dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_10dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_11dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_12dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_13dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_14dbwords), L(table_less16dbwords))
|
||||
.int JMPTBL (L(write_15dbwords), L(table_less16dbwords))
|
||||
.popsection
|
||||
|
||||
ALIGN (4)
|
||||
L(write_15dbwords):
|
||||
movl %ecx, -60(%rdi)
|
||||
L(write_14dbwords):
|
||||
movl %ecx, -56(%rdi)
|
||||
L(write_13dbwords):
|
||||
movl %ecx, -52(%rdi)
|
||||
L(write_12dbwords):
|
||||
movl %ecx, -48(%rdi)
|
||||
L(write_11dbwords):
|
||||
movl %ecx, -44(%rdi)
|
||||
L(write_10dbwords):
|
||||
movl %ecx, -40(%rdi)
|
||||
L(write_9dbwords):
|
||||
movl %ecx, -36(%rdi)
|
||||
L(write_8dbwords):
|
||||
movl %ecx, -32(%rdi)
|
||||
L(write_7dbwords):
|
||||
movl %ecx, -28(%rdi)
|
||||
L(write_6dbwords):
|
||||
movl %ecx, -24(%rdi)
|
||||
L(write_5dbwords):
|
||||
movl %ecx, -20(%rdi)
|
||||
L(write_4dbwords):
|
||||
movl %ecx, -16(%rdi)
|
||||
L(write_3dbwords):
|
||||
movl %ecx, -12(%rdi)
|
||||
L(write_2dbwords):
|
||||
movl %ecx, -8(%rdi)
|
||||
L(write_1dbwords):
|
||||
movl %ecx, -4(%rdi)
|
||||
L(write_0dbwords):
|
||||
ret
|
||||
|
||||
ALIGN (4)
|
||||
// Large-fill path: bring the destination up to 4-byte alignment one byte at
// a time before the SSE stores. RDX still counts 32-bit words here.
L(16dbwordsormore):
|
||||
// Either of the low two address bits set means not 4-byte aligned.
test $3, %edi
|
||||
jz L(aligned4bytes)
|
||||
// Misaligned: store the pattern over the first four bytes ...
mov %ecx, (%rdi)
|
||||
// ... and over the last four bytes of the buffer (RDI + 4*RDX - 4).
mov %ecx, -4(%rdi, %rdx, 4)
|
||||
// One fewer 32-bit word is left for the aligned stores.
sub $1, %rdx
|
||||
// Rotating left by 24 is a rotate right by 8: the pattern as it must be
// stored when starting one byte later, so the byte sequence is continued.
rol $24, %ecx
|
||||
add $1, %rdi
|
||||
test $3, %edi
|
||||
jz L(aligned4bytes)
|
||||
// Still misaligned: step one more byte and rotate the pattern to match.
ror $8, %ecx
|
||||
add $1, %rdi
|
||||
test $3, %edi
|
||||
jz L(aligned4bytes)
|
||||
// Third and last single-byte step; no test follows, execution continues
// straight into the code after this block.
ror $8, %ecx
|
||||
add $1, %rdi
|
||||
L(aligned4bytes):
|
||||
shl $2, %rdx
|
||||
|
||||
/* Fill xmm0 with the pattern. */
|
||||
movd %ecx, %xmm0
|
||||
pshufd $0, %xmm0, %xmm0
|
||||
|
||||
testl $0xf, %edi
|
||||
jz L(aligned_16)
|
||||
/* RDX > 32 and RDI is not 16 byte aligned. */
|
||||
movdqu %xmm0, (%rdi)
|
||||
mov %rdi, %rsi
|
||||
and $-16, %rdi
|
||||
add $16, %rdi
|
||||
sub %rdi, %rsi
|
||||
add %rsi, %rdx
|
||||
|
||||
ALIGN (4)
|
||||
L(aligned_16):
|
||||
cmp $128, %rdx
|
||||
jge L(128bytesormore)
|
||||
|
||||
L(aligned_16_less128bytes):
|
||||
add %rdx, %rdi
|
||||
shr $2, %rdx
|
||||
BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes), %rdx, 4)
|
||||
|
||||
ALIGN (4)
|
||||
L(128bytesormore):
|
||||
cmp $SHARED_CACHE_SIZE, %rdx
|
||||
jg L(128bytesormore_nt)
|
||||
|
||||
L(128bytesormore_normal):
|
||||
sub $128, %rdx
|
||||
movdqa %xmm0, (%rdi)
|
||||
movdqa %xmm0, 0x10(%rdi)
|
||||
movdqa %xmm0, 0x20(%rdi)
|
||||
movdqa %xmm0, 0x30(%rdi)
|
||||
movdqa %xmm0, 0x40(%rdi)
|
||||
movdqa %xmm0, 0x50(%rdi)
|
||||
movdqa %xmm0, 0x60(%rdi)
|
||||
movdqa %xmm0, 0x70(%rdi)
|
||||
lea 128(%rdi), %rdi
|
||||
cmp $128, %rdx
|
||||
jl L(128bytesless_normal)
|
||||
|
||||
sub $128, %rdx
|
||||
movdqa %xmm0, (%rdi)
|
||||
movdqa %xmm0, 0x10(%rdi)
|
||||
movdqa %xmm0, 0x20(%rdi)
|
||||
movdqa %xmm0, 0x30(%rdi)
|
||||
movdqa %xmm0, 0x40(%rdi)
|
||||
movdqa %xmm0, 0x50(%rdi)
|
||||
movdqa %xmm0, 0x60(%rdi)
|
||||
movdqa %xmm0, 0x70(%rdi)
|
||||
lea 128(%rdi), %rdi
|
||||
cmp $128, %rdx
|
||||
jl L(128bytesless_normal)
|
||||
|
||||
sub $128, %rdx
|
||||
movdqa %xmm0, (%rdi)
|
||||
movdqa %xmm0, 0x10(%rdi)
|
||||
movdqa %xmm0, 0x20(%rdi)
|
||||
movdqa %xmm0, 0x30(%rdi)
|
||||
movdqa %xmm0, 0x40(%rdi)
|
||||
movdqa %xmm0, 0x50(%rdi)
|
||||
movdqa %xmm0, 0x60(%rdi)
|
||||
movdqa %xmm0, 0x70(%rdi)
|
||||
lea 128(%rdi), %rdi
|
||||
cmp $128, %rdx
|
||||
jl L(128bytesless_normal)
|
||||
|
||||
sub $128, %rdx
|
||||
movdqa %xmm0, (%rdi)
|
||||
movdqa %xmm0, 0x10(%rdi)
|
||||
movdqa %xmm0, 0x20(%rdi)
|
||||
movdqa %xmm0, 0x30(%rdi)
|
||||
movdqa %xmm0, 0x40(%rdi)
|
||||
movdqa %xmm0, 0x50(%rdi)
|
||||
movdqa %xmm0, 0x60(%rdi)
|
||||
movdqa %xmm0, 0x70(%rdi)
|
||||
lea 128(%rdi), %rdi
|
||||
cmp $128, %rdx
|
||||
jge L(128bytesormore_normal)
|
||||
|
||||
L(128bytesless_normal):
|
||||
add %rdx, %rdi
|
||||
shr $2, %rdx
|
||||
BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes), %rdx, 4)
|
||||
|
||||
ALIGN (4)
|
||||
L(128bytesormore_nt):
|
||||
sub $128, %rdx
|
||||
movntdq %xmm0, (%rdi)
|
||||
movntdq %xmm0, 0x10(%rdi)
|
||||
movntdq %xmm0, 0x20(%rdi)
|
||||
movntdq %xmm0, 0x30(%rdi)
|
||||
movntdq %xmm0, 0x40(%rdi)
|
||||
movntdq %xmm0, 0x50(%rdi)
|
||||
movntdq %xmm0, 0x60(%rdi)
|
||||
movntdq %xmm0, 0x70(%rdi)
|
||||
lea 128(%rdi), %rdi
|
||||
cmp $128, %rdx
|
||||
jge L(128bytesormore_nt)
|
||||
|
||||
sfence
|
||||
add %rdx, %rdi
|
||||
shr $2, %rdx
|
||||
BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes), %rdx, 4)
|
||||
|
||||
.pushsection .rodata.sse2,"a",@progbits
|
||||
ALIGN (2)
|
||||
L(table_16_128bytes):
|
||||
.int JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
|
||||
.int JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
|
||||
.popsection
|
||||
|
||||
ALIGN (4)
|
||||
L(aligned_16_112bytes):
|
||||
movdqa %xmm0, -112(%rdi)
|
||||
L(aligned_16_96bytes):
|
||||
movdqa %xmm0, -96(%rdi)
|
||||
L(aligned_16_80bytes):
|
||||
movdqa %xmm0, -80(%rdi)
|
||||
L(aligned_16_64bytes):
|
||||
movdqa %xmm0, -64(%rdi)
|
||||
L(aligned_16_48bytes):
|
||||
movdqa %xmm0, -48(%rdi)
|
||||
L(aligned_16_32bytes):
|
||||
movdqa %xmm0, -32(%rdi)
|
||||
L(aligned_16_16bytes):
|
||||
movdqa %xmm0, -16(%rdi)
|
||||
L(aligned_16_0bytes):
|
||||
ret
|
||||
|
||||
ALIGN (4)
|
||||
L(aligned_16_116bytes):
|
||||
movdqa %xmm0, -116(%rdi)
|
||||
L(aligned_16_100bytes):
|
||||
movdqa %xmm0, -100(%rdi)
|
||||
L(aligned_16_84bytes):
|
||||
movdqa %xmm0, -84(%rdi)
|
||||
L(aligned_16_68bytes):
|
||||
movdqa %xmm0, -68(%rdi)
|
||||
L(aligned_16_52bytes):
|
||||
movdqa %xmm0, -52(%rdi)
|
||||
L(aligned_16_36bytes):
|
||||
movdqa %xmm0, -36(%rdi)
|
||||
L(aligned_16_20bytes):
|
||||
movdqa %xmm0, -20(%rdi)
|
||||
L(aligned_16_4bytes):
|
||||
movl %ecx, -4(%rdi)
|
||||
ret
|
||||
|
||||
ALIGN (4)
|
||||
L(aligned_16_120bytes):
|
||||
movdqa %xmm0, -120(%rdi)
|
||||
L(aligned_16_104bytes):
|
||||
movdqa %xmm0, -104(%rdi)
|
||||
L(aligned_16_88bytes):
|
||||
movdqa %xmm0, -88(%rdi)
|
||||
L(aligned_16_72bytes):
|
||||
movdqa %xmm0, -72(%rdi)
|
||||
L(aligned_16_56bytes):
|
||||
movdqa %xmm0, -56(%rdi)
|
||||
L(aligned_16_40bytes):
|
||||
movdqa %xmm0, -40(%rdi)
|
||||
L(aligned_16_24bytes):
|
||||
movdqa %xmm0, -24(%rdi)
|
||||
L(aligned_16_8bytes):
|
||||
movq %xmm0, -8(%rdi)
|
||||
ret
|
||||
|
||||
ALIGN (4)
|
||||
L(aligned_16_124bytes):
|
||||
movdqa %xmm0, -124(%rdi)
|
||||
L(aligned_16_108bytes):
|
||||
movdqa %xmm0, -108(%rdi)
|
||||
L(aligned_16_92bytes):
|
||||
movdqa %xmm0, -92(%rdi)
|
||||
L(aligned_16_76bytes):
|
||||
movdqa %xmm0, -76(%rdi)
|
||||
L(aligned_16_60bytes):
|
||||
movdqa %xmm0, -60(%rdi)
|
||||
L(aligned_16_44bytes):
|
||||
movdqa %xmm0, -44(%rdi)
|
||||
L(aligned_16_28bytes):
|
||||
movdqa %xmm0, -28(%rdi)
|
||||
L(aligned_16_12bytes):
|
||||
movq %xmm0, -12(%rdi)
|
||||
movl %ecx, -4(%rdi)
|
||||
ret
|
||||
|
||||
END (MEMSET)
|
|
@ -14,8 +14,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef ANDROID_CUTILS_MEMORY_H
|
||||
#define ANDROID_CUTILS_MEMORY_H
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <sys/types.h>
|
||||
|
@ -24,12 +23,6 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* size is given in bytes and must be a multiple of 2 */
|
||||
void android_memset16(uint16_t* dst, uint16_t value, size_t size);
|
||||
|
||||
/* size is given in bytes and must be a multiple of 4 */
|
||||
void android_memset32(uint32_t* dst, uint32_t value, size_t size);
|
||||
|
||||
#if defined(__GLIBC__) || defined(_WIN32)
|
||||
/* Declaration of strlcpy() for platforms that don't already have it. */
|
||||
size_t strlcpy(char *dst, const char *src, size_t size);
|
||||
|
@ -38,5 +31,3 @@ size_t strlcpy(char *dst, const char *src, size_t size);
|
|||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // ANDROID_CUTILS_MEMORY_H
|
||||
|
|
|
@ -1,181 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2014 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include <cutils/memory.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#define FENCEPOST_LENGTH 8
|
||||
|
||||
#define MAX_TEST_SIZE (64*1024)
|
||||
// Choose values that have no repeating byte values.
|
||||
#define MEMSET16_PATTERN 0xb139
|
||||
#define MEMSET32_PATTERN 0x48193a27
|
||||
|
||||
enum test_e {
|
||||
MEMSET16 = 0,
|
||||
MEMSET32,
|
||||
};
|
||||
|
||||
static int g_memset16_aligns[][2] = {
|
||||
{ 2, 0 },
|
||||
{ 4, 0 },
|
||||
{ 8, 0 },
|
||||
{ 16, 0 },
|
||||
{ 32, 0 },
|
||||
{ 64, 0 },
|
||||
{ 128, 0 },
|
||||
|
||||
{ 4, 2 },
|
||||
|
||||
{ 8, 2 },
|
||||
{ 8, 4 },
|
||||
{ 8, 6 },
|
||||
|
||||
{ 128, 2 },
|
||||
{ 128, 4 },
|
||||
{ 128, 6 },
|
||||
{ 128, 8 },
|
||||
{ 128, 10 },
|
||||
{ 128, 12 },
|
||||
{ 128, 14 },
|
||||
{ 128, 16 },
|
||||
};
|
||||
|
||||
static int g_memset32_aligns[][2] = {
|
||||
{ 4, 0 },
|
||||
{ 8, 0 },
|
||||
{ 16, 0 },
|
||||
{ 32, 0 },
|
||||
{ 64, 0 },
|
||||
{ 128, 0 },
|
||||
|
||||
{ 8, 4 },
|
||||
|
||||
{ 128, 4 },
|
||||
{ 128, 8 },
|
||||
{ 128, 12 },
|
||||
{ 128, 16 },
|
||||
};
|
||||
|
||||
// Picks the step by which the tested length grows: min_incr while len is
// below 1024, 256 from 1024 up to 4095, and 1024 from 4096 onwards.
static size_t GetIncrement(size_t len, size_t min_incr) {
  if (len < 1024) {
    return min_incr;
  }
  return (len < 4096) ? 256 : 1024;
}
|
||||
|
||||
// Return a pointer into the current buffer with the specified alignment.
//
// orig_ptr:  starting address inside the caller's buffer.
// alignment: desired alignment in bytes; the mask arithmetic below expects a
//            power of two. A value <= 0 returns orig_ptr unchanged.
// or_mask:   extra bits OR-ed into the address after aligning (used by the
//            callers as a byte offset from the aligned address).
//
// For alignment > 0 the result always lies after orig_ptr, so the caller must
// provide enough slack past orig_ptr.
static void *GetAlignedPtr(void *orig_ptr, int alignment, int or_mask) {
  // uintptr_t is the integer type meant for pointer round-trips; it matches
  // the pointer width on both 32-bit and 64-bit targets.
  uintptr_t ptr = reinterpret_cast<uintptr_t>(orig_ptr);
  if (alignment > 0) {
    const uintptr_t align = static_cast<uintptr_t>(alignment);

    // Move forward to the next multiple of the alignment. An already aligned
    // address still advances by one full alignment step.
    ptr += align - (ptr & (align - 1));

    // When setting the alignment, set it to exactly the alignment chosen:
    // forcing the alignment bit on makes the address an odd multiple of the
    // alignment, so it is not aligned to anything more than that. or_mask
    // then adds the requested extra bits.
    ptr |= static_cast<uintptr_t>(alignment | or_mask);
  }

  return reinterpret_cast<void*>(ptr);
}
|
||||
|
||||
// Writes the guard pattern at buffer: alternating 0xde, 0xad bytes, two at a
// time, until FENCEPOST_LENGTH bytes have been covered.
static void SetFencepost(uint8_t *buffer) {
  int offset = 0;
  while (offset < FENCEPOST_LENGTH) {
    buffer[offset] = 0xde;
    buffer[offset + 1] = 0xad;
    offset += 2;
  }
}
|
||||
|
||||
static void VerifyFencepost(uint8_t *buffer) {
|
||||
for (int i = 0; i < FENCEPOST_LENGTH; i += 2) {
|
||||
if (buffer[i] != 0xde || buffer[i+1] != 0xad) {
|
||||
uint8_t expected_value;
|
||||
if (buffer[i] == 0xde) {
|
||||
i++;
|
||||
expected_value = 0xad;
|
||||
} else {
|
||||
expected_value = 0xde;
|
||||
}
|
||||
ASSERT_EQ(expected_value, buffer[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sweeps android_memset16() or android_memset32() over a range of lengths and
// destination alignments, checking both the filled bytes and the guard bytes
// placed on either side of the destination.
//
// test_type:  MEMSET16 or MEMSET32; selects the routine under test.
// value:      fill pattern. For MEMSET16 only the low 16 bits are meaningful
//             on entry; they are replicated into the high half below.
// align:      table of { alignment, or_mask } rows passed to GetAlignedPtr().
// num_aligns: number of rows in align.
//
// Any mismatch raises a fatal gtest failure and ends the sweep.
void RunMemsetTests(test_e test_type, uint32_t value, int align[][2], size_t num_aligns) {
  size_t min_incr = 4;
  if (test_type == MEMSET16) {
    min_incr = 2;
    // Duplicate the 16-bit pattern into both halves so the expected image can
    // be built one 32-bit word at a time.
    value |= value << 16;
  }

  // Reference image: MAX_TEST_SIZE bytes of the repeated pattern.
  const size_t expected_words = MAX_TEST_SIZE/sizeof(uint32_t);
  std::unique_ptr<uint32_t[]> expected_buf(new uint32_t[expected_words]);
  for (size_t i = 0; i < expected_words; i++) {
    expected_buf[i] = value;
  }

  // Allocate one large buffer with lots of extra space so that we can
  // guarantee that all possible alignments will fit.
  std::unique_ptr<uint8_t[]> buf(new uint8_t[3*MAX_TEST_SIZE]);
  for (size_t i = 0; i < num_aligns; i++) {
    // The destination depends only on the alignment case, not on the length,
    // so compute it once per case. Starting FENCEPOST_LENGTH bytes in leaves
    // room for the leading fencepost.
    uint8_t *buf_align = reinterpret_cast<uint8_t*>(GetAlignedPtr(
        buf.get()+FENCEPOST_LENGTH, align[i][0], align[i][1]));

    size_t incr = min_incr;
    for (size_t len = incr; len <= MAX_TEST_SIZE; len += incr) {
      incr = GetIncrement(len, min_incr);

      SetFencepost(&buf_align[-FENCEPOST_LENGTH]);
      SetFencepost(&buf_align[len]);

      // Pre-fill with a byte that occurs in neither pattern nor in zero, so
      // bytes the routine failed to write are detectable.
      memset(buf_align, 0xff, len);
      if (test_type == MEMSET16) {
        android_memset16(reinterpret_cast<uint16_t*>(buf_align), value, len);
      } else {
        android_memset32(reinterpret_cast<uint32_t*>(buf_align), value, len);
      }
      ASSERT_EQ(0, memcmp(expected_buf.get(), buf_align, len))
          << "Failed size " << len << " align " << align[i][0] << " " << align[i][1] << "\n";

      // VerifyFencepost() reports via ASSERT_*, which only returns from that
      // helper. ASSERT_NO_FATAL_FAILURE propagates the failure so the sweep
      // stops at the first corrupted fencepost instead of continuing.
      ASSERT_NO_FATAL_FAILURE(VerifyFencepost(&buf_align[-FENCEPOST_LENGTH]))
          << "Leading fencepost, size " << len << " align " << align[i][0] << " " << align[i][1];
      ASSERT_NO_FATAL_FAILURE(VerifyFencepost(&buf_align[len]))
          << "Trailing fencepost, size " << len << " align " << align[i][0] << " " << align[i][1];
    }
  }
}
|
||||
|
||||
// Runs the 16-bit fill sweep with a non-zero pattern over every row of
// g_memset16_aligns.
TEST(libcutils, android_memset16_non_zero) {
  const size_t num_aligns = sizeof(g_memset16_aligns) / sizeof(g_memset16_aligns[0]);
  RunMemsetTests(MEMSET16, MEMSET16_PATTERN, g_memset16_aligns, num_aligns);
}
|
||||
|
||||
// Runs the 16-bit fill sweep with an all-zero value over every row of
// g_memset16_aligns.
TEST(libcutils, android_memset16_zero) {
  const size_t num_aligns = sizeof(g_memset16_aligns) / sizeof(g_memset16_aligns[0]);
  RunMemsetTests(MEMSET16, 0, g_memset16_aligns, num_aligns);
}
|
||||
|
||||
// Runs the 32-bit fill sweep with a non-zero pattern over every row of
// g_memset32_aligns.
TEST(libcutils, android_memset32_non_zero) {
  const size_t num_aligns = sizeof(g_memset32_aligns) / sizeof(g_memset32_aligns[0]);
  RunMemsetTests(MEMSET32, MEMSET32_PATTERN, g_memset32_aligns, num_aligns);
}
|
||||
|
||||
// Runs the 32-bit fill sweep with an all-zero value over every row of
// g_memset32_aligns.
TEST(libcutils, android_memset32_zero) {
  const size_t num_aligns = sizeof(g_memset32_aligns) / sizeof(g_memset32_aligns[0]);
  RunMemsetTests(MEMSET32, 0, g_memset32_aligns, num_aligns);
}
|
Loading…
Reference in New Issue