linux/arch/arm/lib/copy_to_user.S

130 lines
2.4 KiB
ArmAsm
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0-only */
/*
* linux/arch/arm/lib/copy_to_user.S
*
* Author: Nicolas Pitre
* Created: Sep 29, 2005
* Copyright: MontaVista Software, Inc.
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>
/*
* Prototype:
*
* size_t arm_copy_to_user(void *to, const void *from, size_t n)
*
* Purpose:
*
* copy a block to user memory from kernel memory
*
* Params:
*
* to = user memory
* from = kernel memory
* n = number of bytes to copy
*
* Return value:
*
* Number of bytes NOT copied.
*/
#define LDR1W_SHIFT 0
.macro ldr1w ptr reg abort
W(ldr) \reg, [\ptr], #4
.endm
.macro ldr4w ptr reg1 reg2 reg3 reg4 abort
ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4}
.endm
.macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
.endm
.macro ldr1b ptr reg cond=al abort
ldrb\cond \reg, [\ptr], #1
.endm
ARM: 8812/1: Optimise copy_{from/to}_user for !CPU_USE_DOMAINS ARMv6+ processors do not use CONFIG_CPU_USE_DOMAINS and use privileged ldr/str instructions in copy_{from/to}_user. They are currently unnecessarily using single ldr/str instructions and can use ldm/stm instructions instead like memcpy does (but with appropriate fixup tables). This speeds up a "dd if=foo of=bar bs=32k" on a tmpfs filesystem by about 4% on my Cortex-A9. before:134217728 bytes (128.0MB) copied, 0.543848 seconds, 235.4MB/s before:134217728 bytes (128.0MB) copied, 0.538610 seconds, 237.6MB/s before:134217728 bytes (128.0MB) copied, 0.544356 seconds, 235.1MB/s before:134217728 bytes (128.0MB) copied, 0.544364 seconds, 235.1MB/s before:134217728 bytes (128.0MB) copied, 0.537130 seconds, 238.3MB/s before:134217728 bytes (128.0MB) copied, 0.533443 seconds, 240.0MB/s before:134217728 bytes (128.0MB) copied, 0.545691 seconds, 234.6MB/s before:134217728 bytes (128.0MB) copied, 0.534695 seconds, 239.4MB/s before:134217728 bytes (128.0MB) copied, 0.540561 seconds, 236.8MB/s before:134217728 bytes (128.0MB) copied, 0.541025 seconds, 236.6MB/s after:134217728 bytes (128.0MB) copied, 0.520445 seconds, 245.9MB/s after:134217728 bytes (128.0MB) copied, 0.527846 seconds, 242.5MB/s after:134217728 bytes (128.0MB) copied, 0.519510 seconds, 246.4MB/s after:134217728 bytes (128.0MB) copied, 0.527231 seconds, 242.8MB/s after:134217728 bytes (128.0MB) copied, 0.525030 seconds, 243.8MB/s after:134217728 bytes (128.0MB) copied, 0.524236 seconds, 244.2MB/s after:134217728 bytes (128.0MB) copied, 0.523659 seconds, 244.4MB/s after:134217728 bytes (128.0MB) copied, 0.525018 seconds, 243.8MB/s after:134217728 bytes (128.0MB) copied, 0.519249 seconds, 246.5MB/s after:134217728 bytes (128.0MB) copied, 0.518527 seconds, 246.9MB/s Reviewed-by: Nicolas Pitre <nico@linaro.org> Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com> Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
2018-11-09 17:09:48 +08:00
#ifdef CONFIG_CPU_USE_DOMAINS
#ifndef CONFIG_THUMB2_KERNEL
#define STR1W_SHIFT 0
#else
#define STR1W_SHIFT 1
#endif
.macro str1w ptr reg abort
strusr \reg, \ptr, 4, abort=\abort
.endm
.macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
str1w \ptr, \reg1, \abort
str1w \ptr, \reg2, \abort
str1w \ptr, \reg3, \abort
str1w \ptr, \reg4, \abort
str1w \ptr, \reg5, \abort
str1w \ptr, \reg6, \abort
str1w \ptr, \reg7, \abort
str1w \ptr, \reg8, \abort
.endm
ARM: 8812/1: Optimise copy_{from/to}_user for !CPU_USE_DOMAINS ARMv6+ processors do not use CONFIG_CPU_USE_DOMAINS and use privileged ldr/str instructions in copy_{from/to}_user. They are currently unnecessarily using single ldr/str instructions and can use ldm/stm instructions instead like memcpy does (but with appropriate fixup tables). This speeds up a "dd if=foo of=bar bs=32k" on a tmpfs filesystem by about 4% on my Cortex-A9. before:134217728 bytes (128.0MB) copied, 0.543848 seconds, 235.4MB/s before:134217728 bytes (128.0MB) copied, 0.538610 seconds, 237.6MB/s before:134217728 bytes (128.0MB) copied, 0.544356 seconds, 235.1MB/s before:134217728 bytes (128.0MB) copied, 0.544364 seconds, 235.1MB/s before:134217728 bytes (128.0MB) copied, 0.537130 seconds, 238.3MB/s before:134217728 bytes (128.0MB) copied, 0.533443 seconds, 240.0MB/s before:134217728 bytes (128.0MB) copied, 0.545691 seconds, 234.6MB/s before:134217728 bytes (128.0MB) copied, 0.534695 seconds, 239.4MB/s before:134217728 bytes (128.0MB) copied, 0.540561 seconds, 236.8MB/s before:134217728 bytes (128.0MB) copied, 0.541025 seconds, 236.6MB/s after:134217728 bytes (128.0MB) copied, 0.520445 seconds, 245.9MB/s after:134217728 bytes (128.0MB) copied, 0.527846 seconds, 242.5MB/s after:134217728 bytes (128.0MB) copied, 0.519510 seconds, 246.4MB/s after:134217728 bytes (128.0MB) copied, 0.527231 seconds, 242.8MB/s after:134217728 bytes (128.0MB) copied, 0.525030 seconds, 243.8MB/s after:134217728 bytes (128.0MB) copied, 0.524236 seconds, 244.2MB/s after:134217728 bytes (128.0MB) copied, 0.523659 seconds, 244.4MB/s after:134217728 bytes (128.0MB) copied, 0.525018 seconds, 243.8MB/s after:134217728 bytes (128.0MB) copied, 0.519249 seconds, 246.5MB/s after:134217728 bytes (128.0MB) copied, 0.518527 seconds, 246.9MB/s Reviewed-by: Nicolas Pitre <nico@linaro.org> Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com> Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
2018-11-09 17:09:48 +08:00
#else
#define STR1W_SHIFT 0
.macro str1w ptr reg abort
USERL(\abort, W(str) \reg, [\ptr], #4)
.endm
.macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
USERL(\abort, stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8})
.endm
#endif /* CONFIG_CPU_USE_DOMAINS */
.macro str1b ptr reg cond=al abort
strusr \reg, \ptr, 1, \cond, abort=\abort
.endm
.macro enter reg1 reg2
mov r3, #0
stmdb sp!, {r0, r2, r3, \reg1, \reg2}
.endm
.macro usave reg1 reg2
UNWIND( .save {r0, r2, r3, \reg1, \reg2} )
.endm
.macro exit reg1 reg2
add sp, sp, #8
ldmfd sp!, {r0, \reg1, \reg2}
.endm
.text
ENTRY(__copy_to_user_std)
WEAK(arm_copy_to_user)
#ifdef CONFIG_CPU_SPECTRE
get_thread_info r3
ldr r3, [r3, #TI_ADDR_LIMIT]
uaccess_mask_range_ptr r0, r2, r3, ip
#endif
#include "copy_template.S"
ENDPROC(arm_copy_to_user)
ENDPROC(__copy_to_user_std)
.pushsection .text.fixup,"ax"
.align 0
copy_abort_preamble
ldmfd sp!, {r1, r2, r3}
sub r0, r0, r1
rsb r0, r0, r2
copy_abort_end
.popsection