linux/arch/arm/lib/copy_from_user.S

130 lines
2.4 KiB
ArmAsm
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0-only */
/*
* linux/arch/arm/lib/copy_from_user.S
*
* Author: Nicolas Pitre
* Created: Sep 29, 2005
* Copyright: MontaVista Software, Inc.
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>
/*
* Prototype:
*
* size_t arm_copy_from_user(void *to, const void *from, size_t n)
*
* Purpose:
*
* copy a block to kernel memory from user memory
*
* Params:
*
* to = kernel memory
* from = user memory
* n = number of bytes to copy
*
* Return value:
*
* Number of bytes NOT copied.
*/
ARM: 8812/1: Optimise copy_{from/to}_user for !CPU_USE_DOMAINS ARMv6+ processors do not use CONFIG_CPU_USE_DOMAINS and use privileged ldr/str instructions in copy_{from/to}_user. They are currently unnecessarily using single ldr/str instructions and can use ldm/stm instructions instead like memcpy does (but with appropriate fixup tables). This speeds up a "dd if=foo of=bar bs=32k" on a tmpfs filesystem by about 4% on my Cortex-A9. before:134217728 bytes (128.0MB) copied, 0.543848 seconds, 235.4MB/s before:134217728 bytes (128.0MB) copied, 0.538610 seconds, 237.6MB/s before:134217728 bytes (128.0MB) copied, 0.544356 seconds, 235.1MB/s before:134217728 bytes (128.0MB) copied, 0.544364 seconds, 235.1MB/s before:134217728 bytes (128.0MB) copied, 0.537130 seconds, 238.3MB/s before:134217728 bytes (128.0MB) copied, 0.533443 seconds, 240.0MB/s before:134217728 bytes (128.0MB) copied, 0.545691 seconds, 234.6MB/s before:134217728 bytes (128.0MB) copied, 0.534695 seconds, 239.4MB/s before:134217728 bytes (128.0MB) copied, 0.540561 seconds, 236.8MB/s before:134217728 bytes (128.0MB) copied, 0.541025 seconds, 236.6MB/s after:134217728 bytes (128.0MB) copied, 0.520445 seconds, 245.9MB/s after:134217728 bytes (128.0MB) copied, 0.527846 seconds, 242.5MB/s after:134217728 bytes (128.0MB) copied, 0.519510 seconds, 246.4MB/s after:134217728 bytes (128.0MB) copied, 0.527231 seconds, 242.8MB/s after:134217728 bytes (128.0MB) copied, 0.525030 seconds, 243.8MB/s after:134217728 bytes (128.0MB) copied, 0.524236 seconds, 244.2MB/s after:134217728 bytes (128.0MB) copied, 0.523659 seconds, 244.4MB/s after:134217728 bytes (128.0MB) copied, 0.525018 seconds, 243.8MB/s after:134217728 bytes (128.0MB) copied, 0.519249 seconds, 246.5MB/s after:134217728 bytes (128.0MB) copied, 0.518527 seconds, 246.9MB/s Reviewed-by: Nicolas Pitre <nico@linaro.org> Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com> Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
2018-11-09 17:09:48 +08:00
#ifdef CONFIG_CPU_USE_DOMAINS
#ifndef CONFIG_THUMB2_KERNEL
#define LDR1W_SHIFT 0
#else
#define LDR1W_SHIFT 1
#endif
.macro ldr1w ptr reg abort
ldrusr \reg, \ptr, 4, abort=\abort
.endm
.macro ldr4w ptr reg1 reg2 reg3 reg4 abort
ldr1w \ptr, \reg1, \abort
ldr1w \ptr, \reg2, \abort
ldr1w \ptr, \reg3, \abort
ldr1w \ptr, \reg4, \abort
.endm
.macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
ldr4w \ptr, \reg1, \reg2, \reg3, \reg4, \abort
ldr4w \ptr, \reg5, \reg6, \reg7, \reg8, \abort
.endm
ARM: 8812/1: Optimise copy_{from/to}_user for !CPU_USE_DOMAINS ARMv6+ processors do not use CONFIG_CPU_USE_DOMAINS and use privileged ldr/str instructions in copy_{from/to}_user. They are currently unnecessarily using single ldr/str instructions and can use ldm/stm instructions instead like memcpy does (but with appropriate fixup tables). This speeds up a "dd if=foo of=bar bs=32k" on a tmpfs filesystem by about 4% on my Cortex-A9. before:134217728 bytes (128.0MB) copied, 0.543848 seconds, 235.4MB/s before:134217728 bytes (128.0MB) copied, 0.538610 seconds, 237.6MB/s before:134217728 bytes (128.0MB) copied, 0.544356 seconds, 235.1MB/s before:134217728 bytes (128.0MB) copied, 0.544364 seconds, 235.1MB/s before:134217728 bytes (128.0MB) copied, 0.537130 seconds, 238.3MB/s before:134217728 bytes (128.0MB) copied, 0.533443 seconds, 240.0MB/s before:134217728 bytes (128.0MB) copied, 0.545691 seconds, 234.6MB/s before:134217728 bytes (128.0MB) copied, 0.534695 seconds, 239.4MB/s before:134217728 bytes (128.0MB) copied, 0.540561 seconds, 236.8MB/s before:134217728 bytes (128.0MB) copied, 0.541025 seconds, 236.6MB/s after:134217728 bytes (128.0MB) copied, 0.520445 seconds, 245.9MB/s after:134217728 bytes (128.0MB) copied, 0.527846 seconds, 242.5MB/s after:134217728 bytes (128.0MB) copied, 0.519510 seconds, 246.4MB/s after:134217728 bytes (128.0MB) copied, 0.527231 seconds, 242.8MB/s after:134217728 bytes (128.0MB) copied, 0.525030 seconds, 243.8MB/s after:134217728 bytes (128.0MB) copied, 0.524236 seconds, 244.2MB/s after:134217728 bytes (128.0MB) copied, 0.523659 seconds, 244.4MB/s after:134217728 bytes (128.0MB) copied, 0.525018 seconds, 243.8MB/s after:134217728 bytes (128.0MB) copied, 0.519249 seconds, 246.5MB/s after:134217728 bytes (128.0MB) copied, 0.518527 seconds, 246.9MB/s Reviewed-by: Nicolas Pitre <nico@linaro.org> Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com> Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
2018-11-09 17:09:48 +08:00
#else
#define LDR1W_SHIFT 0
.macro ldr1w ptr reg abort
USERL(\abort, W(ldr) \reg, [\ptr], #4)
.endm
.macro ldr4w ptr reg1 reg2 reg3 reg4 abort
USERL(\abort, ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4})
.endm
.macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
USERL(\abort, ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8})
.endm
#endif /* CONFIG_CPU_USE_DOMAINS */
.macro ldr1b ptr reg cond=al abort
ldrusr \reg, \ptr, 1, \cond, abort=\abort
.endm
ARM: 8812/1: Optimise copy_{from/to}_user for !CPU_USE_DOMAINS ARMv6+ processors do not use CONFIG_CPU_USE_DOMAINS and use privileged ldr/str instructions in copy_{from/to}_user. They are currently unnecessarily using single ldr/str instructions and can use ldm/stm instructions instead like memcpy does (but with appropriate fixup tables). This speeds up a "dd if=foo of=bar bs=32k" on a tmpfs filesystem by about 4% on my Cortex-A9. before:134217728 bytes (128.0MB) copied, 0.543848 seconds, 235.4MB/s before:134217728 bytes (128.0MB) copied, 0.538610 seconds, 237.6MB/s before:134217728 bytes (128.0MB) copied, 0.544356 seconds, 235.1MB/s before:134217728 bytes (128.0MB) copied, 0.544364 seconds, 235.1MB/s before:134217728 bytes (128.0MB) copied, 0.537130 seconds, 238.3MB/s before:134217728 bytes (128.0MB) copied, 0.533443 seconds, 240.0MB/s before:134217728 bytes (128.0MB) copied, 0.545691 seconds, 234.6MB/s before:134217728 bytes (128.0MB) copied, 0.534695 seconds, 239.4MB/s before:134217728 bytes (128.0MB) copied, 0.540561 seconds, 236.8MB/s before:134217728 bytes (128.0MB) copied, 0.541025 seconds, 236.6MB/s after:134217728 bytes (128.0MB) copied, 0.520445 seconds, 245.9MB/s after:134217728 bytes (128.0MB) copied, 0.527846 seconds, 242.5MB/s after:134217728 bytes (128.0MB) copied, 0.519510 seconds, 246.4MB/s after:134217728 bytes (128.0MB) copied, 0.527231 seconds, 242.8MB/s after:134217728 bytes (128.0MB) copied, 0.525030 seconds, 243.8MB/s after:134217728 bytes (128.0MB) copied, 0.524236 seconds, 244.2MB/s after:134217728 bytes (128.0MB) copied, 0.523659 seconds, 244.4MB/s after:134217728 bytes (128.0MB) copied, 0.525018 seconds, 243.8MB/s after:134217728 bytes (128.0MB) copied, 0.519249 seconds, 246.5MB/s after:134217728 bytes (128.0MB) copied, 0.518527 seconds, 246.9MB/s Reviewed-by: Nicolas Pitre <nico@linaro.org> Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com> Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
2018-11-09 17:09:48 +08:00
#define STR1W_SHIFT 0
.macro str1w ptr reg abort
W(str) \reg, [\ptr], #4
.endm
.macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
.endm
.macro str1b ptr reg cond=al abort
strb\cond \reg, [\ptr], #1
.endm
.macro enter reg1 reg2
mov r3, #0
stmdb sp!, {r0, r2, r3, \reg1, \reg2}
.endm
.macro usave reg1 reg2
UNWIND( .save {r0, r2, r3, \reg1, \reg2} )
.endm
.macro exit reg1 reg2
add sp, sp, #8
ldmfd sp!, {r0, \reg1, \reg2}
.endm
.text
ENTRY(arm_copy_from_user)
#ifdef CONFIG_CPU_SPECTRE
get_thread_info r3
ldr r3, [r3, #TI_ADDR_LIMIT]
uaccess_mask_range_ptr r1, r2, r3, ip
#endif
#include "copy_template.S"
ENDPROC(arm_copy_from_user)
.pushsection .fixup,"ax"
.align 0
copy_abort_preamble
ldmfd sp!, {r1, r2, r3}
sub r0, r0, r1
rsb r0, r0, r2
copy_abort_end
.popsection