ARM: assembler: introduce adr_l, ldr_l and str_l macros

Like arm64, ARM supports position independent code sequences that
produce symbol references with a greater reach than the ordinary
adr/ldr instructions. Since on ARM, the adrl pseudo-instruction is
only supported in ARM mode (and not at all when using Clang), having
a adr_l macro like we do on arm64 is useful, and increases symmetry
as well.

Currently, we use open coded instruction sequences involving literals
and arithmetic operations. Instead, we can use movw/movt pairs on v7
CPUs, circumventing the D-cache entirely.

E.g., on v7+ CPUs, we can emit a PC-relative reference as follows:

       movw         <reg>, #:lower16:<sym> - (1f + 8)
       movt         <reg>, #:upper16:<sym> - (1f + 8)
  1:   add          <reg>, <reg>, pc

For older CPUs, we can emit the literal into a subsection, allowing it
to be emitted out of line while retaining the ability to perform
arithmetic on label offsets.

E.g., on pre-v7 CPUs, we can emit a PC-relative reference as follows:

       ldr          <reg>, 2f
  1:   add          <reg>, <reg>, pc
       .subsection  1
  2:   .long        <sym> - (1b + 8)
       .previous

This is allowed by the assembler because, unlike ordinary sections,
subsections are combined into a single section in the object file, and
so the label references are not true cross-section references that are
visible as relocations. (Subsections have been available in binutils
since 2004 at least, so they should not cause any issues with older
toolchains.)

So use the above to implement the macros mov_l, adr_l, ldr_l and str_l,
all of which will use movw/movt pairs on v7 and later CPUs, and use
PC-relative literals otherwise.

Reviewed-by: Nicolas Pitre <nico@fluxnic.net>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
This commit is contained in:
Ard Biesheuvel 2020-09-14 11:23:39 +03:00
parent 4e79f0211b
commit 0b1674638a
1 changed files with 84 additions and 0 deletions

View File

@ -494,4 +494,88 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
#define _ASM_NOKPROBE(entry)
#endif
.macro __adldst_l, op, reg, sym, tmp, c
.if __LINUX_ARM_ARCH__ < 7
ldr\c \tmp, .La\@
.subsection 1
.align 2
.La\@: .long \sym - .Lpc\@
.previous
.else
.ifnb \c
THUMB( ittt \c )
.endif
movw\c \tmp, #:lower16:\sym - .Lpc\@
movt\c \tmp, #:upper16:\sym - .Lpc\@
.endif
#ifndef CONFIG_THUMB2_KERNEL
.set .Lpc\@, . + 8 // PC bias
.ifc \op, add
add\c \reg, \tmp, pc
.else
\op\c \reg, [pc, \tmp]
.endif
#else
.Lb\@: add\c \tmp, \tmp, pc
/*
* In Thumb-2 builds, the PC bias depends on whether we are currently
* emitting into a .arm or a .thumb section. The size of the add opcode
* above will be 2 bytes when emitting in Thumb mode and 4 bytes when
* emitting in ARM mode, so let's use this to account for the bias.
*/
.set .Lpc\@, . + (. - .Lb\@)
.ifnc \op, add
\op\c \reg, [\tmp]
.endif
#endif
.endm
/*
* mov_l - move a constant value or [relocated] address into a register
*/
.macro mov_l, dst:req, imm:req
.if __LINUX_ARM_ARCH__ < 7
ldr \dst, =\imm
.else
movw \dst, #:lower16:\imm
movt \dst, #:upper16:\imm
.endif
.endm
/*
* adr_l - adr pseudo-op with unlimited range
*
* @dst: destination register
* @sym: name of the symbol
* @cond: conditional opcode suffix
*/
.macro adr_l, dst:req, sym:req, cond
__adldst_l add, \dst, \sym, \dst, \cond
.endm
/*
* ldr_l - ldr <literal> pseudo-op with unlimited range
*
* @dst: destination register
* @sym: name of the symbol
* @cond: conditional opcode suffix
*/
.macro ldr_l, dst:req, sym:req, cond
__adldst_l ldr, \dst, \sym, \dst, \cond
.endm
/*
* str_l - str <literal> pseudo-op with unlimited range
*
* @src: source register
* @sym: name of the symbol
* @tmp: mandatory scratch register
* @cond: conditional opcode suffix
*/
.macro str_l, src:req, sym:req, tmp:req, cond
__adldst_l str, \src, \sym, \tmp, \cond
.endm
#endif /* __ASM_ASSEMBLER_H__ */