x86/asm/entry/64: Clean up entry_64.S

Make the 64-bit syscall entry code a bit more readable:

 - use consistent assembly coding style similar to the other entry_*.S files

 - remove old comments that are not true anymore

 - eliminate whitespace noise

 - use consistent vertical spacing

 - fix various comments

 - reorganize entry point generation tables to be more readable

No code changed:

  # arch/x86/entry/entry_64.o:

   text    data     bss     dec     hex filename
  12282       0       0   12282    2ffa entry_64.o.before
  12282       0       0   12282    2ffa entry_64.o.after

md5:
   cbab1f2d727a2a8a87618eeb79f391b7  entry_64.o.before.asm
   cbab1f2d727a2a8a87618eeb79f391b7  entry_64.o.after.asm

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar 2015-06-08 20:43:07 +02:00
parent 9dda1658a9
commit 4d7321381e
1 changed files with 401 additions and 413 deletions

View File

@ -4,26 +4,20 @@
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
* Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
*/
/*
*
* entry.S contains the system-call and fault low-level handling routines.
*
* Some of this is documented in Documentation/x86/entry_64.txt
*
* NOTE: This code handles signal-recognition, which happens every time
* after an interrupt and after each system call.
*
* A note on terminology:
* - iret frame: Architecture defined interrupt frame from SS to RIP
* at the top of the kernel process stack.
*
* Some macro usage:
* - ENTRY/END Define functions in the symbol table.
* - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
* - idtentry - Define exception entry points.
* - ENTRY/END: Define functions in the symbol table.
* - TRACE_IRQ_*: Trace hardirq state for lock debugging.
* - idtentry: Define exception entry points.
*/
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
@ -53,7 +47,6 @@
.code64
.section .entry.text, "ax"
#ifdef CONFIG_PARAVIRT
ENTRY(native_usergs_sysret64)
swapgs
@ -61,7 +54,6 @@ ENTRY(native_usergs_sysret64)
ENDPROC(native_usergs_sysret64)
#endif /* CONFIG_PARAVIRT */
.macro TRACE_IRQS_IRETQ
#ifdef CONFIG_TRACE_IRQFLAGS
bt $9, EFLAGS(%rsp) /* interrupts off? */
@ -110,9 +102,9 @@ ENDPROC(native_usergs_sysret64)
#endif
/*
* 64bit SYSCALL instruction entry. Up to 6 arguments in registers.
* 64-bit SYSCALL instruction entry. Up to 6 arguments in registers.
*
* 64bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
* 64-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
* then loads new ss, cs, and rip from previously programmed MSRs.
* rflags gets masked by a value from another MSR (so CLD and CLAC
* are not needed). SYSCALL does not save anything on the stack
@ -221,7 +213,7 @@ entry_SYSCALL_64_fastpath:
movq EFLAGS(%rsp), %r11
movq RSP(%rsp), %rsp
/*
* 64bit SYSRET restores rip from rcx,
* 64-bit SYSRET restores rip from rcx,
* rflags from r11 (but RF and VM bits are forced to 0),
* cs and ss are loaded from MSRs.
* Restoration of rflags re-enables interrupts.
@ -294,9 +286,11 @@ GLOBAL(int_with_check)
andl $~TS_COMPAT, TI_status(%rcx)
jmp syscall_return
/* Either reschedule or signal or syscall exit tracking needed. */
/* First do a reschedule test. */
/* edx: work, edi: workmask */
/*
* Either reschedule or signal or syscall exit tracking needed.
* First do a reschedule test.
* edx: work, edi: workmask
*/
int_careful:
bt $TIF_NEED_RESCHED, %edx
jnc int_very_careful
@ -318,7 +312,7 @@ int_very_careful:
testl $_TIF_WORK_SYSCALL_EXIT, %edx
jz int_signal
pushq %rdi
leaq 8(%rsp),%rdi # &ptregs -> arg1
leaq 8(%rsp), %rdi /* &ptregs -> arg1 */
call syscall_trace_leave
popq %rdi
andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU), %edi
@ -327,8 +321,8 @@ int_very_careful:
int_signal:
testl $_TIF_DO_NOTIFY_MASK, %edx
jz 1f
movq %rsp,%rdi # &ptregs -> arg1
xorl %esi,%esi # oldset -> arg2
movq %rsp, %rdi /* &ptregs -> arg1 */
xorl %esi, %esi /* oldset -> arg2 */
call do_notify_resume
1: movl $_TIF_WORK_MASK, %edi
int_restore_rest:
@ -362,9 +356,11 @@ syscall_return:
.ifne __VIRTUAL_MASK_SHIFT - 47
.error "virtual address width changed -- SYSRET checks need update"
.endif
/* Change top 16 bits to be the sign-extension of 47th bit */
shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
/* If this changed %rcx, it was not canonical */
cmpq %rcx, %r11
jne opportunistic_sysret_failed
@ -505,13 +501,13 @@ ENTRY(ret_from_fork)
LOCK ; btr $TIF_FORK, TI_flags(%r8)
pushq $0x0002
popfq # reset kernel eflags
popfq /* reset kernel eflags */
call schedule_tail # rdi: 'prev' task parameter
call schedule_tail /* rdi: 'prev' task parameter */
RESTORE_EXTRA_REGS
testb $3, CS(%rsp) # from kernel_thread?
testb $3, CS(%rsp) /* from kernel_thread? */
/*
* By the time we get here, we have no idea whether our pt_regs,
@ -522,8 +518,10 @@ ENTRY(ret_from_fork)
*/
jnz int_ret_from_sys_call
/* We came from kernel_thread */
/* nb: we depend on RESTORE_EXTRA_REGS above */
/*
* We came from kernel_thread
* nb: we depend on RESTORE_EXTRA_REGS above
*/
movq %rbp, %rdi
call *%rbx
movl $0, RAX(%rsp)
@ -617,9 +615,8 @@ ret_from_intr:
/* Interrupt came from user space */
retint_user:
GET_THREAD_INFO(%rcx)
/*
* %rcx: thread info. Interrupts off.
*/
/* %rcx: thread info. Interrupts are off. */
retint_with_reschedule:
movl $_TIF_WORK_MASK, %edi
retint_check:
@ -643,7 +640,7 @@ retint_kernel:
#ifdef CONFIG_PREEMPT
/* Interrupts are off */
/* Check if we need preemption */
bt $9,EFLAGS(%rsp) /* interrupts were off? */
bt $9, EFLAGS(%rsp) /* were interrupts off? */
jnc 1f
0: cmpl $0, PER_CPU_VAR(__preempt_count)
jnz 1f
@ -732,8 +729,8 @@ retint_signal:
ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_EXTRA_REGS
movq $-1, ORIG_RAX(%rsp)
xorl %esi,%esi # oldset
movq %rsp,%rdi # &pt_regs
xorl %esi, %esi /* oldset */
movq %rsp, %rdi /* &pt_regs */
call do_notify_resume
RESTORE_EXTRA_REGS
DISABLE_INTERRUPTS(CLBR_NONE)
@ -774,60 +771,45 @@ trace_apicinterrupt \num \sym
.endm
#ifdef CONFIG_SMP
apicinterrupt3 IRQ_MOVE_CLEANUP_VECTOR \
irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
apicinterrupt3 REBOOT_VECTOR \
reboot_interrupt smp_reboot_interrupt
apicinterrupt3 IRQ_MOVE_CLEANUP_VECTOR irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
apicinterrupt3 REBOOT_VECTOR reboot_interrupt smp_reboot_interrupt
#endif
#ifdef CONFIG_X86_UV
apicinterrupt3 UV_BAU_MESSAGE \
uv_bau_message_intr1 uv_bau_message_interrupt
apicinterrupt3 UV_BAU_MESSAGE uv_bau_message_intr1 uv_bau_message_interrupt
#endif
apicinterrupt LOCAL_TIMER_VECTOR \
apic_timer_interrupt smp_apic_timer_interrupt
apicinterrupt X86_PLATFORM_IPI_VECTOR \
x86_platform_ipi smp_x86_platform_ipi
apicinterrupt LOCAL_TIMER_VECTOR apic_timer_interrupt smp_apic_timer_interrupt
apicinterrupt X86_PLATFORM_IPI_VECTOR x86_platform_ipi smp_x86_platform_ipi
#ifdef CONFIG_HAVE_KVM
apicinterrupt3 POSTED_INTR_VECTOR \
kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR \
kvm_posted_intr_wakeup_ipi smp_kvm_posted_intr_wakeup_ipi
apicinterrupt3 POSTED_INTR_VECTOR kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR kvm_posted_intr_wakeup_ipi smp_kvm_posted_intr_wakeup_ipi
#endif
#ifdef CONFIG_X86_MCE_THRESHOLD
apicinterrupt THRESHOLD_APIC_VECTOR \
threshold_interrupt smp_threshold_interrupt
apicinterrupt THRESHOLD_APIC_VECTOR threshold_interrupt smp_threshold_interrupt
#endif
#ifdef CONFIG_X86_MCE_AMD
apicinterrupt DEFERRED_ERROR_VECTOR \
deferred_error_interrupt smp_deferred_error_interrupt
apicinterrupt DEFERRED_ERROR_VECTOR deferred_error_interrupt smp_deferred_error_interrupt
#endif
#ifdef CONFIG_X86_THERMAL_VECTOR
apicinterrupt THERMAL_APIC_VECTOR \
thermal_interrupt smp_thermal_interrupt
apicinterrupt THERMAL_APIC_VECTOR thermal_interrupt smp_thermal_interrupt
#endif
#ifdef CONFIG_SMP
apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
call_function_single_interrupt smp_call_function_single_interrupt
apicinterrupt CALL_FUNCTION_VECTOR \
call_function_interrupt smp_call_function_interrupt
apicinterrupt RESCHEDULE_VECTOR \
reschedule_interrupt smp_reschedule_interrupt
apicinterrupt CALL_FUNCTION_SINGLE_VECTOR call_function_single_interrupt smp_call_function_single_interrupt
apicinterrupt CALL_FUNCTION_VECTOR call_function_interrupt smp_call_function_interrupt
apicinterrupt RESCHEDULE_VECTOR reschedule_interrupt smp_reschedule_interrupt
#endif
apicinterrupt ERROR_APIC_VECTOR \
error_interrupt smp_error_interrupt
apicinterrupt SPURIOUS_APIC_VECTOR \
spurious_interrupt smp_spurious_interrupt
apicinterrupt ERROR_APIC_VECTOR error_interrupt smp_error_interrupt
apicinterrupt SPURIOUS_APIC_VECTOR spurious_interrupt smp_spurious_interrupt
#ifdef CONFIG_IRQ_WORK
apicinterrupt IRQ_WORK_VECTOR \
irq_work_interrupt smp_irq_work_interrupt
apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
#endif
/*
@ -853,8 +835,8 @@ ENTRY(\sym)
.if \paranoid
.if \paranoid == 1
testb $3, CS(%rsp) /* If coming from userspace, switch */
jnz 1f /* stacks. */
testb $3, CS(%rsp) /* If coming from userspace, switch stacks */
jnz 1f
.endif
call paranoid_entry
.else
@ -952,8 +934,10 @@ idtentry alignment_check do_alignment_check has_error_code=1
idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0
/* Reload gs selector with exception handling */
/* edi: new selector */
/*
* Reload gs selector with exception handling
* edi: new selector
*/
ENTRY(native_load_gs_index)
pushfq
DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
@ -982,7 +966,7 @@ ENTRY(do_softirq_own_stack)
mov %rsp, %rbp
incl PER_CPU_VAR(irq_count)
cmove PER_CPU_VAR(irq_stack_ptr), %rsp
push %rbp # backlink for old unwinder
push %rbp /* frame pointer backlink */
call __do_softirq
leaveq
decl PER_CPU_VAR(irq_count)
@ -1005,16 +989,17 @@ idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
* existing activation in its critical region -- if so, we pop the current
* activation and restart the handler using the previous one.
*/
ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */
/*
* Since we don't modify %rdi, xen_evtchn_do_upcall(struct pt_regs *) will
* see the correct pointer to the pt_regs
*/
movq %rdi, %rsp # we don't return, adjust the stack frame
movq %rdi, %rsp /* we don't return, adjust the stack frame */
11: incl PER_CPU_VAR(irq_count)
movq %rsp, %rbp
cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp
pushq %rbp # backlink for old unwinder
pushq %rbp /* frame pointer backlink */
call xen_evtchn_do_upcall
popq %rsp
decl PER_CPU_VAR(irq_count)
@ -1082,16 +1067,20 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
idtentry stack_segment do_stack_segment has_error_code=1
#ifdef CONFIG_XEN
idtentry xen_debug do_debug has_error_code=0
idtentry xen_int3 do_int3 has_error_code=0
idtentry xen_stack_segment do_stack_segment has_error_code=1
#endif
idtentry general_protection do_general_protection has_error_code=1
trace_idtentry page_fault do_page_fault has_error_code=1
#ifdef CONFIG_KVM_GUEST
idtentry async_page_fault do_async_page_fault has_error_code=1
#endif
#ifdef CONFIG_X86_MCE
idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip)
#endif
@ -1124,8 +1113,9 @@ END(paranoid_entry)
* in syscall entry), so checking for preemption here would
* be complicated. Fortunately, there's no good reason
* to try to handle preemption here.
*
* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)
*/
/* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */
ENTRY(paranoid_exit)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF_DEBUG
@ -1268,6 +1258,7 @@ ENTRY(nmi)
cmpq %rdx, 4*8(%rsp)
/* If the stack pointer is above the NMI stack, this is a normal NMI */
ja first_nmi
subq $EXCEPTION_STKSZ, %rdx
cmpq %rdx, 4*8(%rsp)
/* If it is below the NMI stack, it is a normal NMI */
@ -1349,9 +1340,7 @@ first_nmi:
/* Set the NMI executing variable on the stack. */
pushq $1
/*
* Leave room for the "copied" frame
*/
/* Leave room for the "copied" frame */
subq $(5*8), %rsp
/* Copy the stack frame to the Saved frame */
@ -1444,4 +1433,3 @@ ENTRY(ignore_sysret)
mov $-ENOSYS, %eax
sysret
END(ignore_sysret)