linux/arch/powerpc/kvm/book3s_interrupts.S

240 lines
5.5 KiB
ArmAsm
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0-only */
/*
*
* Copyright SUSE Linux Products GmbH 2009
*
* Authors: Alexander Graf <agraf@suse.de>
*/
#include <asm/ppc_asm.h>
#include <asm/kvm_asm.h>
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/asm-offsets.h>
#include <asm/exception-64s.h>
#include <asm/asm-compat.h>
#if defined(CONFIG_PPC_BOOK3S_64)
#ifdef PPC64_ELF_ABI_v2
#define FUNC(name) name
#else
#define FUNC(name) GLUE(.,name)
#endif
KVM: PPC: Book3S PR: Keep volatile reg values in vcpu rather than shadow_vcpu Currently PR-style KVM keeps the volatile guest register values (R0 - R13, CR, LR, CTR, XER, PC) in a shadow_vcpu struct rather than the main kvm_vcpu struct. For 64-bit, the shadow_vcpu exists in two places, a kmalloc'd struct and in the PACA, and it gets copied back and forth in kvmppc_core_vcpu_load/put(), because the real-mode code can't rely on being able to access the kmalloc'd struct. This changes the code to copy the volatile values into the shadow_vcpu as one of the last things done before entering the guest. Similarly the values are copied back out of the shadow_vcpu to the kvm_vcpu immediately after exiting the guest. We arrange for interrupts to be still disabled at this point so that we can't get preempted on 64-bit and end up copying values from the wrong PACA. This means that the accessor functions in kvm_book3s.h for these registers are greatly simplified, and are same between PR and HV KVM. In places where accesses to shadow_vcpu fields are now replaced by accesses to the kvm_vcpu, we can also remove the svcpu_get/put pairs. Finally, on 64-bit, we don't need the kmalloc'd struct at all any more. With this, the time to read the PVR one million times in a loop went from 567.7ms to 575.5ms (averages of 6 values), an increase of about 1.4% for this worse-case test for guest entries and exits. The standard deviation of the measurements is about 11ms, so the difference is only marginally significant statistically. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2013-09-20 12:52:43 +08:00
#define GET_SHADOW_VCPU(reg) addi reg, r13, PACA_SVCPU
#elif defined(CONFIG_PPC_BOOK3S_32)
#define FUNC(name) name
KVM: PPC: Book3S PR: Keep volatile reg values in vcpu rather than shadow_vcpu Currently PR-style KVM keeps the volatile guest register values (R0 - R13, CR, LR, CTR, XER, PC) in a shadow_vcpu struct rather than the main kvm_vcpu struct. For 64-bit, the shadow_vcpu exists in two places, a kmalloc'd struct and in the PACA, and it gets copied back and forth in kvmppc_core_vcpu_load/put(), because the real-mode code can't rely on being able to access the kmalloc'd struct. This changes the code to copy the volatile values into the shadow_vcpu as one of the last things done before entering the guest. Similarly the values are copied back out of the shadow_vcpu to the kvm_vcpu immediately after exiting the guest. We arrange for interrupts to be still disabled at this point so that we can't get preempted on 64-bit and end up copying values from the wrong PACA. This means that the accessor functions in kvm_book3s.h for these registers are greatly simplified, and are same between PR and HV KVM. In places where accesses to shadow_vcpu fields are now replaced by accesses to the kvm_vcpu, we can also remove the svcpu_get/put pairs. Finally, on 64-bit, we don't need the kmalloc'd struct at all any more. With this, the time to read the PVR one million times in a loop went from 567.7ms to 575.5ms (averages of 6 values), an increase of about 1.4% for this worse-case test for guest entries and exits. The standard deviation of the measurements is about 11ms, so the difference is only marginally significant statistically. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2013-09-20 12:52:43 +08:00
#define GET_SHADOW_VCPU(reg) lwz reg, (THREAD + THREAD_KVM_SVCPU)(r2)
#endif /* CONFIG_PPC_BOOK3S_64 */
#define VCPU_LOAD_NVGPRS(vcpu) \
PPC_LL r14, VCPU_GPR(R14)(vcpu); \
PPC_LL r15, VCPU_GPR(R15)(vcpu); \
PPC_LL r16, VCPU_GPR(R16)(vcpu); \
PPC_LL r17, VCPU_GPR(R17)(vcpu); \
PPC_LL r18, VCPU_GPR(R18)(vcpu); \
PPC_LL r19, VCPU_GPR(R19)(vcpu); \
PPC_LL r20, VCPU_GPR(R20)(vcpu); \
PPC_LL r21, VCPU_GPR(R21)(vcpu); \
PPC_LL r22, VCPU_GPR(R22)(vcpu); \
PPC_LL r23, VCPU_GPR(R23)(vcpu); \
PPC_LL r24, VCPU_GPR(R24)(vcpu); \
PPC_LL r25, VCPU_GPR(R25)(vcpu); \
PPC_LL r26, VCPU_GPR(R26)(vcpu); \
PPC_LL r27, VCPU_GPR(R27)(vcpu); \
PPC_LL r28, VCPU_GPR(R28)(vcpu); \
PPC_LL r29, VCPU_GPR(R29)(vcpu); \
PPC_LL r30, VCPU_GPR(R30)(vcpu); \
PPC_LL r31, VCPU_GPR(R31)(vcpu); \
/*****************************************************************************
* *
* Guest entry / exit code that is in kernel module memory (highmem) *
* *
****************************************************************************/
/* Registers:
* r3: vcpu pointer
*/
_GLOBAL(__kvmppc_vcpu_run)
kvm_start_entry:
/* Write correct stack frame */
mflr r0
PPC_STL r0,PPC_LR_STKOFF(r1)
/* Save host state to the stack */
PPC_STLU r1, -SWITCH_FRAME_SIZE(r1)
/* Save r3 (vcpu) */
SAVE_GPR(3, r1)
/* Save non-volatile registers (r14 - r31) */
SAVE_NVGPRS(r1)
/* Save CR */
mfcr r14
stw r14, _CCR(r1)
/* Save LR */
PPC_STL r0, _LINK(r1)
/* Load non-volatile guest state from the vcpu */
VCPU_LOAD_NVGPRS(r3)
kvm_start_lightweight:
KVM: PPC: Book3S PR: Keep volatile reg values in vcpu rather than shadow_vcpu Currently PR-style KVM keeps the volatile guest register values (R0 - R13, CR, LR, CTR, XER, PC) in a shadow_vcpu struct rather than the main kvm_vcpu struct. For 64-bit, the shadow_vcpu exists in two places, a kmalloc'd struct and in the PACA, and it gets copied back and forth in kvmppc_core_vcpu_load/put(), because the real-mode code can't rely on being able to access the kmalloc'd struct. This changes the code to copy the volatile values into the shadow_vcpu as one of the last things done before entering the guest. Similarly the values are copied back out of the shadow_vcpu to the kvm_vcpu immediately after exiting the guest. We arrange for interrupts to be still disabled at this point so that we can't get preempted on 64-bit and end up copying values from the wrong PACA. This means that the accessor functions in kvm_book3s.h for these registers are greatly simplified, and are same between PR and HV KVM. In places where accesses to shadow_vcpu fields are now replaced by accesses to the kvm_vcpu, we can also remove the svcpu_get/put pairs. Finally, on 64-bit, we don't need the kmalloc'd struct at all any more. With this, the time to read the PVR one million times in a loop went from 567.7ms to 575.5ms (averages of 6 values), an increase of about 1.4% for this worse-case test for guest entries and exits. The standard deviation of the measurements is about 11ms, so the difference is only marginally significant statistically. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2013-09-20 12:52:43 +08:00
/* Copy registers into shadow vcpu so we can access them in real mode */
bl FUNC(kvmppc_copy_to_svcpu)
nop
REST_GPR(3, r1)
#ifdef CONFIG_PPC_BOOK3S_64
KVM: PPC: Book3S PR: Keep volatile reg values in vcpu rather than shadow_vcpu Currently PR-style KVM keeps the volatile guest register values (R0 - R13, CR, LR, CTR, XER, PC) in a shadow_vcpu struct rather than the main kvm_vcpu struct. For 64-bit, the shadow_vcpu exists in two places, a kmalloc'd struct and in the PACA, and it gets copied back and forth in kvmppc_core_vcpu_load/put(), because the real-mode code can't rely on being able to access the kmalloc'd struct. This changes the code to copy the volatile values into the shadow_vcpu as one of the last things done before entering the guest. Similarly the values are copied back out of the shadow_vcpu to the kvm_vcpu immediately after exiting the guest. We arrange for interrupts to be still disabled at this point so that we can't get preempted on 64-bit and end up copying values from the wrong PACA. This means that the accessor functions in kvm_book3s.h for these registers are greatly simplified, and are same between PR and HV KVM. In places where accesses to shadow_vcpu fields are now replaced by accesses to the kvm_vcpu, we can also remove the svcpu_get/put pairs. Finally, on 64-bit, we don't need the kmalloc'd struct at all any more. With this, the time to read the PVR one million times in a loop went from 567.7ms to 575.5ms (averages of 6 values), an increase of about 1.4% for this worse-case test for guest entries and exits. The standard deviation of the measurements is about 11ms, so the difference is only marginally significant statistically. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2013-09-20 12:52:43 +08:00
/* Get the dcbz32 flag */
PPC_LL r0, VCPU_HFLAGS(r3)
rldicl r0, r0, 0, 63 /* r3 &= 1 */
stb r0, HSTATE_RESTORE_HID5(r13)
/* Load up guest SPRG3 value, since it's user readable */
lbz r4, VCPU_SHAREDBE(r3)
cmpwi r4, 0
ld r5, VCPU_SHARED(r3)
beq sprg3_little_endian
sprg3_big_endian:
#ifdef __BIG_ENDIAN__
ld r4, VCPU_SHARED_SPRG3(r5)
#else
addi r5, r5, VCPU_SHARED_SPRG3
ldbrx r4, 0, r5
#endif
b after_sprg3_load
sprg3_little_endian:
#ifdef __LITTLE_ENDIAN__
ld r4, VCPU_SHARED_SPRG3(r5)
#else
addi r5, r5, VCPU_SHARED_SPRG3
ldbrx r4, 0, r5
#endif
after_sprg3_load:
mtspr SPRN_SPRG3, r4
#endif /* CONFIG_PPC_BOOK3S_64 */
PPC_LL r4, VCPU_SHADOW_MSR(r3) /* get shadow_msr */
/* Jump to segment patching handler and into our guest */
KVM: PPC: book3s_pr: Simplify transitions between virtual and real mode This simplifies the way that the book3s_pr makes the transition to real mode when entering the guest. We now call kvmppc_entry_trampoline (renamed from kvmppc_rmcall) in the base kernel using a normal function call instead of doing an indirect call through a pointer in the vcpu. If kvm is a module, the module loader takes care of generating a trampoline as it does for other calls to functions outside the module. kvmppc_entry_trampoline then disables interrupts and jumps to kvmppc_handler_trampoline_enter in real mode using an rfi[d]. That then uses the link register as the address to return to (potentially in module space) when the guest exits. This also simplifies the way that we call the Linux interrupt handler when we exit the guest due to an external, decrementer or performance monitor interrupt. Instead of turning on the MMU, then deciding that we need to call the Linux handler and turning the MMU back off again, we now go straight to the handler at the point where we would turn the MMU on. The handler will then return to the virtual-mode code (potentially in the module). Along the way, this moves the setting and clearing of the HID5 DCBZ32 bit into real-mode interrupts-off code, and also makes sure that we clear the MSR[RI] bit before loading values into SRR0/1. The net result is that we no longer need any code addresses to be stored in vcpu->arch. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2011-07-23 15:41:44 +08:00
bl FUNC(kvmppc_entry_trampoline)
nop
/*
* This is the handler in module memory. It gets jumped at from the
* lowmem trampoline code, so it's basically the guest exit code.
*
*/
/*
* Register usage at this point:
*
* R1 = host R1
* R2 = host R2
* R12 = exit handler id
* R13 = PACA
* SVCPU.* = guest *
* MSR.EE = 1
*
*/
PPC_LL r3, GPR3(r1) /* vcpu pointer */
/*
* kvmppc_copy_from_svcpu can clobber volatile registers, save
* the exit handler id to the vcpu and restore it from there later.
*/
stw r12, VCPU_TRAP(r3)
KVM: PPC: Book3S PR: Keep volatile reg values in vcpu rather than shadow_vcpu Currently PR-style KVM keeps the volatile guest register values (R0 - R13, CR, LR, CTR, XER, PC) in a shadow_vcpu struct rather than the main kvm_vcpu struct. For 64-bit, the shadow_vcpu exists in two places, a kmalloc'd struct and in the PACA, and it gets copied back and forth in kvmppc_core_vcpu_load/put(), because the real-mode code can't rely on being able to access the kmalloc'd struct. This changes the code to copy the volatile values into the shadow_vcpu as one of the last things done before entering the guest. Similarly the values are copied back out of the shadow_vcpu to the kvm_vcpu immediately after exiting the guest. We arrange for interrupts to be still disabled at this point so that we can't get preempted on 64-bit and end up copying values from the wrong PACA. This means that the accessor functions in kvm_book3s.h for these registers are greatly simplified, and are same between PR and HV KVM. In places where accesses to shadow_vcpu fields are now replaced by accesses to the kvm_vcpu, we can also remove the svcpu_get/put pairs. Finally, on 64-bit, we don't need the kmalloc'd struct at all any more. With this, the time to read the PVR one million times in a loop went from 567.7ms to 575.5ms (averages of 6 values), an increase of about 1.4% for this worse-case test for guest entries and exits. The standard deviation of the measurements is about 11ms, so the difference is only marginally significant statistically. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2013-09-20 12:52:43 +08:00
/* Transfer reg values from shadow vcpu back to vcpu struct */
KVM: PPC: Book3S PR: Keep volatile reg values in vcpu rather than shadow_vcpu Currently PR-style KVM keeps the volatile guest register values (R0 - R13, CR, LR, CTR, XER, PC) in a shadow_vcpu struct rather than the main kvm_vcpu struct. For 64-bit, the shadow_vcpu exists in two places, a kmalloc'd struct and in the PACA, and it gets copied back and forth in kvmppc_core_vcpu_load/put(), because the real-mode code can't rely on being able to access the kmalloc'd struct. This changes the code to copy the volatile values into the shadow_vcpu as one of the last things done before entering the guest. Similarly the values are copied back out of the shadow_vcpu to the kvm_vcpu immediately after exiting the guest. We arrange for interrupts to be still disabled at this point so that we can't get preempted on 64-bit and end up copying values from the wrong PACA. This means that the accessor functions in kvm_book3s.h for these registers are greatly simplified, and are same between PR and HV KVM. In places where accesses to shadow_vcpu fields are now replaced by accesses to the kvm_vcpu, we can also remove the svcpu_get/put pairs. Finally, on 64-bit, we don't need the kmalloc'd struct at all any more. With this, the time to read the PVR one million times in a loop went from 567.7ms to 575.5ms (averages of 6 values), an increase of about 1.4% for this worse-case test for guest entries and exits. The standard deviation of the measurements is about 11ms, so the difference is only marginally significant statistically. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2013-09-20 12:52:43 +08:00
bl FUNC(kvmppc_copy_from_svcpu)
nop
#ifdef CONFIG_PPC_BOOK3S_64
/*
* Reload kernel SPRG3 value.
* No need to save guest value as usermode can't modify SPRG3.
*/
powerpc/booke64: Use SPRG7 for VDSO Previously SPRG3 was marked for use by both VDSO and critical interrupts (though critical interrupts were not fully implemented). In commit 8b64a9dfb091f1eca8b7e58da82f1e7d1d5fe0ad ("powerpc/booke64: Use SPRG0/3 scratch for bolted TLB miss & crit int"), Mihai Caraman made an attempt to resolve this conflict by restoring the VDSO value early in the critical interrupt, but this has some issues: - It's incompatible with EXCEPTION_COMMON which restores r13 from the by-then-overwritten scratch (this cost me some debugging time). - It forces critical exceptions to be a special case handled differently from even machine check and debug level exceptions. - It didn't occur to me that it was possible to make this work at all (by doing a final "ld r13, PACA_EXCRIT+EX_R13(r13)") until after I made (most of) this patch. :-) It might be worth investigating using a load rather than SPRG on return from all exceptions (except TLB misses where the scratch never leaves the SPRG) -- it could save a few cycles. Until then, let's stick with SPRG for all exceptions. Since we cannot use SPRG4-7 for scratch without corrupting the state of a KVM guest, move VDSO to SPRG7 on book3e. Since neither SPRG4-7 nor critical interrupts exist on book3s, SPRG3 is still used for VDSO there. Signed-off-by: Scott Wood <scottwood@freescale.com> Cc: Mihai Caraman <mihai.caraman@freescale.com> Cc: Anton Blanchard <anton@samba.org> Cc: Paul Mackerras <paulus@samba.org> Cc: kvm-ppc@vger.kernel.org
2014-03-11 06:29:38 +08:00
ld r3, PACA_SPRG_VDSO(r13)
mtspr SPRN_SPRG_VDSO_WRITE, r3
#endif /* CONFIG_PPC_BOOK3S_64 */
KVM: PPC: Book3S PR: Keep volatile reg values in vcpu rather than shadow_vcpu Currently PR-style KVM keeps the volatile guest register values (R0 - R13, CR, LR, CTR, XER, PC) in a shadow_vcpu struct rather than the main kvm_vcpu struct. For 64-bit, the shadow_vcpu exists in two places, a kmalloc'd struct and in the PACA, and it gets copied back and forth in kvmppc_core_vcpu_load/put(), because the real-mode code can't rely on being able to access the kmalloc'd struct. This changes the code to copy the volatile values into the shadow_vcpu as one of the last things done before entering the guest. Similarly the values are copied back out of the shadow_vcpu to the kvm_vcpu immediately after exiting the guest. We arrange for interrupts to be still disabled at this point so that we can't get preempted on 64-bit and end up copying values from the wrong PACA. This means that the accessor functions in kvm_book3s.h for these registers are greatly simplified, and are same between PR and HV KVM. In places where accesses to shadow_vcpu fields are now replaced by accesses to the kvm_vcpu, we can also remove the svcpu_get/put pairs. Finally, on 64-bit, we don't need the kmalloc'd struct at all any more. With this, the time to read the PVR one million times in a loop went from 567.7ms to 575.5ms (averages of 6 values), an increase of about 1.4% for this worse-case test for guest entries and exits. The standard deviation of the measurements is about 11ms, so the difference is only marginally significant statistically. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2013-09-20 12:52:43 +08:00
/* R7 = vcpu */
PPC_LL r7, GPR3(r1)
KVM: PPC: Book3S PR: Keep volatile reg values in vcpu rather than shadow_vcpu Currently PR-style KVM keeps the volatile guest register values (R0 - R13, CR, LR, CTR, XER, PC) in a shadow_vcpu struct rather than the main kvm_vcpu struct. For 64-bit, the shadow_vcpu exists in two places, a kmalloc'd struct and in the PACA, and it gets copied back and forth in kvmppc_core_vcpu_load/put(), because the real-mode code can't rely on being able to access the kmalloc'd struct. This changes the code to copy the volatile values into the shadow_vcpu as one of the last things done before entering the guest. Similarly the values are copied back out of the shadow_vcpu to the kvm_vcpu immediately after exiting the guest. We arrange for interrupts to be still disabled at this point so that we can't get preempted on 64-bit and end up copying values from the wrong PACA. This means that the accessor functions in kvm_book3s.h for these registers are greatly simplified, and are same between PR and HV KVM. In places where accesses to shadow_vcpu fields are now replaced by accesses to the kvm_vcpu, we can also remove the svcpu_get/put pairs. Finally, on 64-bit, we don't need the kmalloc'd struct at all any more. With this, the time to read the PVR one million times in a loop went from 567.7ms to 575.5ms (averages of 6 values), an increase of about 1.4% for this worse-case test for guest entries and exits. The standard deviation of the measurements is about 11ms, so the difference is only marginally significant statistically. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2013-09-20 12:52:43 +08:00
PPC_STL r14, VCPU_GPR(R14)(r7)
PPC_STL r15, VCPU_GPR(R15)(r7)
PPC_STL r16, VCPU_GPR(R16)(r7)
PPC_STL r17, VCPU_GPR(R17)(r7)
PPC_STL r18, VCPU_GPR(R18)(r7)
PPC_STL r19, VCPU_GPR(R19)(r7)
PPC_STL r20, VCPU_GPR(R20)(r7)
PPC_STL r21, VCPU_GPR(R21)(r7)
PPC_STL r22, VCPU_GPR(R22)(r7)
PPC_STL r23, VCPU_GPR(R23)(r7)
PPC_STL r24, VCPU_GPR(R24)(r7)
PPC_STL r25, VCPU_GPR(R25)(r7)
PPC_STL r26, VCPU_GPR(R26)(r7)
PPC_STL r27, VCPU_GPR(R27)(r7)
PPC_STL r28, VCPU_GPR(R28)(r7)
PPC_STL r29, VCPU_GPR(R29)(r7)
PPC_STL r30, VCPU_GPR(R30)(r7)
PPC_STL r31, VCPU_GPR(R31)(r7)
/* Pass the exit number as 2nd argument to kvmppc_handle_exit */
lwz r4, VCPU_TRAP(r7)
/* Restore r3 (vcpu) */
REST_GPR(3, r1)
bl FUNC(kvmppc_handle_exit_pr)
/* If RESUME_GUEST, get back in the loop */
cmpwi r3, RESUME_GUEST
beq kvm_loop_lightweight
cmpwi r3, RESUME_GUEST_NV
beq kvm_loop_heavyweight
kvm_exit_loop:
PPC_LL r4, _LINK(r1)
mtlr r4
lwz r14, _CCR(r1)
mtcr r14
/* Restore non-volatile host registers (r14 - r31) */
REST_NVGPRS(r1)
addi r1, r1, SWITCH_FRAME_SIZE
blr
kvm_loop_heavyweight:
PPC_LL r4, _LINK(r1)
PPC_STL r4, (PPC_LR_STKOFF + SWITCH_FRAME_SIZE)(r1)
/* Load vcpu */
REST_GPR(3, r1)
/* Load non-volatile guest state from the vcpu */
VCPU_LOAD_NVGPRS(r3)
/* Jump back into the beginning of this function */
b kvm_start_lightweight
kvm_loop_lightweight:
/* We'll need the vcpu pointer */
REST_GPR(3, r1)
/* Jump back into the beginning of this function */
b kvm_start_lightweight