2009-06-03 05:17:38 +08:00
|
|
|
/*
|
|
|
|
* This file contains the 64-bit "server" PowerPC variant
|
|
|
|
* of the low level exception handling including exception
|
|
|
|
* vectors, exception return, part of the slb and stab
|
|
|
|
* handling and other fixed offset specific things.
|
|
|
|
*
|
|
|
|
* This file is meant to be #included from head_64.S due to
|
2011-03-31 09:57:33 +08:00
|
|
|
* position dependent assembly.
|
2009-06-03 05:17:38 +08:00
|
|
|
*
|
|
|
|
* Most of this originates from head_64.S and thus has the same
|
|
|
|
* copyright history.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2009-07-15 04:52:52 +08:00
|
|
|
#include <asm/exception-64s.h>
|
2010-11-18 23:06:17 +08:00
|
|
|
#include <asm/ptrace.h>
|
2009-07-15 04:52:52 +08:00
|
|
|
|
2009-06-03 05:17:38 +08:00
|
|
|
/*
|
|
|
|
* We lay out physical memory as follows:
|
|
|
|
* 0x0000 - 0x00ff : Secondary processor spin code
|
|
|
|
* 0x0100 - 0x2fff : pSeries Interrupt prologs
|
|
|
|
* 0x3000 - 0x5fff : interrupt support, iSeries and common interrupt prologs
|
|
|
|
* 0x6000 - 0x6fff : Initial (CPU0) segment table
|
|
|
|
* 0x7000 - 0x7fff : FWNMI data area
|
|
|
|
* 0x8000 - : Early init and support code
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This is the start of the interrupt handlers for pSeries.
|
|
|
|
* This code runs with relocation off.
|
|
|
|
* Code from here to __end_interrupts gets copied down to real
|
|
|
|
* address 0x100 when we are running a relocatable kernel.
|
|
|
|
* Therefore any relative branches in this section must only
|
|
|
|
* branch to labels in this section.
|
|
|
|
*/
|
|
|
|
. = 0x100
|
|
|
|
.globl __start_interrupts
|
|
|
|
__start_interrupts:
|
|
|
|
|
2011-01-24 15:42:41 +08:00
|
|
|
.globl system_reset_pSeries;
|
|
|
|
/*
 * System reset vector, placed at 0x100 (first entry after
 * __start_interrupts). r13 is handed to SET_SCRATCH0 on entry. With
 * CONFIG_PPC_P7_NAP, a CPU-feature section then reads SRR1 to detect a
 * wakeup from nap and may branch to the power7 wakeup paths (or to
 * kvm_start_guest) instead of reaching the generic prolog, which is
 * invoked with system_reset_common as its handler argument.
 */
system_reset_pSeries:
|
|
|
|
HMT_MEDIUM;
|
|
|
|
SET_SCRATCH0(r13)
|
|
|
|
#ifdef CONFIG_PPC_P7_NAP
|
|
|
|
BEGIN_FTR_SECTION
|
|
|
|
/* Running native on arch 2.06 or later, check if we are
|
|
|
|
* waking up from nap. We only handle no state loss and
|
|
|
|
* supervisor state loss. We do -not- handle hypervisor
|
|
|
|
* state loss at this time.
|
|
|
|
*/
|
|
|
|
mfspr r13,SPRN_SRR1
|
KVM: PPC: Allow book3s_hv guests to use SMT processor modes
This lifts the restriction that book3s_hv guests can only run one
hardware thread per core, and allows them to use up to 4 threads
per core on POWER7. The host still has to run single-threaded.
This capability is advertised to qemu through a new KVM_CAP_PPC_SMT
capability. The return value of the ioctl querying this capability
is the number of vcpus per virtual CPU core (vcore), currently 4.
To use this, the host kernel should be booted with all threads
active, and then all the secondary threads should be offlined.
This will put the secondary threads into nap mode. KVM will then
wake them from nap mode and use them for running guest code (while
they are still offline). To wake the secondary threads, we send
them an IPI using a new xics_wake_cpu() function, implemented in
arch/powerpc/sysdev/xics/icp-native.c. In other words, at this stage
we assume that the platform has a XICS interrupt controller and
we are using icp-native.c to drive it. Since the woken thread will
need to acknowledge and clear the IPI, we also export the base
physical address of the XICS registers using kvmppc_set_xics_phys()
for use in the low-level KVM book3s code.
When a vcpu is created, it is assigned to a virtual CPU core.
The vcore number is obtained by dividing the vcpu number by the
number of threads per core in the host. This number is exported
to userspace via the KVM_CAP_PPC_SMT capability. If qemu wishes
to run the guest in single-threaded mode, it should make all vcpu
numbers be multiples of the number of threads per core.
We distinguish three states of a vcpu: runnable (i.e., ready to execute
the guest), blocked (that is, idle), and busy in host. We currently
implement a policy that the vcore can run only when all its threads
are runnable or blocked. This way, if a vcpu needs to execute elsewhere
in the kernel or in qemu, it can do so without being starved of CPU
by the other vcpus.
When a vcore starts to run, it executes in the context of one of the
vcpu threads. The other vcpu threads all go to sleep and stay asleep
until something happens requiring the vcpu thread to return to qemu,
or to wake up to run the vcore (this can happen when another vcpu
thread goes from busy in host state to blocked).
It can happen that a vcpu goes from blocked to runnable state (e.g.
because of an interrupt), and the vcore it belongs to is already
running. In that case it can start to run immediately as long as
none of the vcpus in the vcore have started to exit the guest.
We send the next free thread in the vcore an IPI to get it to start
to execute the guest. It synchronizes with the other threads via
the vcore->entry_exit_count field to make sure that it doesn't go
into the guest if the other vcpus are exiting by the time that it
is ready to actually enter the guest.
Note that there is no fixed relationship between the hardware thread
number and the vcpu number. Hardware threads are assigned to vcpus
as they become runnable, so we will always use the lower-numbered
hardware threads in preference to higher-numbered threads if not all
the vcpus in the vcore are runnable, regardless of which vcpus are
runnable.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:23:08 +08:00
|
|
|
/*
 * Rotate left by 16 (47-31) and keep only the two low-order bits, so
 * r13 ends up holding the 2-bit field SRR1[46:47]. The record form
 * updates cr0, which the following beq uses to skip to 9: when the
 * field is zero.
 */
rlwinm. r13,r13,47-31,30,31
|
|
|
|
beq 9f
|
|
|
|
|
|
|
|
/* waking up from powersave (nap) state */
|
|
|
|
/*
 * Compare the extracted SRR1 field with 2, leaving the result in cr1
 * for the branches further down: greater than 2 spins in place
 * (bgt cr1,.), equal to 2 goes to .power7_wakeup_loss, and anything
 * else goes to .power7_wakeup_noloss.
 */
cmpwi cr1,r13,2
|
2011-01-24 15:42:41 +08:00
|
|
|
/* Total loss of HV state is fatal, we could try to use the
|
|
|
|
* PIR to locate a PACA, then use an emergency stack etc...
|
|
|
|
* but for now, let's just stay stuck here
|
|
|
|
*/
|
KVM: PPC: Allow book3s_hv guests to use SMT processor modes
This lifts the restriction that book3s_hv guests can only run one
hardware thread per core, and allows them to use up to 4 threads
per core on POWER7. The host still has to run single-threaded.
This capability is advertised to qemu through a new KVM_CAP_PPC_SMT
capability. The return value of the ioctl querying this capability
is the number of vcpus per virtual CPU core (vcore), currently 4.
To use this, the host kernel should be booted with all threads
active, and then all the secondary threads should be offlined.
This will put the secondary threads into nap mode. KVM will then
wake them from nap mode and use them for running guest code (while
they are still offline). To wake the secondary threads, we send
them an IPI using a new xics_wake_cpu() function, implemented in
arch/powerpc/sysdev/xics/icp-native.c. In other words, at this stage
we assume that the platform has a XICS interrupt controller and
we are using icp-native.c to drive it. Since the woken thread will
need to acknowledge and clear the IPI, we also export the base
physical address of the XICS registers using kvmppc_set_xics_phys()
for use in the low-level KVM book3s code.
When a vcpu is created, it is assigned to a virtual CPU core.
The vcore number is obtained by dividing the vcpu number by the
number of threads per core in the host. This number is exported
to userspace via the KVM_CAP_PPC_SMT capability. If qemu wishes
to run the guest in single-threaded mode, it should make all vcpu
numbers be multiples of the number of threads per core.
We distinguish three states of a vcpu: runnable (i.e., ready to execute
the guest), blocked (that is, idle), and busy in host. We currently
implement a policy that the vcore can run only when all its threads
are runnable or blocked. This way, if a vcpu needs to execute elsewhere
in the kernel or in qemu, it can do so without being starved of CPU
by the other vcpus.
When a vcore starts to run, it executes in the context of one of the
vcpu threads. The other vcpu threads all go to sleep and stay asleep
until something happens requiring the vcpu thread to return to qemu,
or to wake up to run the vcore (this can happen when another vcpu
thread goes from busy in host state to blocked).
It can happen that a vcpu goes from blocked to runnable state (e.g.
because of an interrupt), and the vcore it belongs to is already
running. In that case it can start to run immediately as long as
none of the vcpus in the vcore have started to exit the guest.
We send the next free thread in the vcore an IPI to get it to start
to execute the guest. It synchronizes with the other threads via
the vcore->entry_exit_count field to make sure that it doesn't go
into the guest if the other vcpus are exiting by the time that it
is ready to actually enter the guest.
Note that there is no fixed relationship between the hardware thread
number and the vcpu number. Hardware threads are assigned to vcpus
as they become runnable, so we will always use the lower-numbered
hardware threads in preference to higher-numbered threads if not all
the vcpus in the vcore are runnable, regardless of which vcpus are
runnable.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:23:08 +08:00
|
|
|
bgt cr1,.
|
|
|
|
GET_PACA(r13)
|
|
|
|
|
|
|
|
#ifdef CONFIG_KVM_BOOK3S_64_HV
|
|
|
|
/*
 * Read the PACAPROCSTART byte of this CPU's PACA. If it equals 0x80,
 * the code below rewrites it to 1 and branches to kvm_start_guest;
 * otherwise execution continues at 1: with the normal wakeup handling.
 * The compare on r0 uses cr0, so the cr1 result set earlier is kept.
 * NOTE(review): 0x80 presumably marks a napping thread that KVM wants
 * to run a guest on - confirm against the code that writes this field.
 */
lbz r0,PACAPROCSTART(r13)
|
|
|
|
cmpwi r0,0x80
|
|
|
|
bne 1f
|
2011-12-06 03:35:32 +08:00
|
|
|
li r0,1
|
KVM: PPC: Allow book3s_hv guests to use SMT processor modes
This lifts the restriction that book3s_hv guests can only run one
hardware thread per core, and allows them to use up to 4 threads
per core on POWER7. The host still has to run single-threaded.
This capability is advertised to qemu through a new KVM_CAP_PPC_SMT
capability. The return value of the ioctl querying this capability
is the number of vcpus per virtual CPU core (vcore), currently 4.
To use this, the host kernel should be booted with all threads
active, and then all the secondary threads should be offlined.
This will put the secondary threads into nap mode. KVM will then
wake them from nap mode and use them for running guest code (while
they are still offline). To wake the secondary threads, we send
them an IPI using a new xics_wake_cpu() function, implemented in
arch/powerpc/sysdev/xics/icp-native.c. In other words, at this stage
we assume that the platform has a XICS interrupt controller and
we are using icp-native.c to drive it. Since the woken thread will
need to acknowledge and clear the IPI, we also export the base
physical address of the XICS registers using kvmppc_set_xics_phys()
for use in the low-level KVM book3s code.
When a vcpu is created, it is assigned to a virtual CPU core.
The vcore number is obtained by dividing the vcpu number by the
number of threads per core in the host. This number is exported
to userspace via the KVM_CAP_PPC_SMT capability. If qemu wishes
to run the guest in single-threaded mode, it should make all vcpu
numbers be multiples of the number of threads per core.
We distinguish three states of a vcpu: runnable (i.e., ready to execute
the guest), blocked (that is, idle), and busy in host. We currently
implement a policy that the vcore can run only when all its threads
are runnable or blocked. This way, if a vcpu needs to execute elsewhere
in the kernel or in qemu, it can do so without being starved of CPU
by the other vcpus.
When a vcore starts to run, it executes in the context of one of the
vcpu threads. The other vcpu threads all go to sleep and stay asleep
until something happens requiring the vcpu thread to return to qemu,
or to wake up to run the vcore (this can happen when another vcpu
thread goes from busy in host state to blocked).
It can happen that a vcpu goes from blocked to runnable state (e.g.
because of an interrupt), and the vcore it belongs to is already
running. In that case it can start to run immediately as long as
none of the vcpus in the vcore have started to exit the guest.
We send the next free thread in the vcore an IPI to get it to start
to execute the guest. It synchronizes with the other threads via
the vcore->entry_exit_count field to make sure that it doesn't go
into the guest if the other vcpus are exiting by the time that it
is ready to actually enter the guest.
Note that there is no fixed relationship between the hardware thread
number and the vcpu number. Hardware threads are assigned to vcpus
as they become runnable, so we will always use the lower-numbered
hardware threads in preference to higher-numbered threads if not all
the vcpus in the vcore are runnable, regardless of which vcpus are
runnable.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:23:08 +08:00
|
|
|
stb r0,PACAPROCSTART(r13)
|
|
|
|
b kvm_start_guest
|
|
|
|
1:
|
|
|
|
#endif
|
|
|
|
|
|
|
|
beq cr1,2f
|
|
|
|
b .power7_wakeup_noloss
|
|
|
|
2: b .power7_wakeup_loss
|
|
|
|
9:
|
powerpc, KVM: Split HVMODE_206 cpu feature bit into separate HV and architecture bits
This replaces the single CPU_FTR_HVMODE_206 bit with two bits, one to
indicate that we have a usable hypervisor mode, and another to indicate
that the processor conforms to PowerISA version 2.06. We also add
another bit to indicate that the processor conforms to ISA version 2.01
and set that for PPC970 and derivatives.
Some PPC970 chips (specifically those in Apple machines) have a
hypervisor mode in that MSR[HV] is always 1, but the hypervisor mode
is not useful in the sense that there is no way to run any code in
supervisor mode (HV=0 PR=0). On these processors, the LPES0 and LPES1
bits in HID4 are always 0, and we use that as a way of detecting that
hypervisor mode is not useful.
Where we have a feature section in assembly code around code that
only applies on POWER7 in hypervisor mode, we use a construct like
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
The definition of END_FTR_SECTION_IFSET is such that the code will
be enabled (not overwritten with nops) only if all bits in the
provided mask are set.
Note that the CPU feature check in __tlbie() only needs to check the
ARCH_206 bit, not the HVMODE bit, because __tlbie() can only get called
if we are running bare-metal, i.e. in hypervisor mode.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:26:11 +08:00
|
|
|
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
|
2011-01-24 15:42:41 +08:00
|
|
|
#endif /* CONFIG_PPC_P7_NAP */
|
2011-06-29 08:18:26 +08:00
|
|
|
EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD,
|
|
|
|
NOTEST, 0x100)
|
2009-06-03 05:17:38 +08:00
|
|
|
|
|
|
|
. = 0x200
|
2011-06-29 08:18:26 +08:00
|
|
|
/* Machine check vector at 0x200: holds only a branch to machine_check_pSeries. */
machine_check_pSeries_1:
|
|
|
|
/* This is moved out of line as it can be patched by FW, but
|
|
|
|
* some code path might still want to branch into the original
|
|
|
|
* vector
|
|
|
|
*/
|
|
|
|
b machine_check_pSeries
|
2009-06-03 05:17:38 +08:00
|
|
|
|
|
|
|
. = 0x300
|
|
|
|
.globl data_access_pSeries
|
|
|
|
data_access_pSeries:
|
|
|
|
HMT_MEDIUM
|
2011-04-05 11:59:58 +08:00
|
|
|
SET_SCRATCH0(r13)
|
2011-06-29 08:18:26 +08:00
|
|
|
#ifndef CONFIG_POWER4_ONLY
|
2009-06-03 05:17:38 +08:00
|
|
|
/*
 * MMU feature section: the branch to data_access_check_stab is closed by
 * END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB), i.e. it is keyed on the
 * MMU_FTR_SLB bit being clear. Open it with the MMU_ variant so the
 * begin/end macros are a matching pair.
 */
BEGIN_MMU_FTR_SECTION
|
2011-06-29 08:18:26 +08:00
|
|
|
b data_access_check_stab
|
|
|
|
data_access_not_stab:
|
|
|
|
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB)
|
|
|
|
#endif
|
|
|
|
EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD,
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
KVMTEST_PR, 0x300)
|
2009-06-03 05:17:38 +08:00
|
|
|
|
|
|
|
. = 0x380
|
|
|
|
.globl data_access_slb_pSeries
|
|
|
|
data_access_slb_pSeries:
|
|
|
|
HMT_MEDIUM
|
2011-04-05 11:59:58 +08:00
|
|
|
SET_SCRATCH0(r13)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380)
|
2009-06-03 05:17:38 +08:00
|
|
|
/*
 * Store r3 at PACA_EXSLB+EX_R3 (relative to r13) so that r3 can be
 * reused: the next instruction loads the DAR into it.
 * NOTE(review): r3 is presumably the address argument expected by
 * .slb_miss_realmode - confirm against that routine.
 */
std r3,PACA_EXSLB+EX_R3(r13)
|
|
|
|
mfspr r3,SPRN_DAR
|
|
|
|
#ifdef __DISABLED__
|
|
|
|
/* Keep that around for when we re-implement dynamic VSIDs */
|
|
|
|
cmpdi r3,0
|
|
|
|
bge slb_miss_user_pseries
|
|
|
|
#endif /* __DISABLED__ */
|
2011-06-29 08:18:26 +08:00
|
|
|
mfspr r12,SPRN_SRR1
|
2009-06-03 05:17:38 +08:00
|
|
|
#ifndef CONFIG_RELOCATABLE
|
|
|
|
b .slb_miss_realmode
|
|
|
|
#else
|
|
|
|
/*
|
|
|
|
* We can't just use a direct branch to .slb_miss_realmode
|
|
|
|
* because the distance from here to there depends on where
|
|
|
|
* the kernel ends up being put.
|
|
|
|
*/
|
|
|
|
/*
 * CTR is overwritten below with the handler address for the bctr, so
 * copy its current value into r11 first.
 * NOTE(review): presumably .slb_miss_realmode restores CTR from r11 -
 * confirm against that routine.
 */
mfctr r11
|
|
|
|
ld r10,PACAKBASE(r13)
|
|
|
|
LOAD_HANDLER(r10, .slb_miss_realmode)
|
|
|
|
mtctr r10
|
|
|
|
bctr
|
|
|
|
#endif
|
|
|
|
|
2011-04-05 12:27:11 +08:00
|
|
|
/*
 * Vector 0x400 (instruction_access), emitted entirely by the
 * STD_EXCEPTION_PSERIES macro. NOTE(review): the macro is defined outside
 * this file; the label and handler names it derives from
 * "instruction_access" should be confirmed there.
 */
STD_EXCEPTION_PSERIES(0x400, 0x400, instruction_access)
|
2009-06-03 05:17:38 +08:00
|
|
|
|
|
|
|
. = 0x480
|
|
|
|
.globl instruction_access_slb_pSeries
|
|
|
|
instruction_access_slb_pSeries:
|
|
|
|
HMT_MEDIUM
|
2011-04-05 11:59:58 +08:00
|
|
|
SET_SCRATCH0(r13)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480)
|
2009-06-03 05:17:38 +08:00
|
|
|
std r3,PACA_EXSLB+EX_R3(r13)
|
|
|
|
mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
|
|
|
|
#ifdef __DISABLED__
|
|
|
|
/* Keep that around for when we re-implement dynamic VSIDs */
|
|
|
|
cmpdi r3,0
|
|
|
|
bge slb_miss_user_pseries
|
|
|
|
#endif /* __DISABLED__ */
|
2011-06-29 08:18:26 +08:00
|
|
|
mfspr r12,SPRN_SRR1
|
2009-06-03 05:17:38 +08:00
|
|
|
#ifndef CONFIG_RELOCATABLE
|
|
|
|
b .slb_miss_realmode
|
|
|
|
#else
|
|
|
|
/*
 * Same relocatable-kernel sequence as the 0x380 handler: the distance
 * to .slb_miss_realmode is not fixed, so the handler address is built
 * in r10 from PACAKBASE and reached through CTR. The current CTR value
 * is copied to r11 before CTR is overwritten.
 */
mfctr r11
|
|
|
|
ld r10,PACAKBASE(r13)
|
|
|
|
LOAD_HANDLER(r10, .slb_miss_realmode)
|
|
|
|
mtctr r10
|
|
|
|
bctr
|
|
|
|
#endif
|
|
|
|
|
2011-04-05 12:27:11 +08:00
|
|
|
/* We open code these as we can't have a ". = x" (even with
|
|
|
|
* x = ".") within a feature section
|
|
|
|
*/
|
2011-04-05 12:20:31 +08:00
|
|
|
. = 0x500;
|
2011-04-05 12:27:11 +08:00
|
|
|
.globl hardware_interrupt_pSeries;
|
|
|
|
.globl hardware_interrupt_hv;
|
2011-04-05 12:20:31 +08:00
|
|
|
hardware_interrupt_pSeries:
|
2011-04-05 12:27:11 +08:00
|
|
|
hardware_interrupt_hv:
|
2011-04-05 12:20:31 +08:00
|
|
|
BEGIN_FTR_SECTION
|
2011-06-29 08:18:26 +08:00
|
|
|
_MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt,
|
|
|
|
EXC_HV, SOFTEN_TEST_HV)
|
|
|
|
KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
FTR_SECTION_ELSE
|
|
|
|
_MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt,
|
KVM: PPC: book3s_hv: Add support for PPC970-family processors
This adds support for running KVM guests in supervisor mode on those
PPC970 processors that have a usable hypervisor mode. Unfortunately,
Apple G5 machines have supervisor mode disabled (MSR[HV] is forced to
1), but the YDL PowerStation does have a usable hypervisor mode.
There are several differences between the PPC970 and POWER7 in how
guests are managed. These differences are accommodated using the
CPU_FTR_ARCH_201 (PPC970) and CPU_FTR_ARCH_206 (POWER7) CPU feature
bits. Notably, on PPC970:
* The LPCR, LPID or RMOR registers don't exist, and the functions of
those registers are provided by bits in HID4 and one bit in HID0.
* External interrupts can be directed to the hypervisor, but unlike
POWER7 they are masked by MSR[EE] in non-hypervisor modes and use
SRR0/1 not HSRR0/1.
* There is no virtual RMA (VRMA) mode; the guest must use an RMO
(real mode offset) area.
* The TLB entries are not tagged with the LPID, so it is necessary to
flush the whole TLB on partition switch. Furthermore, when switching
partitions we have to ensure that no other CPU is executing the tlbie
or tlbsync instructions in either the old or the new partition,
otherwise undefined behaviour can occur.
* The PMU has 8 counters (PMC registers) rather than 6.
* The DSCR, PURR, SPURR, AMR, AMOR, UAMOR registers don't exist.
* The SLB has 64 entries rather than 32.
* There is no mediated external interrupt facility, so if we switch to
a guest that has a virtual external interrupt pending but the guest
has MSR[EE] = 0, we have to arrange to have an interrupt pending for
it so that we can get control back once it re-enables interrupts. We
do that by sending ourselves an IPI with smp_send_reschedule after
hard-disabling interrupts.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:40:08 +08:00
|
|
|
EXC_STD, SOFTEN_TEST_HV_201)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500)
|
powerpc, KVM: Split HVMODE_206 cpu feature bit into separate HV and architecture bits
This replaces the single CPU_FTR_HVMODE_206 bit with two bits, one to
indicate that we have a usable hypervisor mode, and another to indicate
that the processor conforms to PowerISA version 2.06. We also add
another bit to indicate that the processor conforms to ISA version 2.01
and set that for PPC970 and derivatives.
Some PPC970 chips (specifically those in Apple machines) have a
hypervisor mode in that MSR[HV] is always 1, but the hypervisor mode
is not useful in the sense that there is no way to run any code in
supervisor mode (HV=0 PR=0). On these processors, the LPES0 and LPES1
bits in HID4 are always 0, and we use that as a way of detecting that
hypervisor mode is not useful.
Where we have a feature section in assembly code around code that
only applies on POWER7 in hypervisor mode, we use a construct like
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
The definition of END_FTR_SECTION_IFSET is such that the code will
be enabled (not overwritten with nops) only if all bits in the
provided mask are set.
Note that the CPU feature check in __tlbie() only needs to check the
ARCH_206 bit, not the HVMODE bit, because __tlbie() can only get called
if we are running bare-metal, i.e. in hypervisor mode.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:26:11 +08:00
|
|
|
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
|
2011-04-05 12:20:31 +08:00
|
|
|
|
2011-04-05 12:27:11 +08:00
|
|
|
/* Prolog for the 0x600 slot, label argument "alignment"; the macro arguments appear to be (location, vector, label) -- TODO confirm in asm/exception-64s.h */
STD_EXCEPTION_PSERIES(0x600, 0x600, alignment)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
/* KVM hook for vector 0x600 using the PACA_EXGEN save area; the _PR suffix presumably restricts it to PR-mode (book3s_pr) KVM builds -- confirm */
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x600)
|
2011-06-29 08:18:26 +08:00
|
|
|
|
2011-04-05 12:27:11 +08:00
|
|
|
/* Prolog for the 0x700 slot, label argument "program_check" (macro expansion not visible here -- see asm/exception-64s.h) */
STD_EXCEPTION_PSERIES(0x700, 0x700, program_check)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
/* KVM hook paired with the 0x700 prolog; presumably only emitted for PR-mode KVM -- confirm against the macro definition */
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x700)
|
2011-06-29 08:18:26 +08:00
|
|
|
|
2011-04-05 12:27:11 +08:00
|
|
|
/* Prolog for the 0x800 slot, label argument "fp_unavailable" (macro expansion not visible here -- see asm/exception-64s.h) */
STD_EXCEPTION_PSERIES(0x800, 0x800, fp_unavailable)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
/* KVM hook paired with the 0x800 prolog; presumably only emitted for PR-mode KVM -- confirm against the macro definition */
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x800)
|
2011-04-05 12:20:31 +08:00
|
|
|
|
2011-04-05 12:27:11 +08:00
|
|
|
/*
 * Decrementer prologs: the maskable pSeries form at 0x900, followed by a
 * hypervisor (_HV) form at 0x980.  The HV form passes 0x982 as its second
 * argument rather than repeating the location; presumably that value tags
 * the hypervisor flavour of the vector -- TODO confirm in asm/exception-64s.h.
 */
MASKABLE_EXCEPTION_PSERIES(0x900, 0x900, decrementer)
|
2011-06-29 08:18:26 +08:00
|
|
|
MASKABLE_EXCEPTION_HV(0x980, 0x982, decrementer)
|
2011-04-05 12:20:31 +08:00
|
|
|
|
2011-04-05 12:27:11 +08:00
|
|
|
/* Prolog for the 0xa00 slot, label argument "trap_0a" (macro expansion not visible here -- see asm/exception-64s.h) */
STD_EXCEPTION_PSERIES(0xa00, 0xa00, trap_0a)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
/* KVM hook paired with the 0xa00 prolog; presumably only emitted for PR-mode KVM -- confirm against the macro definition */
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xa00)
|
2011-06-29 08:18:26 +08:00
|
|
|
|
2011-04-05 12:27:11 +08:00
|
|
|
/* Prolog for the 0xb00 slot, label argument "trap_0b" (macro expansion not visible here -- see asm/exception-64s.h) */
STD_EXCEPTION_PSERIES(0xb00, 0xb00, trap_0b)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
/* KVM hook paired with the 0xb00 prolog; presumably only emitted for PR-mode KVM -- confirm against the macro definition */
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xb00)
|
2009-06-03 05:17:38 +08:00
|
|
|
|
|
|
|
/*
 * System call interrupt prolog, assembled at fixed offset 0xc00.
 * Like the other prologs in this region it runs with relocation off.
 * It leaves the interrupted SRR0/SRR1 in r11/r12 and the interrupted r13
 * in r9, loads the PACA pointer into r13, and then uses rfid to continue
 * at system_call_entry with the MSR value read from PACAKMSR.
 */
. = 0xc00
|
|
|
|
.globl system_call_pSeries
|
|
|
|
system_call_pSeries:
|
|
|
|
HMT_MEDIUM
|
2011-06-29 08:18:26 +08:00
|
|
|
/*
 * KVM check, only built with CONFIG_KVM_BOOK3S_64_HANDLER: park r13 via
 * SET_SCRATCH0, fetch the PACA, store r9/r10 into the PACA_EXGEN area and
 * copy CR into r9 before invoking KVMTEST(0xc00).  KVMTEST presumably
 * diverts to the KVM handler for this vector when a guest was running --
 * confirm in asm/exception-64s.h.  r13 is reloaded from scratch0 afterwards.
 */
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
|
|
|
|
SET_SCRATCH0(r13)
|
|
|
|
GET_PACA(r13)
|
|
|
|
std r9,PACA_EXGEN+EX_R9(r13)
|
|
|
|
std r10,PACA_EXGEN+EX_R10(r13)
|
|
|
|
mfcr r9
|
|
|
|
KVMTEST(0xc00)
|
|
|
|
GET_SCRATCH0(r13)
|
|
|
|
#endif
|
2009-06-03 05:17:38 +08:00
|
|
|
/*
 * Feature section kept only on CPUs with CPU_FTR_REAL_LE: if r0
 * (presumably the syscall number) equals 0x1ebe, branch forward to local
 * label 1, the fast LE/BE switch path.
 */
BEGIN_FTR_SECTION
|
|
|
|
cmpdi r0,0x1ebe
|
|
|
|
beq- 1f
|
|
|
|
END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
|
|
|
|
/* keep the interrupted r13 in r9; r13 is about to become the PACA pointer */
mr r9,r13
|
2011-01-20 14:50:21 +08:00
|
|
|
GET_PACA(r13)
|
2009-06-03 05:17:38 +08:00
|
|
|
/* capture the interrupted SRR0/SRR1 before they are overwritten below */
mfspr r11,SPRN_SRR0
|
|
|
|
mfspr r12,SPRN_SRR1
|
2011-05-09 05:36:44 +08:00
|
|
|
/* SRR0 <- address of system_call_entry, formed by LOAD_HANDLER from the PACAKBASE value */
ld r10,PACAKBASE(r13)
|
|
|
|
LOAD_HANDLER(r10, system_call_entry)
|
|
|
|
mtspr SPRN_SRR0,r10
|
|
|
|
/* SRR1 <- PACAKMSR value, so rfid installs that MSR when it jumps to SRR0 */
ld r10,PACAKMSR(r13)
|
2009-06-03 05:17:38 +08:00
|
|
|
mtspr SPRN_SRR1,r10
|
|
|
|
rfid
|
|
|
|
b . /* prevent speculative execution */
|
|
|
|
|
2011-06-29 08:18:26 +08:00
|
|
|
/* KVM handler for vector 0xc00 using the PACA_EXGEN save area; presumably the target of KVMTEST(0xc00) in the system call prolog -- confirm */
KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00)
|
|
|
|
|
2009-06-03 05:17:38 +08:00
|
|
|
/* Fast LE/BE switch system call */
|
|
|
|
/*
 * Local label 1 is the target of the "beq- 1f" in the system call prolog
 * (taken when r0 == 0x1ebe).  The path flips the MSR_LE bit in SRR1 and
 * issues rfid, so execution resumes at the unchanged SRR0 with the
 * endianness bit toggled.  Only r12 is used as scratch here.
 */
1: mfspr r12,SPRN_SRR1
|
|
|
|
xori r12,r12,MSR_LE
|
|
|
|
mtspr SPRN_SRR1,r12
|
|
|
|
rfid /* return to userspace */
|
|
|
|
/* spin-in-place branch after rfid, same pattern as the one marked "prevent speculative execution" in the main path */
b .
|
|
|
|
|
2011-04-05 12:27:11 +08:00
|
|
|
/* Prolog for the 0xd00 slot, label argument "single_step" (macro expansion not visible here -- see asm/exception-64s.h) */
STD_EXCEPTION_PSERIES(0xd00, 0xd00, single_step)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
/* KVM hook paired with the 0xd00 prolog; presumably only emitted for PR-mode KVM -- confirm against the macro definition */
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xd00)
|
2011-04-05 12:27:11 +08:00
|
|
|
|
|
|
|
/* At 0xe??? we have a bunch of hypervisor exceptions, we branch
|
|
|
|
* out of line to handle them
|
|
|
|
*/
|
|
|
|
/*
 * Hypervisor vector slots 0xe00-0xe60.  Each slot contains a single branch
 * to an out-of-line *_hv prolog; the slots are only 0x10-0x20 bytes apart.
 */
/* 0xe00 -> h_data_storage_hv */
. = 0xe00
|
|
|
|
b h_data_storage_hv
|
|
|
|
/* 0xe20 -> h_instr_storage_hv */
. = 0xe20
|
|
|
|
b h_instr_storage_hv
|
|
|
|
/* 0xe40 -> emulation_assist_hv */
. = 0xe40
|
|
|
|
b emulation_assist_hv
|
|
|
|
/* 0xe50 -> hmi_exception_hv */
. = 0xe50
|
|
|
|
b hmi_exception_hv
|
|
|
|
/* NOTE(review): 0xe60 branches to the same hmi_exception_hv prolog as 0xe50 -- confirm both slots are meant to share it */
. = 0xe60
|
|
|
|
b hmi_exception_hv
|
2009-06-03 05:17:38 +08:00
|
|
|
|
|
|
|
/* We need to deal with the Altivec unavailable exception
|
|
|
|
* here which is at 0xf20, thus in the middle of the
|
|
|
|
* prolog code of the PerformanceMonitor one. A little
|
|
|
|
* trickery is thus necessary
|
|
|
|
*/
|
2009-10-18 09:24:06 +08:00
|
|
|
/*
 * Trampolines for the 0xf00, 0xf20 and 0xf40 slots.  The slots are 0x20
 * bytes apart, so each one holds just a branch to its full prolog
 * (performance_monitor_pSeries, altivec_unavailable_pSeries,
 * vsx_unavailable_pSeries), which is defined elsewhere in the file.
 * NOTE(review): every *_1 label is defined before its ". = addr" directive,
 * so the label takes the location-counter value from before the move, not
 * the slot address itself -- confirm nothing relies on the label equalling
 * the slot address.
 */
performance_monitor_pSeries_1:
|
2009-06-03 05:17:38 +08:00
|
|
|
. = 0xf00
|
|
|
|
b performance_monitor_pSeries
|
|
|
|
|
2009-10-18 09:24:06 +08:00
|
|
|
altivec_unavailable_pSeries_1:
|
2009-06-03 05:17:38 +08:00
|
|
|
. = 0xf20
|
|
|
|
b altivec_unavailable_pSeries
|
|
|
|
|
2009-10-18 09:24:06 +08:00
|
|
|
vsx_unavailable_pSeries_1:
|
2009-06-03 05:17:38 +08:00
|
|
|
. = 0xf40
|
|
|
|
b vsx_unavailable_pSeries
|
|
|
|
|
|
|
|
/*
 * Built only with CONFIG_CBE_RAS: hypervisor-style (_HV) prolog at 0x1200
 * with label argument "cbe_system_error", plus a KVM_HANDLER_SKIP entry for
 * 0x1202 using the EXC_HV flavour.  The 0x1202 value matches the second
 * argument of the prolog macro; its exact meaning is defined by the macros
 * in asm/exception-64s.h -- TODO confirm.
 */
#ifdef CONFIG_CBE_RAS
|
2011-04-05 12:27:11 +08:00
|
|
|
STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error)
|
2011-09-13 12:15:31 +08:00
|
|
|
KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1202)
|
2009-06-03 05:17:38 +08:00
|
|
|
#endif /* CONFIG_CBE_RAS */
|
2011-06-29 08:18:26 +08:00
|
|
|
|
2011-04-05 12:27:11 +08:00
|
|
|
/* Prolog for the 0x1300 slot, label argument "instruction_breakpoint" (macro expansion not visible here -- see asm/exception-64s.h) */
STD_EXCEPTION_PSERIES(0x1300, 0x1300, instruction_breakpoint)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
/* KVM hook paired with the 0x1300 prolog; unlike most vectors here it uses the _PR_SKIP variant -- presumably PR-mode only with an additional skip path, confirm in the macro definition */
KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_STD, 0x1300)
|
2011-06-29 08:18:26 +08:00
|
|
|
|
2009-06-03 05:17:38 +08:00
|
|
|
#ifdef CONFIG_CBE_RAS
|
2011-04-05 12:27:11 +08:00
|
|
|
STD_EXCEPTION_HV(0x1600, 0x1602, cbe_maintenance)
|
2011-09-13 12:15:31 +08:00
|
|
|
KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1602)
|
2009-06-03 05:17:38 +08:00
|
|
|
#endif /* CONFIG_CBE_RAS */
|
2011-06-29 08:18:26 +08:00
|
|
|
|
2011-04-05 12:27:11 +08:00
|
|
|
STD_EXCEPTION_PSERIES(0x1700, 0x1700, altivec_assist)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x1700)
|
2011-06-29 08:18:26 +08:00
|
|
|
|
2009-06-03 05:17:38 +08:00
|
|
|
#ifdef CONFIG_CBE_RAS
|
2011-04-05 12:27:11 +08:00
|
|
|
STD_EXCEPTION_HV(0x1800, 0x1802, cbe_thermal)
|
2011-09-13 12:15:31 +08:00
|
|
|
KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1802)
|
2009-06-03 05:17:38 +08:00
|
|
|
#endif /* CONFIG_CBE_RAS */
|
|
|
|
|
|
|
|
. = 0x3000
|
|
|
|
|
2011-04-05 12:27:11 +08:00
|
|
|
/*** Out of line interrupts support ***/
|
|
|
|
|
2011-06-29 08:18:26 +08:00
|
|
|
/* moved from 0x200 */
|
|
|
|
machine_check_pSeries:
|
|
|
|
.globl machine_check_fwnmi
|
|
|
|
machine_check_fwnmi:
|
|
|
|
HMT_MEDIUM
|
|
|
|
SET_SCRATCH0(r13) /* save r13 */
|
|
|
|
EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common,
|
|
|
|
EXC_STD, KVMTEST, 0x200)
|
|
|
|
KVM_HANDLER_SKIP(PACA_EXMC, EXC_STD, 0x200)
|
|
|
|
|
|
|
|
#ifndef CONFIG_POWER4_ONLY
|
|
|
|
/* moved from 0x300 */
|
|
|
|
data_access_check_stab:
|
|
|
|
GET_PACA(r13)
|
|
|
|
std r9,PACA_EXSLB+EX_R9(r13)
|
|
|
|
std r10,PACA_EXSLB+EX_R10(r13)
|
|
|
|
mfspr r10,SPRN_DAR
|
|
|
|
mfspr r9,SPRN_DSISR
|
|
|
|
srdi r10,r10,60
|
|
|
|
rlwimi r10,r9,16,0x20
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
#ifdef CONFIG_KVM_BOOK3S_PR
|
2011-06-29 08:20:58 +08:00
|
|
|
lbz r9,HSTATE_IN_GUEST(r13)
|
2011-06-29 08:18:26 +08:00
|
|
|
rlwimi r10,r9,8,0x300
|
|
|
|
#endif
|
|
|
|
mfcr r9
|
|
|
|
cmpwi r10,0x2c
|
|
|
|
beq do_stab_bolted_pSeries
|
|
|
|
mtcrf 0x80,r9
|
|
|
|
ld r9,PACA_EXSLB+EX_R9(r13)
|
|
|
|
ld r10,PACA_EXSLB+EX_R10(r13)
|
|
|
|
b data_access_not_stab
|
|
|
|
do_stab_bolted_pSeries:
|
|
|
|
std r11,PACA_EXSLB+EX_R11(r13)
|
|
|
|
std r12,PACA_EXSLB+EX_R12(r13)
|
|
|
|
GET_SCRATCH0(r10)
|
|
|
|
std r10,PACA_EXSLB+EX_R13(r13)
|
|
|
|
EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD)
|
|
|
|
#endif /* CONFIG_POWER4_ONLY */
|
|
|
|
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_STD, 0x300)
|
|
|
|
KVM_HANDLER_PR_SKIP(PACA_EXSLB, EXC_STD, 0x380)
|
|
|
|
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x400)
|
|
|
|
KVM_HANDLER_PR(PACA_EXSLB, EXC_STD, 0x480)
|
|
|
|
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x900)
|
2011-06-29 08:18:26 +08:00
|
|
|
KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x982)
|
|
|
|
|
|
|
|
.align 7
|
2011-04-05 12:27:11 +08:00
|
|
|
/* moved from 0xe00 */
|
2011-06-29 08:18:26 +08:00
|
|
|
STD_EXCEPTION_HV(., 0xe02, h_data_storage)
|
|
|
|
KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0xe02)
|
|
|
|
STD_EXCEPTION_HV(., 0xe22, h_instr_storage)
|
|
|
|
KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe22)
|
|
|
|
STD_EXCEPTION_HV(., 0xe42, emulation_assist)
|
|
|
|
KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe42)
|
|
|
|
STD_EXCEPTION_HV(., 0xe62, hmi_exception) /* need to flush cache ? */
|
|
|
|
KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe62)
|
2009-06-03 05:17:38 +08:00
|
|
|
|
|
|
|
/* moved from 0xf00 */
|
2011-04-05 12:27:11 +08:00
|
|
|
STD_EXCEPTION_PSERIES(., 0xf00, performance_monitor)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf00)
|
2011-04-05 12:27:11 +08:00
|
|
|
STD_EXCEPTION_PSERIES(., 0xf20, altivec_unavailable)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf20)
|
2011-04-05 12:27:11 +08:00
|
|
|
STD_EXCEPTION_PSERIES(., 0xf40, vsx_unavailable)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40)
|
2009-06-03 05:17:38 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* An interrupt came in while soft-disabled; clear EE in SRR1,
|
|
|
|
* clear paca->hard_enabled and return.
|
|
|
|
*/
|
|
|
|
/*
 * masked_interrupt: return path for an interrupt that arrived while
 * soft-disabled.  Stores r10 to the PACA hard-enabled byte, rewrites
 * SRR1 with MSR_EE cleared, restores CR field 0, r9, r10 and r13, and
 * returns with rfid.
 * NOTE(review): relies on the entry prolog leaving r10 = 0, r9 = saved
 * CR, r9/r10 saved in PACA_EXGEN and r13 saved in SCRATCH0 -- confirm
 * against the prolog macros in asm/exception-64s.h.
 */
masked_interrupt:
|
|
|
|
stb r10,PACAHARDIRQEN(r13) /* paca hard-enabled flag = low byte of r10 */
|
|
|
|
mtcrf 0x80,r9 /* restore CR field 0 from r9 */
|
|
|
|
ld r9,PACA_EXGEN+EX_R9(r13) /* reload r9 from the exgen save area */
|
|
|
|
mfspr r10,SPRN_SRR1
|
|
|
|
rldicl r10,r10,48,1 /* clear MSR_EE: rotate left 48, mask off the top bit */
|
|
|
|
rotldi r10,r10,16 /* 48 + 16 = 64: remaining bits back in place */
|
|
|
|
mtspr SPRN_SRR1,r10 /* rfid will return with the EE bit clear */
|
|
|
|
ld r10,PACA_EXGEN+EX_R10(r13) /* reload r10 from the exgen save area */
|
2011-04-05 11:59:58 +08:00
|
|
|
GET_SCRATCH0(r13) /* presumably reloads the r13 stashed by SET_SCRATCH0 -- confirm */
|
2009-06-03 05:17:38 +08:00
|
|
|
rfid
|
|
|
|
b . /* prevent speculative execution past rfid */
|
|
|
|
|
2011-04-05 12:20:31 +08:00
|
|
|
/*
 * masked_Hinterrupt: same sequence as masked_interrupt, but operating
 * on HSRR1 and returning with hrfid.  Stores r10 to the PACA
 * hard-enabled byte, rewrites HSRR1 with MSR_EE cleared, restores CR
 * field 0, r9, r10 and r13, then returns.
 * NOTE(review): relies on the entry prolog leaving r10 = 0, r9 = saved
 * CR, r9/r10 saved in PACA_EXGEN and r13 saved in SCRATCH0 -- confirm
 * against the prolog macros in asm/exception-64s.h.
 */
masked_Hinterrupt:
|
|
|
|
stb r10,PACAHARDIRQEN(r13) /* paca hard-enabled flag = low byte of r10 */
|
|
|
|
mtcrf 0x80,r9 /* restore CR field 0 from r9 */
|
|
|
|
ld r9,PACA_EXGEN+EX_R9(r13) /* reload r9 from the exgen save area */
|
|
|
|
mfspr r10,SPRN_HSRR1
|
|
|
|
rldicl r10,r10,48,1 /* clear MSR_EE: rotate left 48, mask off the top bit */
|
|
|
|
rotldi r10,r10,16 /* 48 + 16 = 64: remaining bits back in place */
|
|
|
|
mtspr SPRN_HSRR1,r10 /* hrfid will return with the EE bit clear */
|
|
|
|
ld r10,PACA_EXGEN+EX_R10(r13) /* reload r10 from the exgen save area */
|
2011-04-05 11:59:58 +08:00
|
|
|
GET_SCRATCH0(r13) /* presumably reloads the r13 stashed by SET_SCRATCH0 -- confirm */
|
2011-04-05 12:20:31 +08:00
|
|
|
hrfid
|
|
|
|
b . /* prevent speculative execution past hrfid */
|
|
|
|
|
2009-06-03 05:17:38 +08:00
|
|
|
#ifdef CONFIG_PPC_PSERIES
|
|
|
|
/*
|
|
|
|
* Vectors for the FWNMI option. Share common code.
|
|
|
|
*/
|
|
|
|
.globl system_reset_fwnmi
|
|
|
|
.align 7
|
|
|
|
system_reset_fwnmi:
|
|
|
|
HMT_MEDIUM
|
2011-04-05 11:59:58 +08:00
|
|
|
SET_SCRATCH0(r13) /* save r13 */
|
2011-06-29 08:18:26 +08:00
|
|
|
EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD,
|
|
|
|
NOTEST, 0x100)
|
2009-06-03 05:17:38 +08:00
|
|
|
|
|
|
|
#endif /* CONFIG_PPC_PSERIES */
|
|
|
|
|
|
|
|
#ifdef __DISABLED__
|
|
|
|
/*
|
|
|
|
* This is used for when the SLB miss handler has to go virtual,
|
|
|
|
* which no longer happens for now but will again once we re-implement
|
|
|
|
* dynamic VSIDs for shared page tables
|
|
|
|
*/
|
|
|
|
slb_miss_user_pseries:
|
|
|
|
std r10,PACA_EXGEN+EX_R10(r13)
|
|
|
|
std r11,PACA_EXGEN+EX_R11(r13)
|
|
|
|
std r12,PACA_EXGEN+EX_R12(r13)
|
2011-04-05 11:59:58 +08:00
|
|
|
GET_SCRATCH0(r10)
|
2009-06-03 05:17:38 +08:00
|
|
|
ld r11,PACA_EXSLB+EX_R9(r13)
|
|
|
|
ld r12,PACA_EXSLB+EX_R3(r13)
|
|
|
|
std r10,PACA_EXGEN+EX_R13(r13)
|
|
|
|
std r11,PACA_EXGEN+EX_R9(r13)
|
|
|
|
std r12,PACA_EXGEN+EX_R3(r13)
|
|
|
|
clrrdi r12,r13,32
|
|
|
|
mfmsr r10
|
|
|
|
mfspr r11,SRR0 /* save SRR0 */
|
|
|
|
ori r12,r12,slb_miss_user_common@l /* virt addr of handler */
|
|
|
|
ori r10,r10,MSR_IR|MSR_DR|MSR_RI
|
|
|
|
mtspr SRR0,r12
|
|
|
|
mfspr r12,SRR1 /* and SRR1 */
|
|
|
|
mtspr SRR1,r10
|
|
|
|
rfid
|
|
|
|
b . /* prevent spec. execution */
|
|
|
|
#endif /* __DISABLED__ */
|
|
|
|
|
|
|
|
.align 7
|
|
|
|
.globl __end_interrupts
|
|
|
|
__end_interrupts:
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Code from here down to __end_handlers is invoked from the
|
|
|
|
* exception prologs above. Because the prologs assemble the
|
|
|
|
* addresses of these handlers using the LOAD_HANDLER macro,
|
|
|
|
* which uses an addi instruction, these handlers must be in
|
|
|
|
* the first 32k of the kernel image.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*** Common interrupt handlers ***/
|
|
|
|
|
|
|
|
STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Machine check is different because we use a different
|
|
|
|
* save area: PACA_EXMC instead of PACA_EXGEN.
|
|
|
|
*/
|
|
|
|
.align 7
|
|
|
|
.globl machine_check_common
|
|
|
|
/*
 * machine_check_common: common-path machine check handler (trap 0x200).
 * Runs the common prolog against the PACA_EXMC save area, then calls
 * .save_nvgprs and .machine_check_exception and leaves through
 * .ret_from_except.
 */
machine_check_common:
|
|
|
|
EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC) /* uses PACA_EXMC, not PACA_EXGEN */
|
|
|
|
FINISH_NAP
|
|
|
|
DISABLE_INTS
|
|
|
|
bl .save_nvgprs
|
|
|
|
addi r3,r1,STACK_FRAME_OVERHEAD /* r3 = r1 + STACK_FRAME_OVERHEAD; presumably the saved-register frame -- confirm */
|
|
|
|
bl .machine_check_exception
|
|
|
|
b .ret_from_except
|
|
|
|
|
|
|
|
STD_EXCEPTION_COMMON_LITE(0x900, decrementer, .timer_interrupt)
|
|
|
|
STD_EXCEPTION_COMMON(0xa00, trap_0a, .unknown_exception)
|
|
|
|
STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception)
|
|
|
|
STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception)
|
|
|
|
STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception)
|
2011-04-05 12:27:11 +08:00
|
|
|
STD_EXCEPTION_COMMON(0xe40, emulation_assist, .program_check_exception)
|
|
|
|
STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception)
|
2009-06-03 05:17:38 +08:00
|
|
|
STD_EXCEPTION_COMMON_IDLE(0xf00, performance_monitor, .performance_monitor_exception)
|
|
|
|
STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception)
|
|
|
|
#ifdef CONFIG_ALTIVEC
|
|
|
|
STD_EXCEPTION_COMMON(0x1700, altivec_assist, .altivec_assist_exception)
|
|
|
|
#else
|
|
|
|
STD_EXCEPTION_COMMON(0x1700, altivec_assist, .unknown_exception)
|
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_CBE_RAS
|
|
|
|
STD_EXCEPTION_COMMON(0x1200, cbe_system_error, .cbe_system_error_exception)
|
|
|
|
STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, .cbe_maintenance_exception)
|
|
|
|
STD_EXCEPTION_COMMON(0x1800, cbe_thermal, .cbe_thermal_exception)
|
|
|
|
#endif /* CONFIG_CBE_RAS */
|
|
|
|
|
|
|
|
.align 7
|
|
|
|
/* system_call_entry: stub that branches unconditionally to system_call_common. */
system_call_entry:
|
|
|
|
b system_call_common
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Here we have detected that the kernel stack pointer is bad.
|
|
|
|
* r9 contains the saved CR, r13 points to the paca,
|
|
|
|
* r10 contains the (bad) kernel stack pointer,
|
|
|
|
* r11 and r12 contain the saved SRR0 and SRR1, and r3 points to the
* exception save area that the code below reads through the EX_* offsets.
|
|
|
|
* We switch to using an emergency stack, save the registers there,
|
|
|
|
* and call kernel_bad_stack(), which panics.
|
|
|
|
*/
|
|
|
|
bad_stack:
|
|
|
|
ld r1,PACAEMERGSP(r13)
|
|
|
|
subi r1,r1,64+INT_FRAME_SIZE
|
|
|
|
std r9,_CCR(r1)
|
|
|
|
std r10,GPR1(r1)
|
|
|
|
std r11,_NIP(r1)
|
|
|
|
std r12,_MSR(r1)
|
|
|
|
mfspr r11,SPRN_DAR
|
|
|
|
mfspr r12,SPRN_DSISR
|
|
|
|
std r11,_DAR(r1)
|
|
|
|
std r12,_DSISR(r1)
|
|
|
|
mflr r10
|
|
|
|
mfctr r11
|
|
|
|
mfxer r12
|
|
|
|
std r10,_LINK(r1)
|
|
|
|
std r11,_CTR(r1)
|
|
|
|
std r12,_XER(r1)
|
|
|
|
SAVE_GPR(0,r1)
|
|
|
|
SAVE_GPR(2,r1)
|
2011-05-02 03:46:44 +08:00
|
|
|
ld r10,EX_R3(r3)
|
|
|
|
std r10,GPR3(r1)
|
|
|
|
SAVE_GPR(4,r1)
|
|
|
|
SAVE_4GPRS(5,r1)
|
|
|
|
ld r9,EX_R9(r3)
|
|
|
|
ld r10,EX_R10(r3)
|
|
|
|
SAVE_2GPRS(9,r1)
|
|
|
|
ld r9,EX_R11(r3)
|
|
|
|
ld r10,EX_R12(r3)
|
|
|
|
ld r11,EX_R13(r3)
|
|
|
|
std r9,GPR11(r1)
|
|
|
|
std r10,GPR12(r1)
|
|
|
|
std r11,GPR13(r1)
|
2011-05-02 03:48:20 +08:00
|
|
|
BEGIN_FTR_SECTION
|
|
|
|
ld r10,EX_CFAR(r3)
|
|
|
|
std r10,ORIG_GPR3(r1)
|
|
|
|
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
|
2011-05-02 03:46:44 +08:00
|
|
|
SAVE_8GPRS(14,r1)
|
2009-06-03 05:17:38 +08:00
|
|
|
SAVE_10GPRS(22,r1)
|
|
|
|
lhz r12,PACA_TRAP_SAVE(r13)
|
|
|
|
std r12,_TRAP(r1)
|
|
|
|
addi r11,r1,INT_FRAME_SIZE
|
|
|
|
std r11,0(r1)
|
|
|
|
li r12,0
|
|
|
|
std r12,0(r11)
|
|
|
|
ld r2,PACATOC(r13)
|
2011-05-02 03:46:44 +08:00
|
|
|
ld r11,exception_marker@toc(r2)
|
|
|
|
std r12,RESULT(r1)
|
|
|
|
std r11,STACK_FRAME_OVERHEAD-16(r1)
|
2009-06-03 05:17:38 +08:00
|
|
|
1: addi r3,r1,STACK_FRAME_OVERHEAD
|
|
|
|
bl .kernel_bad_stack
|
|
|
|
b 1b
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Here r13 points to the paca, r9 contains the saved CR,
|
|
|
|
* SRR0 and SRR1 are saved in r11 and r12,
|
|
|
|
* r9 - r13 are saved in paca->exgen.
|
|
|
|
*/
|
|
|
|
.align 7
|
|
|
|
.globl data_access_common
|
|
|
|
/*
 * data_access_common: common-path handler for trap 0x300.
 * Stashes DAR and DSISR in the PACA_EXGEN save area (using r10 as
 * scratch) before running the common prolog, then reloads them into
 * r3/r4, sets r5 to the trap number and branches to .do_hash_page.
 */
data_access_common:
|
|
|
|
mfspr r10,SPRN_DAR
|
|
|
|
std r10,PACA_EXGEN+EX_DAR(r13) /* 64-bit DAR -> exgen save area */
|
|
|
|
mfspr r10,SPRN_DSISR
|
|
|
|
stw r10,PACA_EXGEN+EX_DSISR(r13) /* DSISR is stored as a 32-bit word */
|
|
|
|
EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN)
|
|
|
|
ld r3,PACA_EXGEN+EX_DAR(r13) /* r3 = DAR value stashed above */
|
|
|
|
lwz r4,PACA_EXGEN+EX_DSISR(r13) /* r4 = DSISR value stashed above */
|
|
|
|
li r5,0x300 /* r5 = trap number */
|
|
|
|
b .do_hash_page /* Try to handle as hpte fault */
|
|
|
|
|
2011-04-05 12:27:11 +08:00
|
|
|
.align 7
|
|
|
|
.globl h_data_storage_common
|
|
|
|
/*
 * h_data_storage_common: common-path handler for the hypervisor data
 * storage interrupt (prolog trap number 0xe00).
 * Stashes HDAR and HDSISR in the EX_DAR/EX_DSISR slots of PACA_EXGEN,
 * runs the common prolog, then reports the event through
 * .unknown_exception and leaves through .ret_from_except.
 */
h_data_storage_common:
|
|
|
|
mfspr r10,SPRN_HDAR
|
|
|
|
std r10,PACA_EXGEN+EX_DAR(r13) /* HDAR goes in the EX_DAR slot */
|
|
|
|
mfspr r10,SPRN_HDSISR
|
|
|
|
stw r10,PACA_EXGEN+EX_DSISR(r13) /* HDSISR goes in the EX_DSISR slot (32-bit) */
|
|
|
|
EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN)
|
|
|
|
bl .save_nvgprs
|
|
|
|
addi r3,r1,STACK_FRAME_OVERHEAD /* r3 = r1 + STACK_FRAME_OVERHEAD; presumably the saved-register frame -- confirm */
|
|
|
|
bl .unknown_exception
|
|
|
|
b .ret_from_except
|
|
|
|
|
2009-06-03 05:17:38 +08:00
|
|
|
.align 7
|
|
|
|
.globl instruction_access_common
|
|
|
|
/*
 * instruction_access_common: common-path handler for trap 0x400.
 * After the common prolog, loads the saved NIP into r3, derives r4 from
 * r12, sets r5 to the trap number and branches to .do_hash_page.
 * NOTE(review): r12 presumably still holds the saved SRR1 at this
 * point, so r4 carries the SRR1 fault bits in place of a DSISR value --
 * confirm against EXCEPTION_PROLOG_COMMON.
 */
instruction_access_common:
|
|
|
|
EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN)
|
|
|
|
ld r3,_NIP(r1) /* r3 = NIP saved in the exception frame */
|
|
|
|
andis. r4,r12,0x5820 /* r4 = r12 & 0x58200000; also updates CR0 */
|
|
|
|
li r5,0x400 /* r5 = trap number */
|
|
|
|
b .do_hash_page /* Try to handle as hpte fault */
|
|
|
|
|
2011-04-05 12:27:11 +08:00
|
|
|
STD_EXCEPTION_COMMON(0xe20, h_instr_storage, .unknown_exception)
|
|
|
|
|
2009-06-03 05:17:38 +08:00
|
|
|
/*
|
|
|
|
* Here is the common SLB miss user that is used when going to virtual
|
|
|
|
* mode for SLB misses; it is currently unused (compiled out below).
|
|
|
|
*/
|
|
|
|
#ifdef __DISABLED__
|
|
|
|
.align 7
|
|
|
|
.globl slb_miss_user_common
|
|
|
|
slb_miss_user_common:
|
|
|
|
mflr r10
|
|
|
|
std r3,PACA_EXGEN+EX_DAR(r13)
|
|
|
|
stw r9,PACA_EXGEN+EX_CCR(r13)
|
|
|
|
std r10,PACA_EXGEN+EX_LR(r13)
|
|
|
|
std r11,PACA_EXGEN+EX_SRR0(r13)
|
|
|
|
bl .slb_allocate_user
|
|
|
|
|
|
|
|
ld r10,PACA_EXGEN+EX_LR(r13)
|
|
|
|
ld r3,PACA_EXGEN+EX_R3(r13)
|
|
|
|
lwz r9,PACA_EXGEN+EX_CCR(r13)
|
|
|
|
ld r11,PACA_EXGEN+EX_SRR0(r13)
|
|
|
|
mtlr r10
|
|
|
|
beq- slb_miss_fault
|
|
|
|
|
|
|
|
andi. r10,r12,MSR_RI /* check for unrecoverable exception */
|
|
|
|
beq- unrecov_user_slb
|
|
|
|
mfmsr r10
|
|
|
|
|
|
|
|
.machine push
|
|
|
|
.machine "power4"
|
|
|
|
mtcrf 0x80,r9
|
|
|
|
.machine pop
|
|
|
|
|
|
|
|
clrrdi r10,r10,2 /* clear RI before setting SRR0/1 */
|
|
|
|
mtmsrd r10,1
|
|
|
|
|
|
|
|
mtspr SRR0,r11
|
|
|
|
mtspr SRR1,r12
|
|
|
|
|
|
|
|
ld r9,PACA_EXGEN+EX_R9(r13)
|
|
|
|
ld r10,PACA_EXGEN+EX_R10(r13)
|
|
|
|
ld r11,PACA_EXGEN+EX_R11(r13)
|
|
|
|
ld r12,PACA_EXGEN+EX_R12(r13)
|
|
|
|
ld r13,PACA_EXGEN+EX_R13(r13)
|
|
|
|
rfid
|
|
|
|
b .
|
|
|
|
|
|
|
|
slb_miss_fault:
|
|
|
|
EXCEPTION_PROLOG_COMMON(0x380, PACA_EXGEN)
|
|
|
|
ld r4,PACA_EXGEN+EX_DAR(r13)
|
|
|
|
li r5,0
|
|
|
|
std r4,_DAR(r1)
|
|
|
|
std r5,_DSISR(r1)
|
|
|
|
b handle_page_fault
|
|
|
|
|
|
|
|
unrecov_user_slb:
|
|
|
|
EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN)
|
|
|
|
DISABLE_INTS
|
|
|
|
bl .save_nvgprs
|
|
|
|
1: addi r3,r1,STACK_FRAME_OVERHEAD
|
|
|
|
bl .unrecoverable_exception
|
|
|
|
b 1b
|
|
|
|
|
|
|
|
#endif /* __DISABLED__ */
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Real-mode SLB miss handler.
 *
 * r13 points to the PACA, r9 contains the saved CR,
 * r12 contains the saved SRR1, SRR0 is still ready for return
 * r3 has the faulting address
 * r9 - r13 are saved in paca->exslb.
 * r3 is saved in paca->exslb as well (EX_R3 slot, reloaded below)
 * We assume we aren't going to take any exceptions during this procedure.
 */
_GLOBAL(slb_miss_realmode)
	mflr	r10
#ifdef CONFIG_RELOCATABLE
	mtctr	r11
#endif

	stw	r9,PACA_EXSLB+EX_CCR(r13)	/* save CR in exc. frame */
	std	r10,PACA_EXSLB+EX_LR(r13)	/* save LR */

	bl	.slb_allocate_realmode

	/* All done -- return from exception. */

	ld	r10,PACA_EXSLB+EX_LR(r13)
	ld	r3,PACA_EXSLB+EX_R3(r13)
	lwz	r9,PACA_EXSLB+EX_CCR(r13)	/* get saved CR */
#ifdef CONFIG_PPC_ISERIES
BEGIN_FW_FTR_SECTION
	ld	r11,PACALPPACAPTR(r13)
	ld	r11,LPPACASRR0(r11)		/* get SRR0 value */
END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
#endif /* CONFIG_PPC_ISERIES */

	mtlr	r10

	andi.	r10,r12,MSR_RI	/* check for unrecoverable exception */
	beq-	2f

.machine	push
.machine	"power4"
	mtcrf	0x80,r9
	mtcrf	0x01,r9		/* slb_allocate uses cr0 and cr7 */
.machine	pop

#ifdef CONFIG_PPC_ISERIES
BEGIN_FW_FTR_SECTION
	mtspr	SPRN_SRR0,r11
	mtspr	SPRN_SRR1,r12
END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
#endif /* CONFIG_PPC_ISERIES */
	/* Restore the scratch registers; r13 goes last, it is the base */
	ld	r9,PACA_EXSLB+EX_R9(r13)
	ld	r10,PACA_EXSLB+EX_R10(r13)
	ld	r11,PACA_EXSLB+EX_R11(r13)
	ld	r12,PACA_EXSLB+EX_R12(r13)
	ld	r13,PACA_EXSLB+EX_R13(r13)
	rfid
	b	.	/* prevent speculative execution */

	/*
	 * MSR_RI was clear in the interrupted context.  On iSeries branch
	 * straight to unrecov_slb; otherwise rfid into unrecov_slb using
	 * the kernel base (PACAKBASE) and kernel MSR (PACAKMSR), keeping
	 * the original SRR0 in r11.
	 */
2:
#ifdef CONFIG_PPC_ISERIES
BEGIN_FW_FTR_SECTION
	b	unrecov_slb
END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
#endif /* CONFIG_PPC_ISERIES */
	mfspr	r11,SPRN_SRR0
	ld	r10,PACAKBASE(r13)
	LOAD_HANDLER(r10,unrecov_slb)
	mtspr	SPRN_SRR0,r10
	ld	r10,PACAKMSR(r13)
	mtspr	SPRN_SRR1,r10
	rfid
	b	.
|
|
|
|
|
|
|
|
/*
 * Unrecoverable SLB/STAB miss: build a full exception frame from
 * paca->exslb (trap 0x4100) and call unrecoverable_exception(regs).
 * Loops if that call ever returns.
 */
unrecov_slb:
	EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
	DISABLE_INTS
	bl	.save_nvgprs
1:	addi	r3,r1,STACK_FRAME_OVERHEAD
	bl	.unrecoverable_exception
	b	1b
|
|
|
|
|
|
|
|
/*
 * External (0x500) interrupt.
 *
 * hardware_interrupt_entry is a second, global entry point located
 * after the prolog and FINISH_NAP; code entering there skips both.
 * The runlatch call is patched in only on CPUs with CPU_FTR_CTRL.
 */
	.align	7
	.globl hardware_interrupt_common
	.globl hardware_interrupt_entry
hardware_interrupt_common:
	EXCEPTION_PROLOG_COMMON(0x500, PACA_EXGEN)
	FINISH_NAP
hardware_interrupt_entry:
	DISABLE_INTS
BEGIN_FTR_SECTION
	bl	.ppc64_runlatch_on
END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
	addi	r3,r1,STACK_FRAME_OVERHEAD	/* r3 = pointer to saved regs */
	bl	.do_IRQ
	b	.ret_from_except_lite
|
|
|
|
|
|
|
|
#ifdef CONFIG_PPC_970_NAP
/*
 * Clears the bits given in r10 from the flags word in r9, writes the
 * result to TI_LOCAL_FLAGS(r11), and rewrites the saved NIP with the
 * saved LR so the interrupted (idle) code resumes as if it had
 * executed a blr.
 * NOTE(review): r9/r10/r11 are set up by the caller (presumably the
 * FINISH_NAP macro) -- confirm there.
 */
power4_fixup_nap:
	andc	r9,r9,r10
	std	r9,TI_LOCAL_FLAGS(r11)
	ld	r10,_LINK(r1)		/* make idle task do the */
	std	r10,_NIP(r1)		/* equivalent of a blr */
	blr
#endif
|
|
|
|
|
|
|
|
/*
 * Alignment (0x600) interrupt.
 *
 * DAR/DSISR are stashed in paca->exgen before the common prolog and
 * then copied into the exception frame (_DAR/_DSISR) so that
 * alignment_exception(regs) can read them.
 */
	.align	7
	.globl alignment_common
alignment_common:
	mfspr	r10,SPRN_DAR
	std	r10,PACA_EXGEN+EX_DAR(r13)
	mfspr	r10,SPRN_DSISR
	stw	r10,PACA_EXGEN+EX_DSISR(r13)
	EXCEPTION_PROLOG_COMMON(0x600, PACA_EXGEN)
	ld	r3,PACA_EXGEN+EX_DAR(r13)
	lwz	r4,PACA_EXGEN+EX_DSISR(r13)
	std	r3,_DAR(r1)
	std	r4,_DSISR(r1)
	bl	.save_nvgprs
	addi	r3,r1,STACK_FRAME_OVERHEAD	/* r3 = pointer to saved regs */
	ENABLE_INTS
	bl	.alignment_exception
	b	.ret_from_except
|
|
|
|
|
|
|
|
/*
 * Program check (0x700) interrupt.
 * Unlike its neighbours this handler uses DISABLE_INTS (not
 * ENABLE_INTS) before calling program_check_exception(regs).
 */
	.align	7
	.globl program_check_common
program_check_common:
	EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN)
	bl	.save_nvgprs
	addi	r3,r1,STACK_FRAME_OVERHEAD	/* r3 = pointer to saved regs */
	DISABLE_INTS
	bl	.program_check_exception
	b	.ret_from_except
|
|
|
|
|
|
|
|
/*
 * FP unavailable (0x800) interrupt.
 *
 * From user mode the FPU state is loaded and we return via the fast
 * path.  From kernel mode kernel_fp_unavailable_exception(regs) is
 * called; BUG_OPCODE follows in case that call returns.
 * NOTE(review): the bne relies on the condition register left by
 * EXCEPTION_PROLOG_COMMON (presumably the MSR_PR test) -- confirm.
 */
	.align	7
	.globl fp_unavailable_common
fp_unavailable_common:
	EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN)
	bne	1f			/* if from user, just load it up */
	bl	.save_nvgprs
	addi	r3,r1,STACK_FRAME_OVERHEAD
	ENABLE_INTS
	bl	.kernel_fp_unavailable_exception
	BUG_OPCODE
1:	bl	.load_up_fpu
	b	fast_exception_return
|
|
|
|
|
|
|
|
/*
 * AltiVec unavailable (0xf20) interrupt.
 *
 * With CONFIG_ALTIVEC and CPU_FTR_ALTIVEC set, the "not equal" case
 * loads the AltiVec state and returns via the fast path; the "equal"
 * case skips to the slow path below.  In every other configuration
 * the slow path is taken unconditionally and
 * altivec_unavailable_exception(regs) is called.
 * NOTE(review): the beq relies on the condition register left by
 * EXCEPTION_PROLOG_COMMON (presumably the MSR_PR test) -- confirm.
 */
	.align	7
	.globl altivec_unavailable_common
altivec_unavailable_common:
	EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN)
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
	beq	1f
	bl	.load_up_altivec
	b	fast_exception_return
1:
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
	bl	.save_nvgprs
	addi	r3,r1,STACK_FRAME_OVERHEAD	/* r3 = pointer to saved regs */
	ENABLE_INTS
	bl	.altivec_unavailable_exception
	b	.ret_from_except
|
|
|
|
|
|
|
|
/*
 * VSX unavailable (0xf40) interrupt.
 *
 * With CONFIG_VSX and CPU_FTR_VSX set, the "not equal" case branches
 * (without linking) to load_up_vsx, which is then responsible for
 * returning from the exception.  Otherwise
 * vsx_unavailable_exception(regs) is called.
 * NOTE(review): the bne relies on the condition register left by
 * EXCEPTION_PROLOG_COMMON (presumably the MSR_PR test) -- confirm.
 */
	.align	7
	.globl vsx_unavailable_common
vsx_unavailable_common:
	EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN)
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
	bne	.load_up_vsx
1:
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
	bl	.save_nvgprs
	addi	r3,r1,STACK_FRAME_OVERHEAD	/* r3 = pointer to saved regs */
	ENABLE_INTS
	bl	.vsx_unavailable_exception
	b	.ret_from_except
|
|
|
|
|
|
|
|
/* Global marker label placed after the last common handler above. */
	.align	7
	.globl	__end_handlers
__end_handlers:
|
|
|
|
|
|
|
|
/*
 * Return from an exception with minimal checks.
 * The caller is assumed to have done EXCEPTION_PROLOG_COMMON.
 * If interrupts have been enabled, or anything has been
 * done that might have changed the scheduling status of
 * any task or sent any task a signal, you should use
 * ret_from_except or ret_from_except_lite instead of this.
 *
 * fast_exc_return_irq additionally restores the soft-enable state
 * from the frame (SOFTE) and paca->hard_enabled from the saved MSR_EE
 * bit, then joins fast_exception_return at local label 1 with the
 * saved MSR already in r12.
 */
fast_exc_return_irq:			/* restores irq state too */
	ld	r3,SOFTE(r1)
	TRACE_AND_RESTORE_IRQ(r3);
	ld	r12,_MSR(r1)
	rldicl	r4,r12,49,63		/* get MSR_EE to LSB */
	stb	r4,PACAHARDIRQEN(r13)	/* restore paca->hard_enabled */
	b	1f

	.globl	fast_exception_return
fast_exception_return:
	ld	r12,_MSR(r1)
1:	ld	r11,_NIP(r1)
	andi.	r3,r12,MSR_RI		/* check if RI is set */
	beq-	unrecov_fer

#ifdef CONFIG_VIRT_CPU_ACCOUNTING
	andi.	r3,r12,MSR_PR		/* only account when returning to user */
	beq	2f
	ACCOUNT_CPU_USER_EXIT(r3, r4)
2:
#endif

	/* Restore CR, LR, CTR, XER and r0, r2-r9 from the frame */
	ld	r3,_CCR(r1)
	ld	r4,_LINK(r1)
	ld	r5,_CTR(r1)
	ld	r6,_XER(r1)
	mtcr	r3
	mtlr	r4
	mtctr	r5
	mtxer	r6
	REST_GPR(0, r1)
	REST_8GPRS(2, r1)

	mfmsr	r10
	rldicl	r10,r10,48,1		/* clear EE */
	rldicr	r10,r10,16,61		/* clear RI (LE is 0 already) */
	mtmsrd	r10,1

	/* SRR0/SRR1 are live from here until rfid */
	mtspr	SPRN_SRR1,r12
	mtspr	SPRN_SRR0,r11
	REST_4GPRS(10, r1)
	ld	r1,GPR1(r1)		/* r1 last: it is the frame base */
	rfid
	b	.	/* prevent speculative execution */
|
|
|
|
|
|
|
|
/*
 * Reached from fast_exception_return when MSR_RI is clear in the
 * saved MSR: call unrecoverable_exception(regs), looping if it ever
 * returns.
 */
unrecov_fer:
	bl	.save_nvgprs
1:	addi	r3,r1,STACK_FRAME_OVERHEAD
	bl	.unrecoverable_exception
	b	1b
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Hash table stuff
 *
 * do_hash_page: entered from the 0x300/0x400 handlers with
 *   r3 = faulting address, r4 = DSISR (or its SRR1-derived equivalent),
 *   r5 = trap number, r12 = saved MSR.
 * Tries to resolve the fault by inserting an HPTE via hash_page();
 * falls back to the dabr / segment-table / page-fault paths otherwise.
 */
	.align	7
_STATIC(do_hash_page)
	std	r3,_DAR(r1)
	std	r4,_DSISR(r1)

	andis.	r0,r4,0xa410		/* weird error? */
	bne-	handle_page_fault	/* if so, hash_page can't handle it */
	andis.	r0,r4,DSISR_DABRMATCH@h
	bne-	handle_dabr_fault

BEGIN_MMU_FTR_SECTION
	andis.	r0,r4,0x0020		/* Is it a segment table fault? */
	bne-	do_ste_alloc		/* If so handle it */
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB)

	/*
	 * hash_page is fairly complex and takes some locks, so to avoid
	 * a possible deadlock we do not call it from a (pseudo-)NMI
	 * handler such as the PMU interrupt; the fault is instead
	 * treated as a bad access and reported through the exception
	 * table mechanism (label 77).
	 */
	clrrdi	r11,r1,THREAD_SHIFT
	lwz	r0,TI_PREEMPT(r11)	/* If we're in an "NMI" */
	andis.	r0,r0,NMI_MASK@h	/* (i.e. an irq when soft-disabled) */
	bne	77f			/* then don't call hash_page now */

	/*
	 * On iSeries, we soft-disable interrupts here, then
	 * hard-enable interrupts so that the hash_page code can spin on
	 * the hash_table_lock without problems on a shared processor.
	 */
	DISABLE_INTS

	/*
	 * Currently, trace_hardirqs_off() will be called by DISABLE_INTS
	 * and will clobber volatile registers when irq tracing is enabled
	 * so we need to reload them. It may be possible to be smarter here
	 * and move the irq tracing elsewhere but let's keep it simple for
	 * now
	 */
#ifdef CONFIG_TRACE_IRQFLAGS
	ld	r3,_DAR(r1)
	ld	r4,_DSISR(r1)
	ld	r5,_TRAP(r1)
	ld	r12,_MSR(r1)
	clrrdi	r5,r5,4
#endif /* CONFIG_TRACE_IRQFLAGS */
	/*
	 * We need to set the _PAGE_USER bit if MSR_PR is set or if we are
	 * accessing a userspace segment (even from the kernel). We assume
	 * kernel addresses always have the high bit set.
	 */
	rlwinm	r4,r4,32-25+9,31-9,31-9	/* DSISR_STORE -> _PAGE_RW */
	rotldi	r0,r3,15		/* Move high bit into MSR_PR posn */
	orc	r0,r12,r0		/* MSR_PR | ~high_bit */
	rlwimi	r4,r0,32-13,30,30	/* becomes _PAGE_USER access bit */
	ori	r4,r4,1			/* add _PAGE_PRESENT */
	rlwimi	r4,r5,22+2,31-2,31-2	/* Set _PAGE_EXEC if trap is 0x400 */

	/*
	 * r3 contains the faulting address
	 * r4 contains the required access permissions
	 * r5 contains the trap number
	 *
	 * at return r3 = 0 for success
	 */
	bl	.hash_page		/* build HPTE if possible */
	cmpdi	r3,0			/* see if hash_page succeeded */

BEGIN_FW_FTR_SECTION
	/*
	 * If we had interrupts soft-enabled at the point where the
	 * DSI/ISI occurred, and an interrupt came in during hash_page,
	 * handle it now.
	 * We jump to ret_from_except_lite rather than fast_exception_return
	 * because ret_from_except_lite will check for and handle pending
	 * interrupts if necessary.
	 */
	beq	13f
END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)

BEGIN_FW_FTR_SECTION
	/*
	 * Here we have interrupts hard-disabled, so it is sufficient
	 * to restore paca->{soft,hard}_enable and get out.
	 */
	beq	fast_exc_return_irq	/* Return from exception on success */
END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES)

	/* For a hash failure, we don't bother re-enabling interrupts */
	ble-	12f

	/*
	 * hash_page couldn't handle it, set soft interrupt enable back
	 * to what it was before the trap.  Note that .arch_local_irq_restore
	 * handles any interrupts pending at this point.
	 */
	ld	r3,SOFTE(r1)
	TRACE_AND_RESTORE_IRQ_PARTIAL(r3, 11f)
	bl	.arch_local_irq_restore
	b	11f
|
|
|
|
|
2010-03-30 07:59:25 +08:00
|
|
|
/*
 * We have a data breakpoint exception - handle it.
 * Calls do_dabr with r3 = pointer to saved regs, r4 = DAR, r5 = DSISR.
 */
handle_dabr_fault:
	bl	.save_nvgprs
	ld	r4,_DAR(r1)
	ld	r5,_DSISR(r1)
	addi	r3,r1,STACK_FRAME_OVERHEAD
	bl	.do_dabr
	b	.ret_from_except_lite
|
|
|
|
|
2009-06-03 05:17:38 +08:00
|
|
|
/*
 * Here we have a page fault that hash_page can't handle.
 *
 * Calls do_page_fault with r3 = pointer to saved regs, r4 = DAR,
 * r5 = DSISR.  A zero return takes the light-weight exit; a non-zero
 * return value is forwarded in r5 to bad_page_fault together with
 * the fault address in r4.
 *
 * Local label 11 is the entry used by do_hash_page once it has
 * already restored the interrupt state itself; local label 13 is the
 * shared branch to ret_from_except_lite.
 */
handle_page_fault:
	ENABLE_INTS
11:	ld	r4,_DAR(r1)
	ld	r5,_DSISR(r1)
	addi	r3,r1,STACK_FRAME_OVERHEAD
	bl	.do_page_fault
	cmpdi	r3,0
	beq+	13f
	bl	.save_nvgprs
	mr	r5,r3			/* r5 = do_page_fault's return value */
	addi	r3,r1,STACK_FRAME_OVERHEAD
	ld	r4,_DAR(r1)		/* full 64-bit address (was lwz) */
	bl	.bad_page_fault
	b	.ret_from_except

13:	b	.ret_from_except_lite
|
|
|
|
|
|
|
|
/*
 * We have a page fault that hash_page could handle but HV refused
 * the PTE insertion.
 * Calls low_hash_fault with r3 = pointer to saved regs, r4 = DAR,
 * r5 = hash_page's return value.
 */
12:	bl	.save_nvgprs
	mr	r5,r3
	addi	r3,r1,STACK_FRAME_OVERHEAD
	ld	r4,_DAR(r1)
	bl	.low_hash_fault
	b	.ret_from_except
|
|
|
|
|
powerpc: Allow perf_counters to access user memory at interrupt time
This provides a mechanism to allow the perf_counters code to access
user memory in a PMU interrupt routine. Such an access can cause
various kinds of interrupt: SLB miss, MMU hash table miss, segment
table miss, or TLB miss, depending on the processor. This commit
only deals with 64-bit classic/server processors, which use an MMU
hash table. 32-bit processors are already able to access user memory
at interrupt time. Since we don't soft-disable on 32-bit, we avoid
the possibility of reentering hash_page or the TLB miss handlers,
since they run with interrupts disabled.
On 64-bit processors, an SLB miss interrupt on a user address will
update the slb_cache and slb_cache_ptr fields in the paca. This is
OK except in the case where a PMU interrupt occurs in switch_slb,
which also accesses those fields. To prevent this, we hard-disable
interrupts in switch_slb. Interrupts are already soft-disabled at
this point, and will get hard-enabled when they get soft-enabled
later.
This also reworks slb_flush_and_rebolt: to avoid hard-disabling twice,
and to make sure that it clears the slb_cache_ptr when called from
other callers than switch_slb, the existing routine is renamed to
__slb_flush_and_rebolt, which is called by switch_slb and the new
version of slb_flush_and_rebolt.
Similarly, switch_stab (used on POWER3 and RS64 processors) gets a
hard_irq_disable() to protect the per-cpu variables used there and
in ste_allocate.
If a MMU hashtable miss interrupt occurs, normally we would call
hash_page to look up the Linux PTE for the address and create a HPTE.
However, hash_page is fairly complex and takes some locks, so to
avoid the possibility of deadlock, we check the preemption count
to see if we are in a (pseudo-)NMI handler, and if so, we don't call
hash_page but instead treat it like a bad access that will get
reported up through the exception table mechanism. An interrupt
whose handler runs even though the interrupt occurred when
soft-disabled (such as the PMU interrupt) is considered a pseudo-NMI
handler, which should use nmi_enter()/nmi_exit() rather than
irq_enter()/irq_exit().
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2009-08-17 13:17:54 +08:00
|
|
|
/*
 * We come here as a result of a DSI at a point where we don't want
 * to call hash_page, such as when we are accessing memory (possibly
 * user memory) inside a PMU interrupt that occurred while interrupts
 * were soft-disabled.  We want to invoke the exception handler for
 * the access, or panic if there isn't a handler.
 *
 * r3 still holds the faulting address on entry; it is passed to
 * bad_page_fault in r4 with r5 = SIGSEGV.
 */
77:	bl	.save_nvgprs
	mr	r4,r3
	addi	r3,r1,STACK_FRAME_OVERHEAD
	li	r5,SIGSEGV
	bl	.bad_page_fault
	b	.ret_from_except
|
|
|
|
|
2009-06-03 05:17:38 +08:00
|
|
|
/*
 * Here we have a segment miss: try to insert a segment table entry.
 * A non-zero return from ste_allocate falls back to the page fault
 * path; zero returns straight to the interrupted code.
 */
do_ste_alloc:
	bl	.ste_allocate		/* try to insert stab entry */
	cmpdi	r3,0
	bne-	handle_page_fault
	b	fast_exception_return
|
|
|
|
|
|
|
|
/*
 * Insert a bolted segment table entry for the kernel address in DAR.
 *
 * r13 points to the PACA, r9 contains the saved CR,
 * r11 and r12 contain the saved SRR0 and SRR1.
 * r9 - r13 are saved in paca->exslb.
 * We assume we aren't going to take any exceptions during this procedure.
 * We assume (DAR >> 60) == 0xc.
 */
	.align	7
_GLOBAL(do_stab_bolted)
	stw	r9,PACA_EXSLB+EX_CCR(r13)	/* save CR in exc. frame */
	std	r11,PACA_EXSLB+EX_SRR0(r13)	/* save SRR0 in exc. frame */

	/* Hash to the primary group */
	ld	r10,PACASTABVIRT(r13)
	mfspr	r11,SPRN_DAR
	srdi	r11,r11,28
	rldimi	r10,r11,7,52	/* r10 = first ste of the group */

	/* Calculate VSID */
	/* This is a kernel address, so protovsid = ESID */
	ASM_VSID_SCRAMBLE(r11, r9, 256M)
	rldic	r9,r11,12,16	/* r9 = vsid << 12 */

	/* Search the primary group (8 entries of 16 bytes) for a free entry */
1:	ld	r11,0(r10)	/* Test valid bit of the current ste */
	andi.	r11,r11,0x80
	beq	2f
	addi	r10,r10,16
	andi.	r11,r10,0x70
	bne	1b

	/* Stick to only searching the primary group for now. */
	/* At least for now, we use a very simple random castout scheme */
	/* Use the TB as a random number ; OR in 1 to avoid entry 0 */
	mftb	r11
	rldic	r11,r11,4,57	/* r11 = (r11 << 4) & 0x70 */
	ori	r11,r11,0x10

	/* r10 currently points to an ste one past the group of interest */
	/* make it point to the randomly selected entry */
	subi	r10,r10,128
	or	r10,r10,r11	/* r10 is the entry to invalidate */

	isync			/* mark the entry invalid */
	ld	r11,0(r10)
	rldicl	r11,r11,56,1	/* clear the valid bit */
	rotldi	r11,r11,8
	std	r11,0(r10)
	sync

	clrrdi	r11,r11,28	/* Get the esid part of the ste */
	slbie	r11

2:	std	r9,8(r10)	/* Store the vsid part of the ste */
	eieio

	mfspr	r11,SPRN_DAR	/* Get the new esid */
	clrrdi	r11,r11,28	/* Permits a full 32b of ESID */
	ori	r11,r11,0x90	/* Turn on valid and kp */
	std	r11,0(r10)	/* Put new entry back into the stab */

	sync

	/* All done -- return from exception. */
	lwz	r9,PACA_EXSLB+EX_CCR(r13)	/* get saved CR */
	ld	r11,PACA_EXSLB+EX_SRR0(r13)	/* get saved SRR0 */

	andi.	r10,r12,MSR_RI
	beq-	unrecov_slb

	mtcrf	0x80,r9			/* restore CR */

	mfmsr	r10
	clrrdi	r10,r10,2		/* clear the two low MSR bits (RI, LE) */
	mtmsrd	r10,1

	mtspr	SPRN_SRR0,r11
	mtspr	SPRN_SRR1,r12
	/* Restore the scratch registers; r13 goes last, it is the base */
	ld	r9,PACA_EXSLB+EX_R9(r13)
	ld	r10,PACA_EXSLB+EX_R10(r13)
	ld	r11,PACA_EXSLB+EX_R11(r13)
	ld	r12,PACA_EXSLB+EX_R12(r13)
	ld	r13,PACA_EXSLB+EX_R13(r13)
	rfid
	b	.	/* prevent speculative execution */
|
|
|
|
|
2011-09-20 01:45:04 +08:00
|
|
|
#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
/*
 * Data area reserved for FWNMI option.
 * This address (0x7000) is fixed by the RPA.
 */
	.= 0x7000
	.globl fwnmi_data_area
fwnmi_data_area:
#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
|
2009-06-03 05:17:38 +08:00
|
|
|
|
|
|
|
/* iSeries does not use the FWNMI stuff, so it is safe to put
|
|
|
|
* this here, even if we later allow kernels that will boot on
|
|
|
|
* both pSeries and iSeries */
|
|
|
|
#ifdef CONFIG_PPC_ISERIES
|
|
|
|
. = LPARMAP_PHYS
|
|
|
|
.globl xLparMap
|
|
|
|
xLparMap:
|
|
|
|
.quad HvEsidsToMap /* xNumberEsids */
|
|
|
|
.quad HvRangesToMap /* xNumberRanges */
|
|
|
|
.quad STAB0_PAGE /* xSegmentTableOffs */
|
|
|
|
.zero 40 /* xRsvd */
|
|
|
|
/* xEsids (HvEsidsToMap entries of 2 quads) */
|
|
|
|
.quad PAGE_OFFSET_ESID /* xKernelEsid */
|
|
|
|
.quad PAGE_OFFSET_VSID /* xKernelVsid */
|
|
|
|
.quad VMALLOC_START_ESID /* xKernelEsid */
|
|
|
|
.quad VMALLOC_START_VSID /* xKernelVsid */
|
|
|
|
/* xRanges (HvRangesToMap entries of 3 quads) */
|
|
|
|
.quad HvPagesToMap /* xPages */
|
|
|
|
.quad 0 /* xOffset */
|
|
|
|
.quad PAGE_OFFSET_VSID << (SID_SHIFT - HW_PAGE_SHIFT) /* xVPN */
|
|
|
|
|
|
|
|
#endif /* CONFIG_PPC_ISERIES */
|
|
|
|
|
2011-09-20 01:45:04 +08:00
|
|
|
#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
/* pseries and powernv need to keep the whole page from
 * 0x7000 to 0x8000 free for use by the firmware
 */
	. = 0x8000
#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
|
2011-03-07 02:09:07 +08:00
|
|
|
|
|
|
|
/*
 * Space for CPU0's segment table.
 *
 * On iSeries, the hypervisor must fill in at least one entry before
 * we get control (with relocate on).  The address is given to the hv
 * as a page number (see xLparMap above), so this must be at a
 * fixed address (the linker can't compute (u64)&initial_stab >>
 * PAGE_SHIFT).
 */
	. = STAB0_OFFSET	/* 0x8000 */
	.globl initial_stab
initial_stab:
	.space	4096		/* one 4 KiB page, reserved but not initialised here */
|
2011-09-20 01:45:04 +08:00
|
|
|
#ifdef CONFIG_PPC_POWERNV
/*
 * Secondary machine check entry point used with OPAL.
 *
 * On entry r3 holds a pointer to an event structure.  The top two
 * address bits are cleared, the pointer is converted with tovirt()
 * and recorded in the paca (PACA_OPAL_MC_EVT).  SRR0, SRR1 and r3
 * are then reloaded from that structure and r13 from scratch0 before
 * branching to the regular machine_check_pSeries vector.
 * r13 is the only scratch register used.
 */
_GLOBAL(opal_mc_secondary_handler)
	HMT_MEDIUM
	SET_SCRATCH0(r13)
	GET_PACA(r13)
	clrldi	r3,r3,2
	tovirt(r3,r3)
	std	r3,PACA_OPAL_MC_EVT(r13)
	ld	r13,OPAL_MC_SRR0(r3)
	mtspr	SPRN_SRR0,r13
	ld	r13,OPAL_MC_SRR1(r3)
	mtspr	SPRN_SRR1,r13
	ld	r3,OPAL_MC_GPR3(r3)	/* r3 last: it is the base pointer */
	GET_SCRATCH0(r13)
	b	machine_check_pSeries
#endif /* CONFIG_PPC_POWERNV */
|