2008-04-17 12:28:09 +08:00
|
|
|
/*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License, version 2, as
|
|
|
|
* published by the Free Software Foundation.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write to the Free Software
|
|
|
|
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
*
|
|
|
|
* Copyright IBM Corp. 2007
|
|
|
|
*
|
|
|
|
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
|
|
|
|
* Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/errno.h>
|
|
|
|
#include <linux/err.h>
|
|
|
|
#include <linux/kvm_host.h>
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/vmalloc.h>
|
2009-11-02 20:02:31 +08:00
|
|
|
#include <linux/hrtimer.h>
|
2008-04-17 12:28:09 +08:00
|
|
|
#include <linux/fs.h>
|
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h
percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files. percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.
percpu.h -> slab.h dependency is about to be removed. Prepare for
this change by updating users of gfp and slab facilities include those
headers directly instead of assuming availability. As this conversion
needs to touch large number of source files, the following script is
used as the basis of conversion.
http://userweb.kernel.org/~tj/misc/slabh-sweep.py
The script does the following.
* Scan files for gfp and slab usages and update includes such that
only the necessary includes are there. ie. if only gfp is used,
gfp.h, if slab is used, slab.h.
* When the script inserts a new include, it looks at the include
blocks and try to put the new include such that its order conforms
to its surrounding. It's put in the include block which contains
core kernel includes, in the same order that the rest are ordered -
alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
doesn't seem to be any matching order.
* If the script can't find a place to put a new include (mostly
because the file doesn't have fitting include block), it prints out
an error message indicating which .h file needs to be added to the
file.
The conversion was done in the following steps.
1. The initial automatic conversion of all .c files updated slightly
over 4000 files, deleting around 700 includes and adding ~480 gfp.h
and ~3000 slab.h inclusions. The script emitted errors for ~400
files.
2. Each error was manually checked. Some didn't need the inclusion,
some needed manual addition while adding it to implementation .h or
embedding .c file was more appropriate for others. This step added
inclusions to around 150 files.
3. The script was run again and the output was compared to the edits
from #2 to make sure no file was left behind.
4. Several build tests were done and a couple of problems were fixed.
e.g. lib/decompress_*.c used malloc/free() wrappers around slab
APIs requiring slab.h to be added manually.
5. The script was run on all .h files but without automatically
editing them as sprinkling gfp.h and slab.h inclusions around .h
files could easily lead to inclusion dependency hell. Most gfp.h
inclusion directives were ignored as stuff from gfp.h was usually
wildly available and often used in preprocessor macros. Each
slab.h inclusion directive was examined and added manually as
necessary.
6. percpu.h was updated not to include slab.h.
7. Build tests were done on the following configurations and failures
were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my
distributed build env didn't work with gcov compiles) and a few
more options had to be turned off depending on archs to make things
build (like ipr on powerpc/64 which failed due to missing writeq).
* x86 and x86_64 UP and SMP allmodconfig and a custom test config.
* powerpc and powerpc64 SMP allmodconfig
* sparc and sparc64 SMP allmodconfig
* ia64 SMP allmodconfig
* s390 SMP allmodconfig
* alpha SMP allmodconfig
* um on x86_64 SMP allmodconfig
8. percpu.h modifications were reverted so that it could be applied as
a separate patch and serve as bisection point.
Given the fact that I had only a couple of failures from tests on step
6, I'm fairly confident about the coverage of this conversion patch.
If there is a breakage, it's likely to be something in one of the arch
headers which should be easily discoverable on most builds of
the specific arch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 16:04:11 +08:00
|
|
|
#include <linux/slab.h>
|
2008-04-17 12:28:09 +08:00
|
|
|
#include <asm/cputable.h>
|
|
|
|
#include <asm/uaccess.h>
|
|
|
|
#include <asm/kvm_ppc.h>
|
2008-07-26 02:54:52 +08:00
|
|
|
#include <asm/tlbflush.h>
|
2008-12-03 05:51:57 +08:00
|
|
|
#include "timing.h"
|
2008-12-23 11:57:26 +08:00
|
|
|
#include "../mm/mmu_decl.h"
|
2008-04-17 12:28:09 +08:00
|
|
|
|
2009-06-18 22:47:27 +08:00
|
|
|
#define CREATE_TRACE_POINTS
|
|
|
|
#include "trace.h"
|
|
|
|
|
2008-04-17 12:28:09 +08:00
|
|
|
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
|
|
|
|
{
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
#ifndef CONFIG_KVM_BOOK3S_64_HV
|
2010-07-29 20:47:43 +08:00
|
|
|
return !(v->arch.shared->msr & MSR_WE) ||
|
|
|
|
!!(v->arch.pending_exceptions);
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
#else
|
|
|
|
return 1;
|
|
|
|
#endif
|
2008-04-17 12:28:09 +08:00
|
|
|
}
|
|
|
|
|
2010-07-29 20:47:48 +08:00
|
|
|
int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
int nr = kvmppc_get_gpr(vcpu, 11);
|
|
|
|
int r;
|
|
|
|
unsigned long __maybe_unused param1 = kvmppc_get_gpr(vcpu, 3);
|
|
|
|
unsigned long __maybe_unused param2 = kvmppc_get_gpr(vcpu, 4);
|
|
|
|
unsigned long __maybe_unused param3 = kvmppc_get_gpr(vcpu, 5);
|
|
|
|
unsigned long __maybe_unused param4 = kvmppc_get_gpr(vcpu, 6);
|
|
|
|
unsigned long r2 = 0;
|
|
|
|
|
|
|
|
if (!(vcpu->arch.shared->msr & MSR_SF)) {
|
|
|
|
/* 32 bit mode */
|
|
|
|
param1 &= 0xffffffff;
|
|
|
|
param2 &= 0xffffffff;
|
|
|
|
param3 &= 0xffffffff;
|
|
|
|
param4 &= 0xffffffff;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (nr) {
|
2010-07-29 20:47:55 +08:00
|
|
|
case HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE:
|
|
|
|
{
|
|
|
|
vcpu->arch.magic_page_pa = param1;
|
|
|
|
vcpu->arch.magic_page_ea = param2;
|
|
|
|
|
2010-08-03 08:29:27 +08:00
|
|
|
r2 = KVM_MAGIC_FEAT_SR;
|
2010-08-03 17:32:56 +08:00
|
|
|
|
2010-07-29 20:47:55 +08:00
|
|
|
r = HC_EV_SUCCESS;
|
|
|
|
break;
|
|
|
|
}
|
2010-07-29 20:47:48 +08:00
|
|
|
case HC_VENDOR_KVM | KVM_HC_FEATURES:
|
|
|
|
r = HC_EV_SUCCESS;
|
2011-06-15 07:34:41 +08:00
|
|
|
#if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500)
|
|
|
|
/* XXX Missing magic page on 44x */
|
2010-07-29 20:47:55 +08:00
|
|
|
r2 |= (1 << KVM_FEATURE_MAGIC_PAGE);
|
|
|
|
#endif
|
2010-07-29 20:47:48 +08:00
|
|
|
|
|
|
|
/* Second return value is in r4 */
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
r = HC_EV_UNIMPLEMENTED;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2010-08-03 17:32:56 +08:00
|
|
|
kvmppc_set_gpr(vcpu, 4, r2);
|
|
|
|
|
2010-07-29 20:47:48 +08:00
|
|
|
return r;
|
|
|
|
}
|
2008-04-17 12:28:09 +08:00
|
|
|
|
|
|
|
int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
enum emulation_result er;
|
|
|
|
int r;
|
|
|
|
|
|
|
|
er = kvmppc_emulate_instruction(run, vcpu);
|
|
|
|
switch (er) {
|
|
|
|
case EMULATE_DONE:
|
|
|
|
/* Future optimization: only reload non-volatiles if they were
|
|
|
|
* actually modified. */
|
|
|
|
r = RESUME_GUEST_NV;
|
|
|
|
break;
|
|
|
|
case EMULATE_DO_MMIO:
|
|
|
|
run->exit_reason = KVM_EXIT_MMIO;
|
|
|
|
/* We must reload nonvolatiles because "update" load/store
|
|
|
|
* instructions modify register state. */
|
|
|
|
/* Future optimization: only reload non-volatiles if they were
|
|
|
|
* actually modified. */
|
|
|
|
r = RESUME_HOST_NV;
|
|
|
|
break;
|
|
|
|
case EMULATE_FAIL:
|
|
|
|
/* XXX Deliver Program interrupt to guest. */
|
|
|
|
printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__,
|
2010-04-16 06:11:40 +08:00
|
|
|
kvmppc_get_last_inst(vcpu));
|
2008-04-17 12:28:09 +08:00
|
|
|
r = RESUME_HOST;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
BUG();
|
|
|
|
}
|
|
|
|
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2009-09-15 17:37:46 +08:00
|
|
|
/*
 * Arch hook for enabling hardware virtualization support.
 *
 * Nothing is done here: the argument is ignored and 0 is always
 * returned.
 */
int kvm_arch_hardware_enable(void *garbage)
{
	return 0;
}
|
|
|
|
|
|
|
|
/*
 * Arch hook for disabling hardware virtualization support.
 *
 * Intentionally empty: the argument is ignored and nothing is done.
 */
void kvm_arch_hardware_disable(void *garbage)
{
}
|
|
|
|
|
|
|
|
/*
 * Arch hook for one-time hardware setup.
 *
 * No setup is performed; always returns 0.
 */
int kvm_arch_hardware_setup(void)
{
	return 0;
}
|
|
|
|
|
|
|
|
/*
 * Arch hook for hardware teardown.
 *
 * Intentionally empty: nothing is done.
 */
void kvm_arch_hardware_unsetup(void)
{
}
|
|
|
|
|
|
|
|
/*
 * Store the result of the core processor-compatibility check.
 *
 * @rtn is treated as a pointer to int; the value returned by
 * kvmppc_core_check_processor_compat() is written through it.
 */
void kvm_arch_check_processor_compat(void *rtn)
{
	int *result = rtn;

	*result = kvmppc_core_check_processor_compat();
}
|
|
|
|
|
2010-11-10 00:02:49 +08:00
|
|
|
/*
 * Arch hook for VM creation.
 *
 * All work is delegated to kvmppc_core_init_vm(); its return value is
 * passed back unchanged.
 */
int kvm_arch_init_vm(struct kvm *kvm)
{
	int ret = kvmppc_core_init_vm(kvm);

	return ret;
}
|
|
|
|
|
2010-11-10 00:02:49 +08:00
|
|
|
void kvm_arch_destroy_vm(struct kvm *kvm)
|
2008-04-17 12:28:09 +08:00
|
|
|
{
|
|
|
|
unsigned int i;
|
2009-06-09 20:56:29 +08:00
|
|
|
struct kvm_vcpu *vcpu;
|
2008-04-17 12:28:09 +08:00
|
|
|
|
2009-06-09 20:56:29 +08:00
|
|
|
kvm_for_each_vcpu(i, vcpu, kvm)
|
|
|
|
kvm_arch_vcpu_free(vcpu);
|
|
|
|
|
|
|
|
mutex_lock(&kvm->lock);
|
|
|
|
for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
|
|
|
|
kvm->vcpus[i] = NULL;
|
|
|
|
|
|
|
|
atomic_set(&kvm->online_vcpus, 0);
|
2011-06-29 08:19:22 +08:00
|
|
|
|
|
|
|
kvmppc_core_destroy_vm(kvm);
|
|
|
|
|
2009-06-09 20:56:29 +08:00
|
|
|
mutex_unlock(&kvm->lock);
|
2008-04-17 12:28:09 +08:00
|
|
|
}
|
|
|
|
|
2009-01-06 10:03:02 +08:00
|
|
|
/*
 * Arch hook for synchronizing events on @kvm.
 *
 * Intentionally empty: nothing is done.
 */
void kvm_arch_sync_events(struct kvm *kvm)
{
}
|
|
|
|
|
2008-04-17 12:28:09 +08:00
|
|
|
int kvm_dev_ioctl_check_extension(long ext)
|
|
|
|
{
|
|
|
|
int r;
|
|
|
|
|
|
|
|
switch (ext) {
|
2011-04-28 06:24:21 +08:00
|
|
|
#ifdef CONFIG_BOOKE
|
|
|
|
case KVM_CAP_PPC_BOOKE_SREGS:
|
|
|
|
#else
|
2009-11-30 11:02:02 +08:00
|
|
|
case KVM_CAP_PPC_SEGSTATE:
|
2011-04-28 06:24:21 +08:00
|
|
|
#endif
|
2010-03-25 04:48:18 +08:00
|
|
|
case KVM_CAP_PPC_UNSET_IRQ:
|
2010-08-30 19:50:45 +08:00
|
|
|
case KVM_CAP_PPC_IRQ_LEVEL:
|
2010-03-25 04:48:29 +08:00
|
|
|
case KVM_CAP_ENABLE_CAP:
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
r = 1;
|
|
|
|
break;
|
|
|
|
#ifndef CONFIG_KVM_BOOK3S_64_HV
|
|
|
|
case KVM_CAP_PPC_PAIRED_SINGLES:
|
2010-03-25 04:48:30 +08:00
|
|
|
case KVM_CAP_PPC_OSI:
|
2010-07-29 20:48:08 +08:00
|
|
|
case KVM_CAP_PPC_GET_PVINFO:
|
2009-11-30 11:02:02 +08:00
|
|
|
r = 1;
|
|
|
|
break;
|
2008-05-30 22:05:56 +08:00
|
|
|
case KVM_CAP_COALESCED_MMIO:
|
|
|
|
r = KVM_COALESCED_MMIO_PAGE_OFFSET;
|
|
|
|
break;
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
#endif
|
2008-04-17 12:28:09 +08:00
|
|
|
default:
|
|
|
|
r = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return r;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Arch-specific device ioctl handler.
 *
 * No arch-specific device ioctls are implemented here: every request is
 * rejected with -EINVAL and all arguments are ignored.
 */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	return -EINVAL;
}
|
|
|
|
|
2009-12-24 00:35:18 +08:00
|
|
|
int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
|
|
|
struct kvm_memory_slot *memslot,
|
|
|
|
struct kvm_memory_slot old,
|
|
|
|
struct kvm_userspace_memory_region *mem,
|
|
|
|
int user_alloc)
|
2008-04-17 12:28:09 +08:00
|
|
|
{
|
2011-06-29 08:19:22 +08:00
|
|
|
return kvmppc_core_prepare_memory_region(kvm, mem);
|
2008-04-17 12:28:09 +08:00
|
|
|
}
|
|
|
|
|
2009-12-24 00:35:18 +08:00
|
|
|
void kvm_arch_commit_memory_region(struct kvm *kvm,
|
|
|
|
struct kvm_userspace_memory_region *mem,
|
|
|
|
struct kvm_memory_slot old,
|
|
|
|
int user_alloc)
|
|
|
|
{
|
2011-06-29 08:19:22 +08:00
|
|
|
kvmppc_core_commit_memory_region(kvm, mem);
|
2009-12-24 00:35:18 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-07-11 07:49:31 +08:00
|
|
|
/*
 * Arch hook for flushing shadow state of @kvm.
 *
 * Intentionally empty: nothing is done.
 */
void kvm_arch_flush_shadow(struct kvm *kvm)
{
}
|
|
|
|
|
2008-04-17 12:28:09 +08:00
|
|
|
/*
 * Create a vcpu with the given @id for @kvm.
 *
 * The vcpu comes from kvmppc_core_vcpu_create().  If that yields an
 * error pointer it is returned as-is; otherwise the per-vcpu debugfs
 * entry is created before the vcpu is returned.
 */
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
{
	struct kvm_vcpu *vcpu = kvmppc_core_vcpu_create(kvm, id);

	if (IS_ERR(vcpu))
		return vcpu;

	kvmppc_create_vcpu_debugfs(vcpu, id);

	return vcpu;
}
|
|
|
|
|
|
|
|
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
2010-02-22 23:52:14 +08:00
|
|
|
/* Make sure we're not using the vcpu anymore */
|
|
|
|
hrtimer_cancel(&vcpu->arch.dec_timer);
|
|
|
|
tasklet_kill(&vcpu->arch.tasklet);
|
|
|
|
|
2008-12-03 05:51:57 +08:00
|
|
|
kvmppc_remove_vcpu_debugfs(vcpu);
|
2008-11-05 23:36:18 +08:00
|
|
|
kvmppc_core_vcpu_free(vcpu);
|
2008-04-17 12:28:09 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Destroy a vcpu.
 *
 * Thin wrapper: simply calls kvm_arch_vcpu_free() on @vcpu.
 */
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	kvm_arch_vcpu_free(vcpu);
}
|
|
|
|
|
|
|
|
/*
 * Report whether @vcpu has a pending timer.
 *
 * The answer is whatever kvmppc_core_pending_dec() returns for @vcpu.
 */
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
	int pending = kvmppc_core_pending_dec(vcpu);

	return pending;
}
|
|
|
|
|
|
|
|
static void kvmppc_decrementer_func(unsigned long data)
|
|
|
|
{
|
|
|
|
struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
|
|
|
|
|
2008-11-05 23:36:14 +08:00
|
|
|
kvmppc_core_queue_dec(vcpu);
|
2008-04-26 06:55:49 +08:00
|
|
|
|
|
|
|
if (waitqueue_active(&vcpu->wq)) {
|
|
|
|
wake_up_interruptible(&vcpu->wq);
|
|
|
|
vcpu->stat.halt_wakeup++;
|
|
|
|
}
|
2008-04-17 12:28:09 +08:00
|
|
|
}
|
|
|
|
|
2009-11-02 20:02:31 +08:00
|
|
|
/*
|
|
|
|
* low level hrtimer wake routine. Because this runs in hardirq context
|
|
|
|
* we schedule a tasklet to do the real work.
|
|
|
|
*/
|
|
|
|
enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
|
|
|
|
{
|
|
|
|
struct kvm_vcpu *vcpu;
|
|
|
|
|
|
|
|
vcpu = container_of(timer, struct kvm_vcpu, arch.dec_timer);
|
|
|
|
tasklet_schedule(&vcpu->arch.tasklet);
|
|
|
|
|
|
|
|
return HRTIMER_NORESTART;
|
|
|
|
}
|
|
|
|
|
2008-04-17 12:28:09 +08:00
|
|
|
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
2009-11-02 20:02:31 +08:00
|
|
|
hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
|
|
|
|
tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu);
|
|
|
|
vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
vcpu->arch.dec_expires = ~(u64)0;
|
2008-04-17 12:28:09 +08:00
|
|
|
|
2011-03-25 13:02:13 +08:00
|
|
|
#ifdef CONFIG_KVM_EXIT_TIMING
|
|
|
|
mutex_init(&vcpu->arch.exit_timing_lock);
|
|
|
|
#endif
|
|
|
|
|
2008-04-17 12:28:09 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Arch-specific vcpu teardown hook.
 *
 * The only work done here is handing @vcpu to kvmppc_mmu_destroy().
 */
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
	kvmppc_mmu_destroy(vcpu);
}
|
|
|
|
|
|
|
|
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
|
|
|
{
|
2011-04-28 06:24:10 +08:00
|
|
|
#ifdef CONFIG_BOOKE
|
|
|
|
/*
|
|
|
|
* vrsave (formerly usprg0) isn't used by Linux, but may
|
|
|
|
* be used by the guest.
|
|
|
|
*
|
|
|
|
* On non-booke this is associated with Altivec and
|
|
|
|
* is handled by code in book3s.c.
|
|
|
|
*/
|
|
|
|
mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
|
|
|
|
#endif
|
2008-11-05 23:36:14 +08:00
|
|
|
kvmppc_core_vcpu_load(vcpu, cpu);
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
vcpu->cpu = smp_processor_id();
|
2008-04-17 12:28:09 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
2008-11-05 23:36:14 +08:00
|
|
|
kvmppc_core_vcpu_put(vcpu);
|
2011-04-28 06:24:10 +08:00
|
|
|
#ifdef CONFIG_BOOKE
|
|
|
|
vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
|
|
|
|
#endif
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
vcpu->cpu = -1;
|
2008-04-17 12:28:09 +08:00
|
|
|
}
|
|
|
|
|
2008-12-15 20:52:10 +08:00
|
|
|
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
|
2009-01-05 03:51:09 +08:00
|
|
|
struct kvm_guest_debug *dbg)
|
2008-04-17 12:28:09 +08:00
|
|
|
{
|
2009-01-05 03:51:09 +08:00
|
|
|
return -EINVAL;
|
2008-04-17 12:28:09 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
|
|
|
|
struct kvm_run *run)
|
|
|
|
{
|
2010-01-08 09:58:01 +08:00
|
|
|
kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, run->dcr.data);
|
2008-04-17 12:28:09 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
|
|
|
|
struct kvm_run *run)
|
|
|
|
{
|
2010-06-11 19:23:26 +08:00
|
|
|
u64 uninitialized_var(gpr);
|
2008-04-17 12:28:09 +08:00
|
|
|
|
2010-01-08 09:58:01 +08:00
|
|
|
if (run->mmio.len > sizeof(gpr)) {
|
2008-04-17 12:28:09 +08:00
|
|
|
printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (vcpu->arch.mmio_is_bigendian) {
|
|
|
|
switch (run->mmio.len) {
|
2010-02-19 18:00:29 +08:00
|
|
|
case 8: gpr = *(u64 *)run->mmio.data; break;
|
2010-01-08 09:58:01 +08:00
|
|
|
case 4: gpr = *(u32 *)run->mmio.data; break;
|
|
|
|
case 2: gpr = *(u16 *)run->mmio.data; break;
|
|
|
|
case 1: gpr = *(u8 *)run->mmio.data; break;
|
2008-04-17 12:28:09 +08:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* Convert BE data from userland back to LE. */
|
|
|
|
switch (run->mmio.len) {
|
2010-01-08 09:58:01 +08:00
|
|
|
case 4: gpr = ld_le32((u32 *)run->mmio.data); break;
|
|
|
|
case 2: gpr = ld_le16((u16 *)run->mmio.data); break;
|
|
|
|
case 1: gpr = *(u8 *)run->mmio.data; break;
|
2008-04-17 12:28:09 +08:00
|
|
|
}
|
|
|
|
}
|
2010-01-08 09:58:01 +08:00
|
|
|
|
2010-02-19 18:00:30 +08:00
|
|
|
if (vcpu->arch.mmio_sign_extend) {
|
|
|
|
switch (run->mmio.len) {
|
|
|
|
#ifdef CONFIG_PPC64
|
|
|
|
case 4:
|
|
|
|
gpr = (s64)(s32)gpr;
|
|
|
|
break;
|
|
|
|
#endif
|
|
|
|
case 2:
|
|
|
|
gpr = (s64)(s16)gpr;
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
gpr = (s64)(s8)gpr;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-01-08 09:58:01 +08:00
|
|
|
kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
|
2010-02-19 18:00:29 +08:00
|
|
|
|
|
|
|
switch (vcpu->arch.io_gpr & KVM_REG_EXT_MASK) {
|
|
|
|
case KVM_REG_GPR:
|
|
|
|
kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
|
|
|
|
break;
|
|
|
|
case KVM_REG_FPR:
|
|
|
|
vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
|
|
|
|
break;
|
2010-04-01 21:33:21 +08:00
|
|
|
#ifdef CONFIG_PPC_BOOK3S
|
2010-02-19 18:00:29 +08:00
|
|
|
case KVM_REG_QPR:
|
|
|
|
vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
|
|
|
|
break;
|
|
|
|
case KVM_REG_FQPR:
|
|
|
|
vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
|
|
|
|
vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
|
|
|
|
break;
|
2010-04-01 21:33:21 +08:00
|
|
|
#endif
|
2010-02-19 18:00:29 +08:00
|
|
|
default:
|
|
|
|
BUG();
|
|
|
|
}
|
2008-04-17 12:28:09 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
|
|
|
unsigned int rt, unsigned int bytes, int is_bigendian)
|
|
|
|
{
|
|
|
|
if (bytes > sizeof(run->mmio.data)) {
|
|
|
|
printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
|
|
|
|
run->mmio.len);
|
|
|
|
}
|
|
|
|
|
|
|
|
run->mmio.phys_addr = vcpu->arch.paddr_accessed;
|
|
|
|
run->mmio.len = bytes;
|
|
|
|
run->mmio.is_write = 0;
|
|
|
|
|
|
|
|
vcpu->arch.io_gpr = rt;
|
|
|
|
vcpu->arch.mmio_is_bigendian = is_bigendian;
|
|
|
|
vcpu->mmio_needed = 1;
|
|
|
|
vcpu->mmio_is_write = 0;
|
2010-02-19 18:00:30 +08:00
|
|
|
vcpu->arch.mmio_sign_extend = 0;
|
2008-04-17 12:28:09 +08:00
|
|
|
|
|
|
|
return EMULATE_DO_MMIO;
|
|
|
|
}
|
|
|
|
|
2010-02-19 18:00:30 +08:00
|
|
|
/* Same as above, but sign extends */
|
|
|
|
int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
|
|
|
unsigned int rt, unsigned int bytes, int is_bigendian)
|
|
|
|
{
|
|
|
|
int r;
|
|
|
|
|
|
|
|
r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian);
|
|
|
|
vcpu->arch.mmio_sign_extend = 1;
|
|
|
|
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2008-04-17 12:28:09 +08:00
|
|
|
int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
2010-02-19 18:00:29 +08:00
|
|
|
u64 val, unsigned int bytes, int is_bigendian)
|
2008-04-17 12:28:09 +08:00
|
|
|
{
|
|
|
|
void *data = run->mmio.data;
|
|
|
|
|
|
|
|
if (bytes > sizeof(run->mmio.data)) {
|
|
|
|
printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
|
|
|
|
run->mmio.len);
|
|
|
|
}
|
|
|
|
|
|
|
|
run->mmio.phys_addr = vcpu->arch.paddr_accessed;
|
|
|
|
run->mmio.len = bytes;
|
|
|
|
run->mmio.is_write = 1;
|
|
|
|
vcpu->mmio_needed = 1;
|
|
|
|
vcpu->mmio_is_write = 1;
|
|
|
|
|
|
|
|
/* Store the value at the lowest bytes in 'data'. */
|
|
|
|
if (is_bigendian) {
|
|
|
|
switch (bytes) {
|
2010-02-19 18:00:29 +08:00
|
|
|
case 8: *(u64 *)data = val; break;
|
2008-04-17 12:28:09 +08:00
|
|
|
case 4: *(u32 *)data = val; break;
|
|
|
|
case 2: *(u16 *)data = val; break;
|
|
|
|
case 1: *(u8 *)data = val; break;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* Store LE value into 'data'. */
|
|
|
|
switch (bytes) {
|
|
|
|
case 4: st_le32(data, val); break;
|
|
|
|
case 2: st_le16(data, val); break;
|
|
|
|
case 1: *(u8 *)data = val; break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return EMULATE_DO_MMIO;
|
|
|
|
}
|
|
|
|
|
|
|
|
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
|
|
|
{
|
|
|
|
int r;
|
|
|
|
sigset_t sigsaved;
|
|
|
|
|
|
|
|
if (vcpu->sigset_active)
|
|
|
|
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
|
|
|
|
|
|
|
|
if (vcpu->mmio_needed) {
|
|
|
|
if (!vcpu->mmio_is_write)
|
|
|
|
kvmppc_complete_mmio_load(vcpu, run);
|
|
|
|
vcpu->mmio_needed = 0;
|
|
|
|
} else if (vcpu->arch.dcr_needed) {
|
|
|
|
if (!vcpu->arch.dcr_is_write)
|
|
|
|
kvmppc_complete_dcr_load(vcpu, run);
|
|
|
|
vcpu->arch.dcr_needed = 0;
|
2010-03-25 04:48:30 +08:00
|
|
|
} else if (vcpu->arch.osi_needed) {
|
|
|
|
u64 *gprs = run->osi.gprs;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < 32; i++)
|
|
|
|
kvmppc_set_gpr(vcpu, i, gprs[i]);
|
|
|
|
vcpu->arch.osi_needed = 0;
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
} else if (vcpu->arch.hcall_needed) {
|
|
|
|
int i;
|
|
|
|
|
|
|
|
kvmppc_set_gpr(vcpu, 3, run->papr_hcall.ret);
|
|
|
|
for (i = 0; i < 9; ++i)
|
|
|
|
kvmppc_set_gpr(vcpu, 4 + i, run->papr_hcall.args[i]);
|
|
|
|
vcpu->arch.hcall_needed = 0;
|
2008-04-17 12:28:09 +08:00
|
|
|
}
|
|
|
|
|
2008-11-05 23:36:14 +08:00
|
|
|
kvmppc_core_deliver_interrupts(vcpu);
|
2008-04-17 12:28:09 +08:00
|
|
|
|
2011-06-29 08:19:50 +08:00
|
|
|
r = kvmppc_vcpu_run(run, vcpu);
|
2008-04-17 12:28:09 +08:00
|
|
|
|
|
|
|
if (vcpu->sigset_active)
|
|
|
|
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
|
|
|
|
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
|
|
|
|
{
|
2010-03-25 04:48:18 +08:00
|
|
|
if (irq->irq == KVM_INTERRUPT_UNSET)
|
|
|
|
kvmppc_core_dequeue_external(vcpu, irq);
|
|
|
|
else
|
|
|
|
kvmppc_core_queue_external(vcpu, irq);
|
2008-04-26 06:55:49 +08:00
|
|
|
|
|
|
|
if (waitqueue_active(&vcpu->wq)) {
|
|
|
|
wake_up_interruptible(&vcpu->wq);
|
|
|
|
vcpu->stat.halt_wakeup++;
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
} else if (vcpu->cpu != -1) {
|
|
|
|
smp_send_reschedule(vcpu->cpu);
|
2008-04-26 06:55:49 +08:00
|
|
|
}
|
|
|
|
|
2008-04-17 12:28:09 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2010-03-25 04:48:29 +08:00
|
|
|
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
|
|
|
|
struct kvm_enable_cap *cap)
|
|
|
|
{
|
|
|
|
int r;
|
|
|
|
|
|
|
|
if (cap->flags)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
switch (cap->cap) {
|
2010-03-25 04:48:30 +08:00
|
|
|
case KVM_CAP_PPC_OSI:
|
|
|
|
r = 0;
|
|
|
|
vcpu->arch.osi_enabled = true;
|
|
|
|
break;
|
2010-03-25 04:48:29 +08:00
|
|
|
default:
|
|
|
|
r = -EINVAL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2008-04-17 12:28:09 +08:00
|
|
|
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
|
|
|
|
struct kvm_mp_state *mp_state)
|
|
|
|
{
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
|
|
|
|
struct kvm_mp_state *mp_state)
|
|
|
|
{
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
long kvm_arch_vcpu_ioctl(struct file *filp,
|
|
|
|
unsigned int ioctl, unsigned long arg)
|
|
|
|
{
|
|
|
|
struct kvm_vcpu *vcpu = filp->private_data;
|
|
|
|
void __user *argp = (void __user *)arg;
|
|
|
|
long r;
|
|
|
|
|
2010-05-13 17:35:17 +08:00
|
|
|
switch (ioctl) {
|
|
|
|
case KVM_INTERRUPT: {
|
2008-04-17 12:28:09 +08:00
|
|
|
struct kvm_interrupt irq;
|
|
|
|
r = -EFAULT;
|
|
|
|
if (copy_from_user(&irq, argp, sizeof(irq)))
|
2010-05-13 17:35:17 +08:00
|
|
|
goto out;
|
2008-04-17 12:28:09 +08:00
|
|
|
r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
|
2010-05-13 17:35:17 +08:00
|
|
|
goto out;
|
2008-04-17 12:28:09 +08:00
|
|
|
}
|
2010-05-13 17:30:43 +08:00
|
|
|
|
2010-03-25 04:48:29 +08:00
|
|
|
case KVM_ENABLE_CAP:
|
|
|
|
{
|
|
|
|
struct kvm_enable_cap cap;
|
|
|
|
r = -EFAULT;
|
|
|
|
if (copy_from_user(&cap, argp, sizeof(cap)))
|
|
|
|
goto out;
|
|
|
|
r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
|
|
|
|
break;
|
|
|
|
}
|
2008-04-17 12:28:09 +08:00
|
|
|
default:
|
|
|
|
r = -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
out:
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2010-07-29 20:48:08 +08:00
|
|
|
static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
|
|
|
|
{
|
|
|
|
u32 inst_lis = 0x3c000000;
|
|
|
|
u32 inst_ori = 0x60000000;
|
|
|
|
u32 inst_nop = 0x60000000;
|
|
|
|
u32 inst_sc = 0x44000002;
|
|
|
|
u32 inst_imm_mask = 0xffff;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The hypercall to get into KVM from within guest context is as
|
|
|
|
* follows:
|
|
|
|
*
|
|
|
|
* lis r0, r0, KVM_SC_MAGIC_R0@h
|
|
|
|
* ori r0, KVM_SC_MAGIC_R0@l
|
|
|
|
* sc
|
|
|
|
* nop
|
|
|
|
*/
|
|
|
|
pvinfo->hcall[0] = inst_lis | ((KVM_SC_MAGIC_R0 >> 16) & inst_imm_mask);
|
|
|
|
pvinfo->hcall[1] = inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask);
|
|
|
|
pvinfo->hcall[2] = inst_sc;
|
|
|
|
pvinfo->hcall[3] = inst_nop;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2008-04-17 12:28:09 +08:00
|
|
|
long kvm_arch_vm_ioctl(struct file *filp,
|
|
|
|
unsigned int ioctl, unsigned long arg)
|
|
|
|
{
|
2010-07-29 20:48:08 +08:00
|
|
|
void __user *argp = (void __user *)arg;
|
2008-04-17 12:28:09 +08:00
|
|
|
long r;
|
|
|
|
|
|
|
|
switch (ioctl) {
|
2010-07-29 20:48:08 +08:00
|
|
|
case KVM_PPC_GET_PVINFO: {
|
|
|
|
struct kvm_ppc_pvinfo pvinfo;
|
2010-10-30 17:04:24 +08:00
|
|
|
memset(&pvinfo, 0, sizeof(pvinfo));
|
2010-07-29 20:48:08 +08:00
|
|
|
r = kvm_vm_ioctl_get_pvinfo(&pvinfo);
|
|
|
|
if (copy_to_user(argp, &pvinfo, sizeof(pvinfo))) {
|
|
|
|
r = -EFAULT;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
break;
|
|
|
|
}
|
2008-04-17 12:28:09 +08:00
|
|
|
default:
|
2009-08-26 19:57:07 +08:00
|
|
|
r = -ENOTTY;
|
2008-04-17 12:28:09 +08:00
|
|
|
}
|
|
|
|
|
2010-07-29 20:48:08 +08:00
|
|
|
out:
|
2008-04-17 12:28:09 +08:00
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Arch initialisation hook.
 *
 * @opaque is unused and no work is done.
 *
 * Returns 0.
 */
int kvm_arch_init(void *opaque)
{
	return 0;
}
|
|
|
|
|
|
|
|
/* Arch exit hook; intentionally empty, mirroring the no-op kvm_arch_init(). */
void kvm_arch_exit(void)
{
}
|