linux_old1/arch/powerpc/include/asm/kvm_book3s_64.h

/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 *
 * Copyright SUSE Linux Products GmbH 2010
 *
 * Authors: Alexander Graf <agraf@suse.de>
 */

#ifndef __ASM_KVM_BOOK3S_64_H__
#define __ASM_KVM_BOOK3S_64_H__

#ifdef CONFIG_KVM_BOOK3S_PR
static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu)
{
	preempt_disable();
	return &get_paca()->shadow_vcpu;
}

static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
{
	preempt_enable();
}
#endif

#define SPAPR_TCE_SHIFT		12

#ifdef CONFIG_KVM_BOOK3S_64_HV
/* For now use fixed-size 16MB page table */
#define HPT_ORDER	24
#define HPT_NPTEG	(1ul << (HPT_ORDER - 7))	/* 128B per pteg */
#define HPT_NPTE	(HPT_NPTEG << 3)		/* 8 PTEs per PTEG */
#define HPT_HASH_MASK	(HPT_NPTEG - 1)
#endif

/*
 * We use a lock bit in HPTE dword 0 to synchronize updates and
 * accesses to each HPTE, and another bit to indicate non-present
 * HPTEs.
 */
#define HPTE_V_HVLOCK	0x40UL

static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits)
{
	unsigned long tmp, old;

	asm volatile("	ldarx	%0,0,%2\n"
		     "	and.	%1,%0,%3\n"
		     "	bne	2f\n"
		     "	ori	%0,%0,%4\n"
		     "  stdcx.	%0,0,%2\n"
		     "	beq+	2f\n"
		     "	li	%1,%3\n"
		     "2:	isync"
		     : "=&r" (tmp), "=&r" (old)
		     : "r" (hpte), "r" (bits), "i" (HPTE_V_HVLOCK)
		     : "cc", "memory");
	return old == 0;
}

static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
					     unsigned long pte_index)
{
	unsigned long rb, va_low;

	rb = (v & ~0x7fUL) << 16;		/* AVA field */
	va_low = pte_index >> 3;
	if (v & HPTE_V_SECONDARY)
		va_low = ~va_low;
	/* xor vsid from AVA */
	if (!(v & HPTE_V_1TB_SEG))
		va_low ^= v >> 12;
	else
		va_low ^= v >> 24;
	va_low &= 0x7ff;
	if (v & HPTE_V_LARGE) {
		rb |= 1;			/* L field */
		if (cpu_has_feature(CPU_FTR_ARCH_206) &&
		    (r & 0xff000)) {
			/* non-16MB large page, must be 64k */
			/* (masks depend on page size) */
			rb |= 0x1000;		/* page encoding in LP field */
			rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */
			rb |= (va_low & 0xfe);	/* AVAL field (P7 doesn't seem to care) */
		}
	} else {
		/* 4kB page */
		rb |= (va_low & 0x7ff) << 12;	/* remaining 11b of VA */
	}
	rb |= (v >> 54) & 0x300;		/* B field */
	return rb;
}

#endif /* __ASM_KVM_BOOK3S_64_H__ */
KVM: PPC: Add kvm_book3s_64.h In the process of generalizing as much code as possible, I also moved the shadow vcpu code together to a generic book3s file. Unfortunately the location of the shadow vcpu is different on 32 and 64 bit, so we need a wrapper function to tell us where it is. That sounded like a perfect fit for a subarch specific header file. Here we can put anything that needs to be different between those two. Signed-off-by: Alexander Graf <agraf@suse.de> Signed-off-by: Avi Kivity <avi@redhat.com> 2010-04-16 06:11:37 +08:00			`/*`
			`* This program is free software; you can redistribute it and/or modify`
			`* it under the terms of the GNU General Public License, version 2, as`
			`* published by the Free Software Foundation.`
			`*`
			`* This program is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`* GNU General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU General Public License`
			`* along with this program; if not, write to the Free Software`
			`* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.`
			`*`
			`* Copyright SUSE Linux Products GmbH 2010`
			`*`
			`* Authors: Alexander Graf <agraf@suse.de>`
			`*/`

			`#ifndef __ASM_KVM_BOOK3S_64_H__`
			`#define __ASM_KVM_BOOK3S_64_H__`

KVM: PPC: Add support for Book3S processors in hypervisor mode This adds support for KVM running on 64-bit Book 3S processors, specifically POWER7, in hypervisor mode. Using hypervisor mode means that the guest can use the processor's supervisor mode. That means that the guest can execute privileged instructions and access privileged registers itself without trapping to the host. This gives excellent performance, but does mean that KVM cannot emulate a processor architecture other than the one that the hardware implements. This code assumes that the guest is running paravirtualized using the PAPR (Power Architecture Platform Requirements) interface, which is the interface that IBM's PowerVM hypervisor uses. That means that existing Linux distributions that run on IBM pSeries machines will also run under KVM without modification. In order to communicate the PAPR hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code to include/linux/kvm.h. Currently the choice between book3s_hv support and book3s_pr support (i.e. the existing code, which runs the guest in user mode) has to be made at kernel configuration time, so a given kernel binary can only do one or the other. This new book3s_hv code doesn't support MMIO emulation at present. Since we are running paravirtualized guests, this isn't a serious restriction. With the guest running in supervisor mode, most exceptions go straight to the guest. We will never get data or instruction storage or segment interrupts, alignment interrupts, decrementer interrupts, program interrupts, single-step interrupts, etc., coming to the hypervisor from the guest. Therefore this introduces a new KVMTEST_NONHV macro for the exception entry path so that we don't have to do the KVM test on entry to those exception handlers. We do however get hypervisor decrementer, hypervisor data storage, hypervisor instruction storage, and hypervisor emulation assist interrupts, so we have to handle those. In hypervisor mode, real-mode accesses can access all of RAM, not just a limited amount. Therefore we put all the guest state in the vcpu.arch and use the shadow_vcpu in the PACA only for temporary scratch space. We allocate the vcpu with kzalloc rather than vzalloc, and we don't use anything in the kvmppc_vcpu_book3s struct, so we don't allocate it. We don't have a shared page with the guest, but we still need a kvm_vcpu_arch_shared struct to store the values of various registers, so we include one in the vcpu_arch struct. The POWER7 processor has a restriction that all threads in a core have to be in the same partition. MMU-on kernel code counts as a partition (partition 0), so we have to do a partition switch on every entry to and exit from the guest. At present we require the host and guest to run in single-thread mode because of this hardware restriction. This code allocates a hashed page table for the guest and initializes it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We require that the guest memory is allocated using 16MB huge pages, in order to simplify the low-level memory management. This also means that we can get away without tracking paging activity in the host for now, since huge pages can't be paged or swapped. This also adds a few new exports needed by the book3s_hv code. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de> 2011-06-29 08:21:34 +08:00			`#ifdef CONFIG_KVM_BOOK3S_PR`
KVM: PPC: Use get/set for to_svcpu to help preemption When running the 64-bit Book3s PR code without CONFIG_PREEMPT_NONE, we were doing a few things wrong, most notably access to PACA fields without making sure that the pointers stay stable accross the access (preempt_disable()). This patch moves to_svcpu towards a get/put model which allows us to disable preemption while accessing the shadow vcpu fields in the PACA. That way we can run preemptible and everyone's happy! Reported-by: Jörg Sommer <joerg@alea.gnuu.de> Signed-off-by: Alexander Graf <agraf@suse.de> Signed-off-by: Avi Kivity <avi@redhat.com> 2011-12-09 21:44:13 +08:00			`static inline struct kvmppc_book3s_shadow_vcpu svcpu_get(struct kvm_vcpu vcpu)`
KVM: PPC: Add kvm_book3s_64.h In the process of generalizing as much code as possible, I also moved the shadow vcpu code together to a generic book3s file. Unfortunately the location of the shadow vcpu is different on 32 and 64 bit, so we need a wrapper function to tell us where it is. That sounded like a perfect fit for a subarch specific header file. Here we can put anything that needs to be different between those two. Signed-off-by: Alexander Graf <agraf@suse.de> Signed-off-by: Avi Kivity <avi@redhat.com> 2010-04-16 06:11:37 +08:00			`{`
KVM: PPC: Use get/set for to_svcpu to help preemption When running the 64-bit Book3s PR code without CONFIG_PREEMPT_NONE, we were doing a few things wrong, most notably access to PACA fields without making sure that the pointers stay stable accross the access (preempt_disable()). This patch moves to_svcpu towards a get/put model which allows us to disable preemption while accessing the shadow vcpu fields in the PACA. That way we can run preemptible and everyone's happy! Reported-by: Jörg Sommer <joerg@alea.gnuu.de> Signed-off-by: Alexander Graf <agraf@suse.de> Signed-off-by: Avi Kivity <avi@redhat.com> 2011-12-09 21:44:13 +08:00			`preempt_disable();`
KVM: PPC: Add kvm_book3s_64.h In the process of generalizing as much code as possible, I also moved the shadow vcpu code together to a generic book3s file. Unfortunately the location of the shadow vcpu is different on 32 and 64 bit, so we need a wrapper function to tell us where it is. That sounded like a perfect fit for a subarch specific header file. Here we can put anything that needs to be different between those two. Signed-off-by: Alexander Graf <agraf@suse.de> Signed-off-by: Avi Kivity <avi@redhat.com> 2010-04-16 06:11:37 +08:00			`return &get_paca()->shadow_vcpu;`
			`}`
KVM: PPC: Use get/set for to_svcpu to help preemption When running the 64-bit Book3s PR code without CONFIG_PREEMPT_NONE, we were doing a few things wrong, most notably access to PACA fields without making sure that the pointers stay stable accross the access (preempt_disable()). This patch moves to_svcpu towards a get/put model which allows us to disable preemption while accessing the shadow vcpu fields in the PACA. That way we can run preemptible and everyone's happy! Reported-by: Jörg Sommer <joerg@alea.gnuu.de> Signed-off-by: Alexander Graf <agraf@suse.de> Signed-off-by: Avi Kivity <avi@redhat.com> 2011-12-09 21:44:13 +08:00
			`static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)`
			`{`
			`preempt_enable();`
			`}`
KVM: PPC: Add support for Book3S processors in hypervisor mode This adds support for KVM running on 64-bit Book 3S processors, specifically POWER7, in hypervisor mode. Using hypervisor mode means that the guest can use the processor's supervisor mode. That means that the guest can execute privileged instructions and access privileged registers itself without trapping to the host. This gives excellent performance, but does mean that KVM cannot emulate a processor architecture other than the one that the hardware implements. This code assumes that the guest is running paravirtualized using the PAPR (Power Architecture Platform Requirements) interface, which is the interface that IBM's PowerVM hypervisor uses. That means that existing Linux distributions that run on IBM pSeries machines will also run under KVM without modification. In order to communicate the PAPR hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code to include/linux/kvm.h. Currently the choice between book3s_hv support and book3s_pr support (i.e. the existing code, which runs the guest in user mode) has to be made at kernel configuration time, so a given kernel binary can only do one or the other. This new book3s_hv code doesn't support MMIO emulation at present. Since we are running paravirtualized guests, this isn't a serious restriction. With the guest running in supervisor mode, most exceptions go straight to the guest. We will never get data or instruction storage or segment interrupts, alignment interrupts, decrementer interrupts, program interrupts, single-step interrupts, etc., coming to the hypervisor from the guest. Therefore this introduces a new KVMTEST_NONHV macro for the exception entry path so that we don't have to do the KVM test on entry to those exception handlers. We do however get hypervisor decrementer, hypervisor data storage, hypervisor instruction storage, and hypervisor emulation assist interrupts, so we have to handle those. In hypervisor mode, real-mode accesses can access all of RAM, not just a limited amount. Therefore we put all the guest state in the vcpu.arch and use the shadow_vcpu in the PACA only for temporary scratch space. We allocate the vcpu with kzalloc rather than vzalloc, and we don't use anything in the kvmppc_vcpu_book3s struct, so we don't allocate it. We don't have a shared page with the guest, but we still need a kvm_vcpu_arch_shared struct to store the values of various registers, so we include one in the vcpu_arch struct. The POWER7 processor has a restriction that all threads in a core have to be in the same partition. MMU-on kernel code counts as a partition (partition 0), so we have to do a partition switch on every entry to and exit from the guest. At present we require the host and guest to run in single-thread mode because of this hardware restriction. This code allocates a hashed page table for the guest and initializes it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We require that the guest memory is allocated using 16MB huge pages, in order to simplify the low-level memory management. This also means that we can get away without tracking paging activity in the host for now, since huge pages can't be paged or swapped. This also adds a few new exports needed by the book3s_hv code. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de> 2011-06-29 08:21:34 +08:00			`#endif`
KVM: PPC: Add kvm_book3s_64.h In the process of generalizing as much code as possible, I also moved the shadow vcpu code together to a generic book3s file. Unfortunately the location of the shadow vcpu is different on 32 and 64 bit, so we need a wrapper function to tell us where it is. That sounded like a perfect fit for a subarch specific header file. Here we can put anything that needs to be different between those two. Signed-off-by: Alexander Graf <agraf@suse.de> Signed-off-by: Avi Kivity <avi@redhat.com> 2010-04-16 06:11:37 +08:00
KVM: PPC: Accelerate H_PUT_TCE by implementing it in real mode This improves I/O performance for guests using the PAPR paravirtualization interface by making the H_PUT_TCE hcall faster, by implementing it in real mode. H_PUT_TCE is used for updating virtual IOMMU tables, and is used both for virtual I/O and for real I/O in the PAPR interface. Since this moves the IOMMU tables into the kernel, we define a new KVM_CREATE_SPAPR_TCE ioctl to allow qemu to create the tables. The ioctl returns a file descriptor which can be used to mmap the newly created table. The qemu driver models use them in the same way as userspace managed tables, but they can be updated directly by the guest with a real-mode H_PUT_TCE implementation, reducing the number of host/guest context switches during guest IO. There are certain circumstances where it is useful for userland qemu to write to the TCE table even if the kernel H_PUT_TCE path is used most of the time. Specifically, allowing this will avoid awkwardness when we need to reset the table. More importantly, we will in the future need to write the table in order to restore its state after a checkpoint resume or migration. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de> 2011-06-29 08:22:41 +08:00			`#define SPAPR_TCE_SHIFT 12`

KVM: PPC: Keep a record of HV guest view of hashed page table entries This adds an array that parallels the guest hashed page table (HPT), that is, it has one entry per HPTE, used to store the guest's view of the second doubleword of the corresponding HPTE. The first doubleword in the HPTE is the same as the guest's idea of it, so we don't need to store a copy, but the second doubleword in the HPTE has the real page number rather than the guest's logical page number. This allows us to remove the back_translate() and reverse_xlate() functions. This "reverse mapping" array is vmalloc'd, meaning that to access it in real mode we have to walk the kernel's page tables explicitly. That is done by the new real_vmalloc_addr() function. (In fact this returns an address in the linear mapping, so the result is usable both in real mode and in virtual mode.) There are also some minor cleanups here: moving the definitions of HPT_ORDER etc. to a header file and defining HPT_NPTE for HPT_NPTEG << 3. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de> Signed-off-by: Avi Kivity <avi@redhat.com> 2011-12-12 20:27:39 +08:00			`#ifdef CONFIG_KVM_BOOK3S_64_HV`
			`/* For now use fixed-size 16MB page table */`
			`#define HPT_ORDER 24`
			`#define HPT_NPTEG (1ul << (HPT_ORDER - 7)) /* 128B per pteg */`
			`#define HPT_NPTE (HPT_NPTEG << 3) /* 8 PTEs per PTEG */`
			`#define HPT_HASH_MASK (HPT_NPTEG - 1)`
			`#endif`

KVM: PPC: Make the H_ENTER hcall more reliable At present, our implementation of H_ENTER only makes one try at locking each slot that it looks at, and doesn't even retry the ldarx/stdcx. atomic update sequence that it uses to attempt to lock the slot. Thus it can return the H_PTEG_FULL error unnecessarily, particularly when the H_EXACT flag is set, meaning that the caller wants a specific PTEG slot. This improves the situation by making a second pass when no free HPTE slot is found, where we spin until we succeed in locking each slot in turn and then check whether it is full while we hold the lock. If the second pass fails, then we return H_PTEG_FULL. This also moves lock_hpte to a header file (since later commits in this series will need to use it from other source files) and renames it to try_lock_hpte, which is a somewhat less misleading name. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de> Signed-off-by: Avi Kivity <avi@redhat.com> 2011-12-12 20:30:16 +08:00			`/*`
			`* We use a lock bit in HPTE dword 0 to synchronize updates and`
			`* accesses to each HPTE, and another bit to indicate non-present`
			`* HPTEs.`
			`*/`
			`#define HPTE_V_HVLOCK 0x40UL`

			`static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits)`
			`{`
			`unsigned long tmp, old;`

			`asm volatile(" ldarx %0,0,%2\n"`
			`" and. %1,%0,%3\n"`
			`" bne 2f\n"`
			`" ori %0,%0,%4\n"`
			`" stdcx. %0,0,%2\n"`
			`" beq+ 2f\n"`
			`" li %1,%3\n"`
			`"2: isync"`
			`: "=&r" (tmp), "=&r" (old)`
			`: "r" (hpte), "r" (bits), "i" (HPTE_V_HVLOCK)`
			`: "cc", "memory");`
			`return old == 0;`
			`}`

KVM: PPC: move compute_tlbie_rb to book3s_64 common header compute_tlbie_rb is only used on ppc64 and cannot be compiled on ppc32. Signed-off-by: Andreas Schwab <schwab@linux-m68k.org> Signed-off-by: Alexander Graf <agraf@suse.de> 2011-11-08 15:08:52 +08:00			`static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,`
			`unsigned long pte_index)`
			`{`
			`unsigned long rb, va_low;`

			`rb = (v & ~0x7fUL) << 16; /* AVA field */`
			`va_low = pte_index >> 3;`
			`if (v & HPTE_V_SECONDARY)`
			`va_low = ~va_low;`
			`/* xor vsid from AVA */`
			`if (!(v & HPTE_V_1TB_SEG))`
			`va_low ^= v >> 12;`
			`else`
			`va_low ^= v >> 24;`
			`va_low &= 0x7ff;`
			`if (v & HPTE_V_LARGE) {`
			`rb \|= 1; /* L field */`
			`if (cpu_has_feature(CPU_FTR_ARCH_206) &&`
			`(r & 0xff000)) {`
			`/* non-16MB large page, must be 64k */`
			`/* (masks depend on page size) */`
			`rb \|= 0x1000; /* page encoding in LP field */`
			`rb \|= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */`
			`rb \|= (va_low & 0xfe); /* AVAL field (P7 doesn't seem to care) */`
			`}`
			`} else {`
			`/* 4kB page */`
			`rb \|= (va_low & 0x7ff) << 12; /* remaining 11b of VA */`
			`}`
			`rb \|= (v >> 54) & 0x300; /* B field */`
			`return rb;`
			`}`

KVM: PPC: Add kvm_book3s_64.h In the process of generalizing as much code as possible, I also moved the shadow vcpu code together to a generic book3s file. Unfortunately the location of the shadow vcpu is different on 32 and 64 bit, so we need a wrapper function to tell us where it is. That sounded like a perfect fit for a subarch specific header file. Here we can put anything that needs to be different between those two. Signed-off-by: Alexander Graf <agraf@suse.de> Signed-off-by: Avi Kivity <avi@redhat.com> 2010-04-16 06:11:37 +08:00			`#endif /* __ASM_KVM_BOOK3S_64_H__ */`