From 4e4468fac4381b92eb333d94256e7fb8350f3de3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 25 Apr 2014 11:46:04 +0100 Subject: [PATCH 1/9] ARM: KVM: disable KVM in Kconfig on big-endian systems KVM currently crashes and burns on big-endian hosts, so don't allow it to be selected until we've got that fixed. Cc: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Christoffer Dall --- arch/arm/kvm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 466bd299b1a8..4be5bb150bdd 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -23,7 +23,7 @@ config KVM select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_MMIO select KVM_ARM_HOST - depends on ARM_VIRT_EXT && ARM_LPAE + depends on ARM_VIRT_EXT && ARM_LPAE && !CPU_BIG_ENDIAN ---help--- Support hosting virtualized guest machines. You will also need to select one or more of the processor modules below. From e8e249d78e0600cb892c87992b6c8c9ea3b301ae Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Fri, 21 Feb 2014 18:05:05 +0100 Subject: [PATCH 2/9] kvm: Use pci_enable_msix_exact() instead of pci_enable_msix() As result of deprecation of MSI-X/MSI enablement functions pci_enable_msix() and pci_enable_msi_block() all drivers using these two interfaces need to be updated to use the new pci_enable_msi_range() or pci_enable_msi_exact() and pci_enable_msix_range() or pci_enable_msix_exact() interfaces. Signed-off-by: Alexander Gordeev Cc: Gleb Natapov Cc: Paolo Bonzini Cc: kvm@vger.kernel.org Cc: linux-pci@vger.kernel.org Signed-off-by: Paolo Bonzini --- virt/kvm/assigned-dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index 8db43701016f..bf06577fea51 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -395,7 +395,8 @@ static int assigned_device_enable_host_msix(struct kvm *kvm, if (dev->entries_nr == 0) return r; - r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr); + r = pci_enable_msix_exact(dev->dev, + dev->host_msix_entries, dev->entries_nr); if (r) return r; From fe2b201b3be91575857227da7ef21f661bc460b1 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Mon, 21 Apr 2014 15:20:14 -0400 Subject: [PATCH 3/9] KVM: x86: Check for host supported fields in shadow vmcs We track shadow vmcs fields through two static lists, one for read only and another for r/w fields. However, with addition of new vmcs fields, not all fields may be supported on all hosts. If so, copy_vmcs12_to_shadow() trying to vmwrite on unsupported hosts will result in a vmwrite error. For example, commit 36be0b9deb23161 introduced GUEST_BNDCFGS, which is not supported by all processors. Filter out host unsupported fields before letting guests use shadow vmcs Signed-off-by: Bandan Das Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 53 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1f68c5831924..33e8c028842f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -503,7 +503,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) [number##_HIGH] = VMCS12_OFFSET(name)+4 -static const unsigned long shadow_read_only_fields[] = { +static unsigned long shadow_read_only_fields[] = { /* * We do NOT shadow fields that are modified when L0 * traps and emulates any vmx instruction (e.g. VMPTRLD, @@ -526,10 +526,10 @@ static const unsigned long shadow_read_only_fields[] = { GUEST_LINEAR_ADDRESS, GUEST_PHYSICAL_ADDRESS }; -static const int max_shadow_read_only_fields = +static int max_shadow_read_only_fields = ARRAY_SIZE(shadow_read_only_fields); -static const unsigned long shadow_read_write_fields[] = { +static unsigned long shadow_read_write_fields[] = { GUEST_RIP, GUEST_RSP, GUEST_CR0, @@ -558,7 +558,7 @@ static const unsigned long shadow_read_write_fields[] = { HOST_FS_SELECTOR, HOST_GS_SELECTOR }; -static const int max_shadow_read_write_fields = +static int max_shadow_read_write_fields = ARRAY_SIZE(shadow_read_write_fields); static const unsigned short vmcs_field_to_offset_table[] = { @@ -3009,6 +3009,41 @@ static void free_kvm_area(void) } } +static void init_vmcs_shadow_fields(void) +{ + int i, j; + + /* No checks for read only fields yet */ + + for (i = j = 0; i < max_shadow_read_write_fields; i++) { + switch (shadow_read_write_fields[i]) { + case GUEST_BNDCFGS: + if (!vmx_mpx_supported()) + continue; + break; + default: + break; + } + + if (j < i) + shadow_read_write_fields[j] = + shadow_read_write_fields[i]; + j++; + } + max_shadow_read_write_fields = j; + + /* shadowed fields guest access without vmexit */ + for (i = 0; i < max_shadow_read_write_fields; i++) { + clear_bit(shadow_read_write_fields[i], + vmx_vmwrite_bitmap); + clear_bit(shadow_read_write_fields[i], + vmx_vmread_bitmap); + } + for (i = 0; i < max_shadow_read_only_fields; i++) + clear_bit(shadow_read_only_fields[i], + vmx_vmread_bitmap); +} + static __init int alloc_kvm_area(void) { int cpu; @@ -3039,6 +3074,8 @@ static __init int hardware_setup(void) enable_vpid = 0; if (!cpu_has_vmx_shadow_vmcs()) enable_shadow_vmcs = 0; + if (enable_shadow_vmcs) + init_vmcs_shadow_fields(); if (!cpu_has_vmx_ept() || !cpu_has_vmx_ept_4levels()) { @@ -8803,14 +8840,6 @@ static int __init vmx_init(void) memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); - /* shadowed read/write fields */ - for (i = 0; i < max_shadow_read_write_fields; i++) { - clear_bit(shadow_read_write_fields[i], vmx_vmwrite_bitmap); - clear_bit(shadow_read_write_fields[i], vmx_vmread_bitmap); - } - /* shadowed read only fields */ - for (i = 0; i < max_shadow_read_only_fields; i++) - clear_bit(shadow_read_only_fields[i], vmx_vmread_bitmap); /* * Allow direct access to the PC debug port (it is often used for I/O From 5d4e08c45a6cf8f1ab3c7fa375007635ac569165 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Fri, 28 Mar 2014 14:25:19 +0000 Subject: [PATCH 4/9] arm: KVM: fix possible misalignment of PGDs and bounce page The kvm/mmu code shared by arm and arm64 uses kalloc() to allocate a bounce page (if hypervisor init code crosses page boundary) and hypervisor PGDs. The problem is that kalloc() does not guarantee the proper alignment. In the case of the bounce page, the page sized buffer allocated may also cross a page boundary negating the purpose and leading to a hang during kvm initialization. Likewise the PGDs allocated may not meet the minimum alignment requirements of the underlying MMU. This patch uses __get_free_page() to guarantee the worst case alignment needs of the bounce page and PGDs on both arm and arm64. Cc: # 3.10+ Signed-off-by: Mark Salter Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/kvm/mmu.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 80bb1e6c2c29..16f804938b8f 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -42,6 +42,8 @@ static unsigned long hyp_idmap_start; static unsigned long hyp_idmap_end; static phys_addr_t hyp_idmap_vector; +#define pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) + #define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x)) static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) @@ -293,14 +295,14 @@ void free_boot_hyp_pgd(void) if (boot_hyp_pgd) { unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); - kfree(boot_hyp_pgd); + free_pages((unsigned long)boot_hyp_pgd, pgd_order); boot_hyp_pgd = NULL; } if (hyp_pgd) unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); - kfree(init_bounce_page); + free_page((unsigned long)init_bounce_page); init_bounce_page = NULL; mutex_unlock(&kvm_hyp_pgd_mutex); @@ -330,7 +332,7 @@ void free_hyp_pgds(void) for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); - kfree(hyp_pgd); + free_pages((unsigned long)hyp_pgd, pgd_order); hyp_pgd = NULL; } @@ -1024,7 +1026,7 @@ int kvm_mmu_init(void) size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start; phys_addr_t phys_base; - init_bounce_page = kmalloc(PAGE_SIZE, GFP_KERNEL); + init_bounce_page = (void *)__get_free_page(GFP_KERNEL); if (!init_bounce_page) { kvm_err("Couldn't allocate HYP init bounce page\n"); err = -ENOMEM; @@ -1050,8 +1052,9 @@ int kvm_mmu_init(void) (unsigned long)phys_base); } - hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); - boot_hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); + hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order); + boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order); + if (!hyp_pgd || !boot_hyp_pgd) { kvm_err("Hyp mode PGD not allocated\n"); err = -ENOMEM; From 5c8818b46e067120d33bbb2f610313240d76ba02 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 26 Apr 2014 03:43:01 -0700 Subject: [PATCH 5/9] MAINTAINERS: co-maintainance of KVM/{arm,arm64} The KVM/{arm,arm64} ports are sharing a lot of code, and are effectively co-maintained (and have been for quite a while). Make the situation official and list the two maintainers for both ports. Cc: Catalin Marinas Cc: Russell King Cc: Paolo Bonzini Cc: Gleb Natapov Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall Acked-by: Catalin Marinas Signed-off-by: Paolo Bonzini --- MAINTAINERS | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 6dc67b1fdb50..24815e784905 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5108,14 +5108,19 @@ F: drivers/s390/kvm/ KERNEL VIRTUAL MACHINE (KVM) FOR ARM M: Christoffer Dall +M: Marc Zyngier +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: kvmarm@lists.cs.columbia.edu W: http://systems.cs.columbia.edu/projects/kvm-arm S: Supported F: arch/arm/include/uapi/asm/kvm* F: arch/arm/include/asm/kvm* F: arch/arm/kvm/ +F: virt/kvm/arm/ +F: include/kvm/arm_* KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64) +M: Christoffer Dall M: Marc Zyngier L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: kvmarm@lists.cs.columbia.edu From 91021a6c8ffdc55804dab5acdfc7de4f278b9ac3 Mon Sep 17 00:00:00 2001 From: Haibin Wang Date: Thu, 10 Apr 2014 13:14:32 +0100 Subject: [PATCH 6/9] KVM: ARM: vgic: Fix sgi dispatch problem When dispatch SGI(mode == 0), that is the vcpu of VM should send sgi to the cpu which the target_cpus list. So, there must add the "break" to branch of case 0. Cc: # 3.10+ Signed-off-by: Haibin Wang Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 47b29834a6b6..7e8b44efb739 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -916,6 +916,7 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) case 0: if (!target_cpus) return; + break; case 1: target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff; From 41c22f626254b9dc0376928cae009e73d1b6a49a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 21 Apr 2014 15:26:01 +0200 Subject: [PATCH 7/9] KVM: async_pf: mm->mm_users can not pin apf->mm get_user_pages(mm) is simply wrong if mm->mm_users == 0 and exit_mmap/etc was already called (or is in progress), mm->mm_count can only pin mm->pgd and mm_struct itself. Change kvm_setup_async_pf/async_pf_execute to inc/dec mm->mm_users. kvm_create_vm/kvm_destroy_vm play with ->mm_count too but this case looks fine at first glance, it seems that this ->mm is only used to verify that current->mm == kvm->mm. Signed-off-by: Oleg Nesterov Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- virt/kvm/async_pf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 10df100c4514..06e6401d6ef4 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -101,7 +101,7 @@ static void async_pf_execute(struct work_struct *work) if (waitqueue_active(&vcpu->wq)) wake_up_interruptible(&vcpu->wq); - mmdrop(mm); + mmput(mm); kvm_put_kvm(vcpu->kvm); } @@ -118,7 +118,7 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) flush_work(&work->work); #else if (cancel_work_sync(&work->work)) { - mmdrop(work->mm); + mmput(work->mm); kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */ kmem_cache_free(async_pf_cache, work); } @@ -183,7 +183,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, work->addr = hva; work->arch = *arch; work->mm = current->mm; - atomic_inc(&work->mm->mm_count); + atomic_inc(&work->mm->mm_users); kvm_get_kvm(work->vcpu->kvm); /* this can't really happen otherwise gfn_to_pfn_async @@ -201,7 +201,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, return 1; retry_sync: kvm_put_kvm(work->vcpu->kvm); - mmdrop(work->mm); + mmput(work->mm); kmem_cache_free(async_pf_cache, work); return 0; } From f2ae85b2ab3776b9e4e42e5b6fa090f40d396794 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 11 Apr 2014 00:07:18 +0200 Subject: [PATCH 8/9] KVM: arm/arm64: vgic: fix GICD_ICFGR register accesses Since KVM internally represents the ICFGR registers by stuffing two of them into one word, the offset for accessing the internal representation and the one for the MMIO based access are different. So keep the original offset around, but adjust the internal array offset by one bit. Reported-by: Haibin Wang Signed-off-by: Andre Przywara Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 7e8b44efb739..f9af48c9eb37 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -548,11 +548,10 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, u32 val; u32 *reg; - offset >>= 1; reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, - vcpu->vcpu_id, offset); + vcpu->vcpu_id, offset >> 1); - if (offset & 2) + if (offset & 4) val = *reg >> 16; else val = *reg & 0xffff; @@ -561,13 +560,13 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, vgic_reg_access(mmio, &val, offset, ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); if (mmio->is_write) { - if (offset < 4) { + if (offset < 8) { *reg = ~0U; /* Force PPIs/SGIs to 1 */ return false; } val = vgic_cfg_compress(val); - if (offset & 2) { + if (offset & 4) { *reg &= 0xffff; *reg |= val << 16; } else { From 30c2117085bc4e05d091cee6eba79f069b41a9cd Mon Sep 17 00:00:00 2001 From: Haibin Wang Date: Tue, 29 Apr 2014 14:49:17 +0800 Subject: [PATCH 9/9] KVM: ARM: vgic: Fix the overlap check action about setting the GICD & GICC base address. Currently below check in vgic_ioaddr_overlap will always succeed, because the vgic dist base and vgic cpu base are still kept UNDEF after initialization. The code as follows will be return forever. if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu)) return 0; So, before invoking the vgic_ioaddr_overlap, it needs to set the corresponding base address firstly. Signed-off-by: Haibin Wang Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index f9af48c9eb37..56ff9bebb577 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1667,10 +1667,11 @@ static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr, if (addr + size < addr) return -EINVAL; + *ioaddr = addr; ret = vgic_ioaddr_overlap(kvm); if (ret) - return ret; - *ioaddr = addr; + *ioaddr = VGIC_ADDR_UNDEF; + return ret; }