KVM fixes for v4.11-rc6
ARM: - Fix a problem with GICv3 userspace save/restore - Clarify GICv2 userspace save/restore ABI - Be more careful in clearing GIC LRs - Add missing synchronization primitive to our MMU handling code PPC: - Check for a NULL return from kzalloc s390: - Prevent translation exception errors on valid page tables for the instruction-execution-protection support x86: - Fix Page-Modification Logging when running a nested guest -----BEGIN PGP SIGNATURE----- iQEcBAABCAAGBQJY5/X8AAoJEED/6hsPKofo8hQH/As3CbihZMysaK6JJTx5oMZw b3W8p8xVXVu4dKM8WnXa6m5xBDFmOa7eBB+CtT3gP68XnFvMpr/vPmDv6v6i9p8q 7VyALDqqk2fxDmgHEwuETw9XZyuhdyCz/GaINCdnAJs25wTFOA7r0WEW5W8qRJpA 9nQirapdJcknymIch1JqeWlYYmbIaFzT8jItfA9QQ7F9mG4pxC8D1k2D56lNYwTf FJIgXgkMPe7CPDXmgc/KqT5+iVsc/+SgzP/WdH6bX/007TV71sksxxfz6fIrao0X RtcL2WIZTXBdSNrvXflHhCfYgogPgCnYp8AsYTIa+IEijcfteJx7UiET47Ne0Ow= =/SPG -----END PGP SIGNATURE----- Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm Pull KVM fixes from Radim Krčmář: "ARM: - Fix a problem with GICv3 userspace save/restore - Clarify GICv2 userspace save/restore ABI - Be more careful in clearing GIC LRs - Add missing synchronization primitive to our MMU handling code PPC: - Check for a NULL return from kzalloc s390: - Prevent translation exception errors on valid page tables for the instruction-execution-protection support x86: - Fix Page-Modification Logging when running a nested guest" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: KVM: PPC: Book3S HV: Check for kmalloc errors in ioctl KVM: nVMX: initialize PML fields in vmcs02 KVM: nVMX: do not leak PML full vmexit to L1 KVM: arm/arm64: vgic: Fix GICC_PMR uaccess on GICv3 and clarify ABI KVM: arm64: Ensure LRs are clear when they should be kvm: arm/arm64: Fix locking for kvm_free_stage2_pgd KVM: s390: remove change-recording override support arm/arm64: KVM: Take mmap_sem in kvm_arch_prepare_memory_region arm/arm64: KVM: Take mmap_sem in stage2_unmap_vm
This commit is contained in:
commit
542380a208
|
@ -83,6 +83,12 @@ Groups:
|
|||
|
||||
Bits for undefined preemption levels are RAZ/WI.
|
||||
|
||||
For historical reasons and to provide ABI compatibility with userspace we
|
||||
export the GICC_PMR register in the format of the GICH_VMCR.VMPriMask
|
||||
field in the lower 5 bits of a word, meaning that userspace must always
|
||||
use the lower 5 bits to communicate with the KVM device and must shift the
|
||||
value left by 3 places to obtain the actual priority mask level.
|
||||
|
||||
Limitations:
|
||||
- Priorities are not implemented, and registers are RAZ/WI
|
||||
- Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
|
||||
|
|
|
@ -1124,6 +1124,9 @@ static void cpu_hyp_reinit(void)
|
|||
if (__hyp_get_vectors() == hyp_default_vectors)
|
||||
cpu_init_hyp_mode(NULL);
|
||||
}
|
||||
|
||||
if (vgic_present)
|
||||
kvm_vgic_init_cpu_hardware();
|
||||
}
|
||||
|
||||
static void cpu_hyp_reset(void)
|
||||
|
|
|
@ -292,11 +292,18 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
|
|||
phys_addr_t addr = start, end = start + size;
|
||||
phys_addr_t next;
|
||||
|
||||
assert_spin_locked(&kvm->mmu_lock);
|
||||
pgd = kvm->arch.pgd + stage2_pgd_index(addr);
|
||||
do {
|
||||
next = stage2_pgd_addr_end(addr, end);
|
||||
if (!stage2_pgd_none(*pgd))
|
||||
unmap_stage2_puds(kvm, pgd, addr, next);
|
||||
/*
|
||||
* If the range is too large, release the kvm->mmu_lock
|
||||
* to prevent starvation and lockup detector warnings.
|
||||
*/
|
||||
if (next != end)
|
||||
cond_resched_lock(&kvm->mmu_lock);
|
||||
} while (pgd++, addr = next, addr != end);
|
||||
}
|
||||
|
||||
|
@ -803,6 +810,7 @@ void stage2_unmap_vm(struct kvm *kvm)
|
|||
int idx;
|
||||
|
||||
idx = srcu_read_lock(&kvm->srcu);
|
||||
down_read(&current->mm->mmap_sem);
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
|
||||
slots = kvm_memslots(kvm);
|
||||
|
@ -810,6 +818,7 @@ void stage2_unmap_vm(struct kvm *kvm)
|
|||
stage2_unmap_memslot(kvm, memslot);
|
||||
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
up_read(&current->mm->mmap_sem);
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
}
|
||||
|
||||
|
@ -829,7 +838,10 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
|
|||
if (kvm->arch.pgd == NULL)
|
||||
return;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
|
||||
/* Free the HW pgd, one page at a time */
|
||||
free_pages_exact(kvm->arch.pgd, S2_PGD_SIZE);
|
||||
kvm->arch.pgd = NULL;
|
||||
|
@ -1801,6 +1813,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
|||
(KVM_PHYS_SIZE >> PAGE_SHIFT))
|
||||
return -EFAULT;
|
||||
|
||||
down_read(&current->mm->mmap_sem);
|
||||
/*
|
||||
* A memory region could potentially cover multiple VMAs, and any holes
|
||||
* between them, so iterate over all of them to find out if we can map
|
||||
|
@ -1844,8 +1857,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
|||
pa += vm_start - vma->vm_start;
|
||||
|
||||
/* IO region dirty page logging not allowed */
|
||||
if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES)
|
||||
return -EINVAL;
|
||||
if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
|
||||
vm_end - vm_start,
|
||||
|
@ -1857,7 +1872,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
|||
} while (hva < reg_end);
|
||||
|
||||
if (change == KVM_MR_FLAGS_ONLY)
|
||||
return ret;
|
||||
goto out;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
if (ret)
|
||||
|
@ -1865,6 +1880,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
|||
else
|
||||
stage2_flush_memslot(kvm, memslot);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
out:
|
||||
up_read(&current->mm->mmap_sem);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -1487,6 +1487,10 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
|
|||
/* start new resize */
|
||||
|
||||
resize = kzalloc(sizeof(*resize), GFP_KERNEL);
|
||||
if (!resize) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
resize->order = shift;
|
||||
resize->kvm = kvm;
|
||||
INIT_WORK(&resize->work, resize_hpt_prepare_work);
|
||||
|
|
|
@ -168,8 +168,7 @@ union page_table_entry {
|
|||
unsigned long z : 1; /* Zero Bit */
|
||||
unsigned long i : 1; /* Page-Invalid Bit */
|
||||
unsigned long p : 1; /* DAT-Protection Bit */
|
||||
unsigned long co : 1; /* Change-Recording Override */
|
||||
unsigned long : 8;
|
||||
unsigned long : 9;
|
||||
};
|
||||
};
|
||||
|
||||
|
@ -745,8 +744,6 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
|
|||
return PGM_PAGE_TRANSLATION;
|
||||
if (pte.z)
|
||||
return PGM_TRANSLATION_SPEC;
|
||||
if (pte.co && !edat1)
|
||||
return PGM_TRANSLATION_SPEC;
|
||||
dat_protection |= pte.p;
|
||||
raddr.pfra = pte.pfra;
|
||||
real_address:
|
||||
|
@ -1182,7 +1179,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
|
|||
rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val);
|
||||
if (!rc && pte.i)
|
||||
rc = PGM_PAGE_TRANSLATION;
|
||||
if (!rc && (pte.z || (pte.co && sg->edat_level < 1)))
|
||||
if (!rc && pte.z)
|
||||
rc = PGM_TRANSLATION_SPEC;
|
||||
shadow_page:
|
||||
pte.p |= dat_protection;
|
||||
|
|
|
@ -8198,6 +8198,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
|
|||
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
|
||||
case EXIT_REASON_PREEMPTION_TIMER:
|
||||
return false;
|
||||
case EXIT_REASON_PML_FULL:
|
||||
/* We don't expose PML support to L1. */
|
||||
return false;
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
|
@ -10267,6 +10270,18 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
|
|||
|
||||
}
|
||||
|
||||
if (enable_pml) {
|
||||
/*
|
||||
* Conceptually we want to copy the PML address and index from
|
||||
* vmcs01 here, and then back to vmcs01 on nested vmexit. But,
|
||||
* since we always flush the log on each vmexit, this happens
|
||||
* to be equivalent to simply resetting the fields in vmcs02.
|
||||
*/
|
||||
ASSERT(vmx->pml_pg);
|
||||
vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
|
||||
vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
|
||||
}
|
||||
|
||||
if (nested_cpu_has_ept(vmcs12)) {
|
||||
kvm_mmu_unload(vcpu);
|
||||
nested_ept_init_mmu_context(vcpu);
|
||||
|
|
|
@ -295,6 +295,7 @@ void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu);
|
|||
void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
|
||||
int kvm_vgic_map_resources(struct kvm *kvm);
|
||||
int kvm_vgic_hyp_init(void);
|
||||
void kvm_vgic_init_cpu_hardware(void);
|
||||
|
||||
int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
|
||||
bool level);
|
||||
|
|
|
@ -96,6 +96,9 @@
|
|||
#define GICH_MISR_EOI (1 << 0)
|
||||
#define GICH_MISR_U (1 << 1)
|
||||
|
||||
#define GICV_PMR_PRIORITY_SHIFT 3
|
||||
#define GICV_PMR_PRIORITY_MASK (0x1f << GICV_PMR_PRIORITY_SHIFT)
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#include <linux/irqdomain.h>
|
||||
|
|
|
@ -391,6 +391,25 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
|
|||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_vgic_init_cpu_hardware - initialize the GIC VE hardware
|
||||
*
|
||||
* For a specific CPU, initialize the GIC VE hardware.
|
||||
*/
|
||||
void kvm_vgic_init_cpu_hardware(void)
|
||||
{
|
||||
BUG_ON(preemptible());
|
||||
|
||||
/*
|
||||
* We want to make sure the list registers start out clear so that we
|
||||
* only have to program the used registers.
|
||||
*/
|
||||
if (kvm_vgic_global_state.type == VGIC_V2)
|
||||
vgic_v2_init_lrs();
|
||||
else
|
||||
kvm_call_hyp(__vgic_v3_init_lrs);
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_vgic_hyp_init: populates the kvm_vgic_global_state variable
|
||||
* according to the host GIC model. Accordingly calls either
|
||||
|
|
|
@ -229,7 +229,15 @@ static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu,
|
|||
val = vmcr.ctlr;
|
||||
break;
|
||||
case GIC_CPU_PRIMASK:
|
||||
val = vmcr.pmr;
|
||||
/*
|
||||
* Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the
|
||||
* PMR field as GICH_VMCR.VMPriMask rather than
|
||||
* GICC_PMR.Priority, so we expose the upper five bits of
|
||||
* priority mask to userspace using the lower bits in the
|
||||
* unsigned long.
|
||||
*/
|
||||
val = (vmcr.pmr & GICV_PMR_PRIORITY_MASK) >>
|
||||
GICV_PMR_PRIORITY_SHIFT;
|
||||
break;
|
||||
case GIC_CPU_BINPOINT:
|
||||
val = vmcr.bpr;
|
||||
|
@ -262,7 +270,15 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu,
|
|||
vmcr.ctlr = val;
|
||||
break;
|
||||
case GIC_CPU_PRIMASK:
|
||||
vmcr.pmr = val;
|
||||
/*
|
||||
* Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the
|
||||
* PMR field as GICH_VMCR.VMPriMask rather than
|
||||
* GICC_PMR.Priority, so we expose the upper five bits of
|
||||
* priority mask to userspace using the lower bits in the
|
||||
* unsigned long.
|
||||
*/
|
||||
vmcr.pmr = (val << GICV_PMR_PRIORITY_SHIFT) &
|
||||
GICV_PMR_PRIORITY_MASK;
|
||||
break;
|
||||
case GIC_CPU_BINPOINT:
|
||||
vmcr.bpr = val;
|
||||
|
|
|
@ -36,6 +36,21 @@ static unsigned long *u64_to_bitmask(u64 *val)
|
|||
return (unsigned long *)val;
|
||||
}
|
||||
|
||||
static inline void vgic_v2_write_lr(int lr, u32 val)
|
||||
{
|
||||
void __iomem *base = kvm_vgic_global_state.vctrl_base;
|
||||
|
||||
writel_relaxed(val, base + GICH_LR0 + (lr * 4));
|
||||
}
|
||||
|
||||
void vgic_v2_init_lrs(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < kvm_vgic_global_state.nr_lr; i++)
|
||||
vgic_v2_write_lr(i, 0);
|
||||
}
|
||||
|
||||
void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
|
||||
|
@ -191,8 +206,8 @@ void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
|
|||
GICH_VMCR_ALIAS_BINPOINT_MASK;
|
||||
vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) &
|
||||
GICH_VMCR_BINPOINT_MASK;
|
||||
vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) &
|
||||
GICH_VMCR_PRIMASK_MASK;
|
||||
vmcr |= ((vmcrp->pmr >> GICV_PMR_PRIORITY_SHIFT) <<
|
||||
GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK;
|
||||
|
||||
vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr;
|
||||
}
|
||||
|
@ -207,8 +222,8 @@ void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
|
|||
GICH_VMCR_ALIAS_BINPOINT_SHIFT;
|
||||
vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >>
|
||||
GICH_VMCR_BINPOINT_SHIFT;
|
||||
vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >>
|
||||
GICH_VMCR_PRIMASK_SHIFT;
|
||||
vmcrp->pmr = ((vmcr & GICH_VMCR_PRIMASK_MASK) >>
|
||||
GICH_VMCR_PRIMASK_SHIFT) << GICV_PMR_PRIORITY_SHIFT;
|
||||
}
|
||||
|
||||
void vgic_v2_enable(struct kvm_vcpu *vcpu)
|
||||
|
|
|
@ -81,11 +81,18 @@ static inline bool irq_is_pending(struct vgic_irq *irq)
|
|||
return irq->pending_latch || irq->line_level;
|
||||
}
|
||||
|
||||
/*
|
||||
* This struct provides an intermediate representation of the fields contained
|
||||
* in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC
|
||||
* state to userspace can generate either GICv2 or GICv3 CPU interface
|
||||
* registers regardless of the hardware backed GIC used.
|
||||
*/
|
||||
struct vgic_vmcr {
|
||||
u32 ctlr;
|
||||
u32 abpr;
|
||||
u32 bpr;
|
||||
u32 pmr;
|
||||
u32 pmr; /* Priority mask field in the GICC_PMR and
|
||||
* ICC_PMR_EL1 priority field format */
|
||||
/* The member variables below are valid only for GICv3 */
|
||||
u32 grpen0;
|
||||
u32 grpen1;
|
||||
|
@ -130,6 +137,8 @@ int vgic_v2_map_resources(struct kvm *kvm);
|
|||
int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
|
||||
enum vgic_type);
|
||||
|
||||
void vgic_v2_init_lrs(void);
|
||||
|
||||
static inline void vgic_get_irq_kref(struct vgic_irq *irq)
|
||||
{
|
||||
if (irq->intid < VGIC_MIN_LPI)
|
||||
|
|
Loading…
Reference in New Issue