KVM fixes for v4.9-rc6
ARM:
 - Fix handling of the 32bit cycle counter
 - Fix cycle counter filtering

x86:
 - Fix a race leading to double unregistering of user notifiers
 - Amend oversight in kvm_arch_set_irq that turned Hyper-V code dead
 - Use SRCU around kvm_lapic_set_vapic_addr
 - Avoid recursive flushing of asynchronous page faults
 - Do not rely on deferred update in KVM_GET_CLOCK, which fixes #GP
 - Let userspace know that KVM_GET_CLOCK is useful with master clock;
   4.9 changed the return value to better match the guest clock, but
   didn't provide means to let guests take advantage of it

-----BEGIN PGP SIGNATURE-----

iQEcBAABCAAGBQJYMKbdAAoJEED/6hsPKofoPcEIAJF7hsuO3B2dMfUTz1EK+4IH
B7JXr9mlAAEG61y82EY06Es+3gt69XBiE5iKBpxlL6jIJJiUOd+oOdygV0hv4D0K
G6A03DsCWX16yJKjS7oGq4WOAiDGOpk7SU5YYlFZGqCzhaqScY2ecQFKEUYayJtt
nXG+i22eFKccrD8wlkm3ZYEjl1Hif7bUmHfxL/CBec1cDNxOys1dB24VsZl90n89
7pMUtzOTskUXjbNX+cKmFtR18/XUdlucnn0w9AApf3M8GnmUxIjIaeFSLbzuNz84
U2o3LdxrYysSKSsc7VleHtWVfCbPbC62vpUI51XdNw0u7BHlKkVdvBfJEUmSpkw=
=Crjd
-----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Radim Krčmář:
 "ARM:
   - Fix handling of the 32bit cycle counter
   - Fix cycle counter filtering

  x86:
   - Fix a race leading to double unregistering of user notifiers
   - Amend oversight in kvm_arch_set_irq that turned Hyper-V code dead
   - Use SRCU around kvm_lapic_set_vapic_addr
   - Avoid recursive flushing of asynchronous page faults
   - Do not rely on deferred update in KVM_GET_CLOCK, which fixes #GP
   - Let userspace know that KVM_GET_CLOCK is useful with master clock;
     4.9 changed the return value to better match the guest clock, but
     didn't provide means to let guests take advantage of it"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  kvm: x86: merge kvm_arch_set_irq and kvm_arch_set_irq_inatomic
  KVM: x86: fix missed SRCU usage in kvm_lapic_set_vapic_addr
  KVM: async_pf: avoid recursive flushing of work items
  kvm: kvmclock: let KVM_GET_CLOCK return whether the master clock is in use
  KVM: Disable irq while unregistering user notifier
  KVM: x86: do not go through vcpu in __get_kvmclock_ns
  KVM: arm64: Fix the issues when guest PMCCFILTR is configured
  arm64: KVM: pmu: Fix AArch32 cycle counter access
commit dce9ce3615
@@ -777,6 +777,17 @@ Gets the current timestamp of kvmclock as seen by the current guest. In
 conjunction with KVM_SET_CLOCK, it is used to ensure monotonicity on scenarios
 such as migration.
 
+When KVM_CAP_ADJUST_CLOCK is passed to KVM_CHECK_EXTENSION, it returns the
+set of bits that KVM can return in struct kvm_clock_data's flag member.
+
+The only flag defined now is KVM_CLOCK_TSC_STABLE.  If set, the returned
+value is the exact kvmclock value seen by all VCPUs at the instant
+when KVM_GET_CLOCK was called.  If clear, the returned value is simply
+CLOCK_MONOTONIC plus a constant offset; the offset can be modified
+with KVM_SET_CLOCK.  KVM will try to make all VCPUs follow this clock,
+but the exact value read by each VCPU could differ, because the host
+TSC is not stable.
+
 struct kvm_clock_data {
 	__u64 clock;  /* kvmclock current value */
 	__u32 flags;

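The documentation added above amounts to a two-step handshake for userspace: query KVM_CAP_ADJUST_CLOCK to learn which flag bits the kernel can report, then read KVM_GET_CLOCK and test kvm_clock_data.flags. A minimal sketch of that pattern follows; the ioctls, the capability, and KVM_CLOCK_TSC_STABLE come from the documentation, while the helper name and the error handling are illustrative, not taken from any real VMM.

/*
 * Sketch: query the KVM_GET_CLOCK flag capabilities, then read the clock.
 * Build against the kernel UAPI headers; run as a user allowed to open
 * /dev/kvm.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int report_kvmclock(int kvm_fd, int vm_fd)
{
	struct kvm_clock_data data;
	int caps;

	/* Which kvm_clock_data flag bits can this kernel report? */
	caps = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_ADJUST_CLOCK);
	if (caps < 0) {
		perror("KVM_CHECK_EXTENSION");
		return -1;
	}

	if (ioctl(vm_fd, KVM_GET_CLOCK, &data) < 0) {
		perror("KVM_GET_CLOCK");
		return -1;
	}

	if ((caps & KVM_CLOCK_TSC_STABLE) && (data.flags & KVM_CLOCK_TSC_STABLE))
		printf("kvmclock: %llu ns, identical on all VCPUs\n",
		       (unsigned long long)data.clock);
	else
		printf("kvmclock: %llu ns, CLOCK_MONOTONIC plus an offset\n",
		       (unsigned long long)data.clock);
	return 0;
}

int main(void)
{
	int kvm_fd = open("/dev/kvm", O_RDWR);
	int vm_fd;

	if (kvm_fd < 0) {
		perror("/dev/kvm");
		return 1;
	}
	vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);
	if (vm_fd < 0) {
		perror("KVM_CREATE_VM");
		return 1;
	}
	return report_kvmclock(kvm_fd, vm_fd) ? 1 : 0;
}

Older kernels simply return 1 from the capability check, with no flag bits defined; that is why the UAPI comment later in this series says not to use the value 1, and why the flags word still has to be tested separately.
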
@@ -46,7 +46,15 @@
 #define ARMV8_PMU_EVTYPE_MASK	0xc800ffff	/* Mask for writable bits */
 #define ARMV8_PMU_EVTYPE_EVENT	0xffff		/* Mask for EVENT bits */
 
-#define ARMV8_PMU_EVTYPE_EVENT_SW_INCR	0	/* Software increment event */
+/*
+ * PMUv3 event types: required events
+ */
+#define ARMV8_PMUV3_PERFCTR_SW_INCR		0x00
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL	0x03
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE		0x04
+#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED		0x10
+#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES		0x11
+#define ARMV8_PMUV3_PERFCTR_BR_PRED		0x12
 
 /*
  * Event filters for PMUv3

@@ -31,17 +31,9 @@
 
 /*
  * ARMv8 PMUv3 Performance Events handling code.
- * Common event types.
+ * Common event types (some are defined in asm/perf_event.h).
  */
 
-/* Required events. */
-#define ARMV8_PMUV3_PERFCTR_SW_INCR		0x00
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL	0x03
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE		0x04
-#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED		0x10
-#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES		0x11
-#define ARMV8_PMUV3_PERFCTR_BR_PRED		0x12
-
 /* At least one of the following is required. */
 #define ARMV8_PMUV3_PERFCTR_INST_RETIRED	0x08
 #define ARMV8_PMUV3_PERFCTR_INST_SPEC		0x1B

@@ -597,8 +597,14 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
 
 			idx = ARMV8_PMU_CYCLE_IDX;
 		} else {
-			BUG();
+			return false;
 		}
+	} else if (r->CRn == 0 && r->CRm == 9) {
+		/* PMCCNTR */
+		if (pmu_access_event_counter_el0_disabled(vcpu))
+			return false;
+
+		idx = ARMV8_PMU_CYCLE_IDX;
 	} else if (r->CRn == 14 && (r->CRm & 12) == 8) {
 		/* PMEVCNTRn_EL0 */
 		if (pmu_access_event_counter_el0_disabled(vcpu))

@@ -606,7 +612,7 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
 
 		idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
 	} else {
-		BUG();
+		return false;
 	}
 
 	if (!pmu_counter_idx_valid(vcpu, idx))

@@ -156,6 +156,16 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 }
 
+static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e,
+		    struct kvm *kvm, int irq_source_id, int level,
+		    bool line_status)
+{
+	if (!level)
+		return -1;
+
+	return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint);
+}
+
 int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
 			      struct kvm *kvm, int irq_source_id, int level,
 			      bool line_status)

@@ -163,9 +173,12 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
 	struct kvm_lapic_irq irq;
 	int r;
 
-	if (unlikely(e->type != KVM_IRQ_ROUTING_MSI))
-		return -EWOULDBLOCK;
+	switch (e->type) {
+	case KVM_IRQ_ROUTING_HV_SINT:
+		return kvm_hv_set_sint(e, kvm, irq_source_id, level,
+				       line_status);
 
+	case KVM_IRQ_ROUTING_MSI:
 		if (kvm_msi_route_invalid(kvm, e))
 			return -EINVAL;
 

@@ -173,7 +186,12 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
 
 		if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
 			return r;
-		else
+		break;
+
+	default:
+		break;
+	}
+
 	return -EWOULDBLOCK;
 }
 

@@ -254,16 +272,6 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
 	srcu_read_unlock(&kvm->irq_srcu, idx);
 }
 
-static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e,
-		    struct kvm *kvm, int irq_source_id, int level,
-		    bool line_status)
-{
-	if (!level)
-		return -1;
-
-	return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint);
-}
-
 int kvm_set_routing_entry(struct kvm *kvm,
 			  struct kvm_kernel_irq_routing_entry *e,
 			  const struct kvm_irq_routing_entry *ue)

@@ -423,18 +431,6 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
 	srcu_read_unlock(&kvm->irq_srcu, idx);
 }
 
-int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm,
-		     int irq_source_id, int level, bool line_status)
-{
-	switch (irq->type) {
-	case KVM_IRQ_ROUTING_HV_SINT:
-		return kvm_hv_set_sint(irq, kvm, irq_source_id, level,
-				       line_status);
-	default:
-		return -EWOULDBLOCK;
-	}
-}
-
 void kvm_arch_irq_routing_update(struct kvm *kvm)
 {
 	kvm_hv_irq_routing_update(kvm);

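The hunks above fold the Hyper-V SynIC case into kvm_arch_set_irq_inatomic and delete kvm_arch_set_irq, so the SINT path is no longer dead code behind a callback that never ran. Below is a toy, self-contained model of the resulting dispatch order; only the control flow mirrors the diff, while the enum values, helper names, and return convention are made up for illustration and are not kernel API.

/*
 * Toy model: SynIC interrupts are injected directly even from atomic
 * context, MSIs are tried on the fast path, and everything else reports
 * -EWOULDBLOCK so the caller retries from a sleepable context.
 */
#include <assert.h>
#include <stdio.h>

#define EWOULDBLOCK 11

enum routing_type { ROUTING_MSI, ROUTING_HV_SINT, ROUTING_IRQCHIP };

/* Stand-ins for the real delivery helpers. */
static int deliver_hv_sint(void) { return 0; }
static int deliver_msi_fast(int fast_path_ok)
{
	return fast_path_ok ? 0 : -EWOULDBLOCK;
}

static int set_irq_inatomic(enum routing_type type, int fast_path_ok)
{
	switch (type) {
	case ROUTING_HV_SINT:
		return deliver_hv_sint();
	case ROUTING_MSI:
		if (deliver_msi_fast(fast_path_ok) == 0)
			return 0;
		break;
	default:
		break;
	}
	return -EWOULDBLOCK;	/* defer to the non-atomic path */
}

int main(void)
{
	assert(set_irq_inatomic(ROUTING_HV_SINT, 0) == 0);
	assert(set_irq_inatomic(ROUTING_MSI, 1) == 0);
	assert(set_irq_inatomic(ROUTING_MSI, 0) == -EWOULDBLOCK);
	assert(set_irq_inatomic(ROUTING_IRQCHIP, 0) == -EWOULDBLOCK);
	printf("dispatch model ok\n");
	return 0;
}
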
@@ -210,7 +210,18 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
 	struct kvm_shared_msrs *locals
 		= container_of(urn, struct kvm_shared_msrs, urn);
 	struct kvm_shared_msr_values *values;
+	unsigned long flags;
 
+	/*
+	 * Disabling irqs at this point since the following code could be
+	 * interrupted and executed through kvm_arch_hardware_disable()
+	 */
+	local_irq_save(flags);
+	if (locals->registered) {
+		locals->registered = false;
+		user_return_notifier_unregister(urn);
+	}
+	local_irq_restore(flags);
 	for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
 		values = &locals->values[slot];
 		if (values->host != values->curr) {

@@ -218,8 +229,6 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
 			values->curr = values->host;
 		}
 	}
-	locals->registered = false;
-	user_return_notifier_unregister(urn);
 }
 
 static void shared_msr_update(unsigned slot, u32 msr)

@@ -1724,18 +1733,23 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
 
 static u64 __get_kvmclock_ns(struct kvm *kvm)
 {
-	struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, 0);
 	struct kvm_arch *ka = &kvm->arch;
-	s64 ns;
+	struct pvclock_vcpu_time_info hv_clock;
 
-	if (vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT) {
-		u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
-		ns = __pvclock_read_cycles(&vcpu->arch.hv_clock, tsc);
-	} else {
-		ns = ktime_get_boot_ns() + ka->kvmclock_offset;
+	spin_lock(&ka->pvclock_gtod_sync_lock);
+	if (!ka->use_master_clock) {
+		spin_unlock(&ka->pvclock_gtod_sync_lock);
+		return ktime_get_boot_ns() + ka->kvmclock_offset;
 	}
 
-	return ns;
+	hv_clock.tsc_timestamp = ka->master_cycle_now;
+	hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
+	spin_unlock(&ka->pvclock_gtod_sync_lock);
+
+	kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
+			   &hv_clock.tsc_shift,
+			   &hv_clock.tsc_to_system_mul);
+	return __pvclock_read_cycles(&hv_clock, rdtsc());
 }
 
 u64 get_kvmclock_ns(struct kvm *kvm)

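For reference, the value that __get_kvmclock_ns now derives from the master clock is the standard pvclock scaling that kvm_get_time_scale and __pvclock_read_cycles implement: a sampled TSC delta is pre-shifted, multiplied by a 32.32 fixed-point factor, and added to system_time. The following self-contained model shows only that arithmetic; the struct layout and the 2 GHz example parameters are illustrative, and the kernel additionally uses a widening multiply that this model omits.

/*
 * Standalone model of the pvclock arithmetic: nanoseconds = system_time +
 * scale(tsc - tsc_timestamp).  Assumes deltas small enough that the 64-bit
 * multiply does not overflow.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct pvclock_sample {
	uint64_t tsc_timestamp;     /* TSC value when system_time was captured */
	uint64_t system_time;       /* nanoseconds at tsc_timestamp */
	uint32_t tsc_to_system_mul; /* fixed-point multiplier (32.32) */
	int8_t   tsc_shift;         /* pre-scale shift applied to the delta */
};

static uint64_t pvclock_read(const struct pvclock_sample *s, uint64_t tsc)
{
	uint64_t delta = tsc - s->tsc_timestamp;

	if (s->tsc_shift >= 0)
		delta <<= s->tsc_shift;
	else
		delta >>= -s->tsc_shift;

	/* (delta * mul) >> 32 converts scaled TSC ticks to nanoseconds. */
	return s->system_time + ((delta * s->tsc_to_system_mul) >> 32);
}

int main(void)
{
	/* Hypothetical 2 GHz TSC: 1 tick = 0.5 ns, so mul = 0.5 * 2^32. */
	struct pvclock_sample s = {
		.tsc_timestamp = 1000,
		.system_time = 5000,
		.tsc_to_system_mul = 0x80000000u,
		.tsc_shift = 0,
	};

	/* 2e9 ticks after the timestamp should read exactly 1e9 ns later. */
	assert(pvclock_read(&s, 1000 + 2000000000ull) == 5000 + 1000000000ull);
	printf("clock now: %llu ns\n",
	       (unsigned long long)pvclock_read(&s, 1000 + 4000000000ull));
	return 0;
}
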
@@ -2596,7 +2610,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_PIT_STATE2:
 	case KVM_CAP_SET_IDENTITY_MAP_ADDR:
 	case KVM_CAP_XEN_HVM:
-	case KVM_CAP_ADJUST_CLOCK:
 	case KVM_CAP_VCPU_EVENTS:
 	case KVM_CAP_HYPERV:
 	case KVM_CAP_HYPERV_VAPIC:

@@ -2623,6 +2636,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 #endif
 		r = 1;
 		break;
+	case KVM_CAP_ADJUST_CLOCK:
+		r = KVM_CLOCK_TSC_STABLE;
+		break;
 	case KVM_CAP_X86_SMM:
 		/* SMBASE is usually relocated above 1M on modern chipsets,
 		 * and SMM handlers might indeed rely on 4G segment limits,

@@ -3415,6 +3431,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	};
 	case KVM_SET_VAPIC_ADDR: {
 		struct kvm_vapic_addr va;
+		int idx;
 
 		r = -EINVAL;
 		if (!lapic_in_kernel(vcpu))

@@ -3422,7 +3439,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		r = -EFAULT;
 		if (copy_from_user(&va, argp, sizeof va))
 			goto out;
+		idx = srcu_read_lock(&vcpu->kvm->srcu);
 		r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
+		srcu_read_unlock(&vcpu->kvm->srcu, idx);
 		break;
 	}
 	case KVM_X86_SETUP_MCE: {

@@ -4103,9 +4122,11 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		struct kvm_clock_data user_ns;
 		u64 now_ns;
 
-		now_ns = get_kvmclock_ns(kvm);
+		local_irq_disable();
+		now_ns = __get_kvmclock_ns(kvm);
 		user_ns.clock = now_ns;
-		user_ns.flags = 0;
+		user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;
+		local_irq_enable();
 		memset(&user_ns.pad, 0, sizeof(user_ns.pad));
 
 		r = -EFAULT;

@@ -972,12 +972,19 @@ struct kvm_irqfd {
 	__u8 pad[16];
 };
 
+/* For KVM_CAP_ADJUST_CLOCK */
+
+/* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags. */
+#define KVM_CLOCK_TSC_STABLE 2
+
 struct kvm_clock_data {
 	__u64 clock;
 	__u32 flags;
 	__u32 pad[9];
 };
 
+/* For KVM_CAP_SW_TLB */
+
 #define KVM_MMU_FSL_BOOKE_NOHV	0
 #define KVM_MMU_FSL_BOOKE_HV	1
 

@@ -305,7 +305,7 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
 			continue;
 		type = vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i)
 		       & ARMV8_PMU_EVTYPE_EVENT;
-		if ((type == ARMV8_PMU_EVTYPE_EVENT_SW_INCR)
+		if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR)
 		    && (enable & BIT(i))) {
 			reg = vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
 			reg = lower_32_bits(reg);

@@ -379,7 +379,8 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
 	eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
 
 	/* Software increment event does't need to be backed by a perf event */
-	if (eventsel == ARMV8_PMU_EVTYPE_EVENT_SW_INCR)
+	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
+	    select_idx != ARMV8_PMU_CYCLE_IDX)
 		return;
 
 	memset(&attr, 0, sizeof(struct perf_event_attr));

@@ -391,7 +392,8 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
 	attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
 	attr.exclude_hv = 1; /* Don't count EL2 events */
 	attr.exclude_host = 1; /* Don't count host events */
-	attr.config = eventsel;
+	attr.config = (select_idx == ARMV8_PMU_CYCLE_IDX) ?
+		      ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;
 
 	counter = kvm_pmu_get_counter_value(vcpu, select_idx);
 	/* The initial sample period (overflow count) of an event. */

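The two PMU hunks above boil down to a small selection rule: a software-increment event only skips perf backing when it targets a regular event counter, and whatever event number the guest writes for the cycle counter, the backing perf event always counts CPU cycles. The following self-contained sketch models just that rule; the constants mirror the ARMv8 PMU definitions in the diff, while the helper and its -1 return convention are illustrative only.

/*
 * Standalone model of the counter/event selection logic in the hunks above.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define ARMV8_PMUV3_PERFCTR_SW_INCR     0x00
#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES  0x11
#define ARMV8_PMU_CYCLE_IDX             31  /* dedicated cycle counter index */

/*
 * Return the perf event type a counter should be backed with, or -1 when no
 * perf event is needed (software increment on a regular event counter).
 */
static int backing_event(unsigned int select_idx, uint64_t eventsel)
{
	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
	    select_idx != ARMV8_PMU_CYCLE_IDX)
		return -1;

	/* The cycle counter always counts CPU cycles, whatever was written. */
	return (select_idx == ARMV8_PMU_CYCLE_IDX) ?
	       ARMV8_PMUV3_PERFCTR_CPU_CYCLES : (int)eventsel;
}

int main(void)
{
	/* Regular counter with a software-increment event: no perf event. */
	assert(backing_event(0, ARMV8_PMUV3_PERFCTR_SW_INCR) == -1);
	/* Cycle counter: always backed by the CPU_CYCLES event. */
	assert(backing_event(ARMV8_PMU_CYCLE_IDX, ARMV8_PMUV3_PERFCTR_SW_INCR) ==
	       ARMV8_PMUV3_PERFCTR_CPU_CYCLES);
	/* Regular counter with an ordinary event keeps its event number. */
	assert(backing_event(2, 0x12) == 0x12);
	printf("event selection model ok\n");
	return 0;
}
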
@@ -91,6 +91,7 @@ static void async_pf_execute(struct work_struct *work)
 
 	spin_lock(&vcpu->async_pf.lock);
 	list_add_tail(&apf->link, &vcpu->async_pf.done);
+	apf->vcpu = NULL;
 	spin_unlock(&vcpu->async_pf.lock);
 
 	/*

@@ -113,6 +114,8 @@ static void async_pf_execute(struct work_struct *work)
 
 void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 {
+	spin_lock(&vcpu->async_pf.lock);
+
 	/* cancel outstanding work queue item */
 	while (!list_empty(&vcpu->async_pf.queue)) {
 		struct kvm_async_pf *work =

@@ -120,6 +123,14 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 				   typeof(*work), queue);
 		list_del(&work->queue);
 
+		/*
+		 * We know it's present in vcpu->async_pf.done, do
+		 * nothing here.
+		 */
+		if (!work->vcpu)
+			continue;
+
+		spin_unlock(&vcpu->async_pf.lock);
 #ifdef CONFIG_KVM_ASYNC_PF_SYNC
 		flush_work(&work->work);
 #else

@@ -129,9 +140,9 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 			kmem_cache_free(async_pf_cache, work);
 		}
 #endif
+		spin_lock(&vcpu->async_pf.lock);
 	}
 
-	spin_lock(&vcpu->async_pf.lock);
 	while (!list_empty(&vcpu->async_pf.done)) {
 		struct kvm_async_pf *work =
 			list_first_entry(&vcpu->async_pf.done,