KVM: x86: do not attempt TSC synchronization on guest writes

KVM special-cases writes to MSR_IA32_TSC so that all CPUs have
the same base for the TSC.  This logic is complicated, and we
do not want it to have any effect once the VM is started.

In particular, if any guest started to synchronize its TSCs
with writes to MSR_IA32_TSC rather than MSR_IA32_TSC_ADJUST,
the additional effects of the kvm_write_tsc code would be
uncharted territory.

Therefore, this patch makes writes to MSR_IA32_TSC behave
essentially the same as writes to MSR_IA32_TSC_ADJUST when
they come from the guest.  A new selftest (which passes
both before and after the patch) checks the current semantics
of writes to MSR_IA32_TSC and MSR_IA32_TSC_ADJUST originating
from both the host and the guest.
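
To illustrate the new semantics, here is a minimal user-space model
(an illustration only, not kernel code: host_tsc, tsc_offset,
tsc_adjust and the three helpers are made-up names, and the real
kvm_synchronize_tsc also contains matching heuristics that this
sketch omits).  The guest-visible TSC is the host TSC plus a
per-vCPU offset; a guest write to MSR_IA32_TSC folds the requested
delta into that offset and mirrors it in IA32_TSC_ADJUST, exactly as
if the guest had written the latter, while a host-initiated write
moves only the offset:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t host_tsc;	/* what the host TSC currently reads */
static uint64_t tsc_offset;	/* per-vCPU hardware TSC offset */
static uint64_t tsc_adjust;	/* guest-visible IA32_TSC_ADJUST */

static uint64_t guest_rdtsc(void)
{
        return host_tsc + tsc_offset;
}

/* Guest write to IA32_TSC: fold the delta into the offset and mirror
 * it in IA32_TSC_ADJUST, as if the guest had written that MSR. */
static void guest_wrmsr_tsc(uint64_t data)
{
        uint64_t adj = data - guest_rdtsc();

        tsc_offset += adj;
        tsc_adjust += adj;
}

/* Host-initiated write: only the offset moves, ADJUST is untouched. */
static void host_set_tsc(uint64_t data)
{
        tsc_offset = data - host_tsc;
}

int main(void)
{
        host_tsc = 1000;

        guest_wrmsr_tsc(4096);
        assert(guest_rdtsc() == 4096);
        assert(tsc_adjust == 4096 - 1000);

        host_set_tsc(8192);			/* guest TSC jumps to 8192... */
        assert(guest_rdtsc() == 8192);
        assert(tsc_adjust == 4096 - 1000);	/* ...but ADJUST is unchanged */

        printf("TSC=%llu ADJUST=%llu\n",
               (unsigned long long)guest_rdtsc(),
               (unsigned long long)tsc_adjust);
        return 0;
}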

Upcoming work to remove the special side effects
of host-initiated writes to MSR_IA32_TSC and MSR_IA32_TSC_ADJUST
will be able to build on this test, adjusting the host side
to use the new APIs while achieving the same effect.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Commit: 0c899c25d7
Parent: a7d5c7ce41
Author: Paolo Bonzini <pbonzini@redhat.com>
Date:   2020-09-24 14:45:27 +02:00

 3 files changed, 179 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c

@@ -2107,12 +2107,6 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
 #endif
 }
 
-static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
-{
-        u64 curr_offset = vcpu->arch.l1_tsc_offset;
-        vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
-}
-
 /*
  * Multiply tsc by a fixed point number represented by ratio.
  *
@@ -2174,14 +2168,13 @@ static inline bool kvm_check_tsc_unstable(void)
         return check_tsc_unstable();
 }
 
-void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
+static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
 {
         struct kvm *kvm = vcpu->kvm;
         u64 offset, ns, elapsed;
         unsigned long flags;
         bool matched;
         bool already_matched;
-        u64 data = msr->data;
         bool synchronizing = false;
 
         raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
@@ -2190,7 +2183,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
         elapsed = ns - kvm->arch.last_tsc_nsec;
 
         if (vcpu->arch.virtual_tsc_khz) {
-                if (data == 0 && msr->host_initiated) {
+                if (data == 0) {
                         /*
                          * detection of vcpu initialization -- need to sync
                          * with other vCPUs. This particularly helps to keep
@@ -2260,9 +2253,6 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
         vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
         vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
 
-        if (!msr->host_initiated && guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST))
-                update_ia32_tsc_adjust_msr(vcpu, offset);
-
         kvm_vcpu_write_tsc_offset(vcpu, offset);
         raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
 
@@ -2277,8 +2267,6 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
         spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
 }
 
-EXPORT_SYMBOL_GPL(kvm_write_tsc);
-
 static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
                                            s64 adjustment)
 {
@@ -3073,7 +3061,13 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                 vcpu->arch.msr_ia32_power_ctl = data;
                 break;
         case MSR_IA32_TSC:
-                kvm_write_tsc(vcpu, msr_info);
+                if (msr_info->host_initiated) {
+                        kvm_synchronize_tsc(vcpu, data);
+                } else {
+                        u64 adj = kvm_compute_tsc_offset(vcpu, data) - vcpu->arch.l1_tsc_offset;
+                        adjust_tsc_offset_guest(vcpu, adj);
+                        vcpu->arch.ia32_tsc_adjust_msr += adj;
+                }
                 break;
         case MSR_IA32_XSS:
                 if (!msr_info->host_initiated &&
@@ -9839,7 +9833,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 {
-        struct msr_data msr;
         struct kvm *kvm = vcpu->kvm;
 
         kvm_hv_vcpu_postcreate(vcpu);
@@ -9847,10 +9840,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
         if (mutex_lock_killable(&vcpu->mutex))
                 return;
         vcpu_load(vcpu);
-        msr.data = 0x0;
-        msr.index = MSR_IA32_TSC;
-        msr.host_initiated = true;
-        kvm_write_tsc(vcpu, &msr);
+        kvm_synchronize_tsc(vcpu, 0);
         vcpu_put(vcpu);
 
         /* poll control enabled by default */

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile

@@ -55,6 +55,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
 TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
 TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
+TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
 TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test
 TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
 TEST_GEN_PROGS_x86_64 += demand_paging_test

diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
new file mode 100644
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c

@@ -0,0 +1,168 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Tests for MSR_IA32_TSC and MSR_IA32_TSC_ADJUST.
 *
 * Copyright (C) 2020, Red Hat, Inc.
 */
#include <stdio.h>
#include <string.h>
#include "kvm_util.h"
#include "processor.h"

#define VCPU_ID 0

#define UNITY                  (1ull << 30)
#define HOST_ADJUST            (UNITY * 64)
#define GUEST_STEP             (UNITY * 4)
#define ROUND(x)               ((x + UNITY / 2) & -UNITY)
#define rounded_rdmsr(x)       ROUND(rdmsr(x))
#define rounded_host_rdmsr(x)  ROUND(vcpu_get_msr(vm, 0, x))
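
/*
 * UNITY is 2^30 TSC cycles and every value written below is a multiple
 * of it; ROUND() snaps a reading back to the nearest multiple, absorbing
 * the cycles that elapse between a write and the read that checks it.
 * HOST_ADJUST (64 * UNITY) and GUEST_STEP (4 * UNITY) differ in magnitude
 * so the assertions can tell host-side and guest-side contributions apart.
 */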
#define GUEST_ASSERT_EQ(a, b) do {				\
        __typeof(a) _a = (a);					\
        __typeof(b) _b = (b);					\
        if (_a != _b)						\
                ucall(UCALL_ABORT, 4,				\
                      "Failed guest assert: "			\
                      #a " == " #b, __LINE__, _a, _b);		\
} while(0)

static void guest_code(void)
{
        u64 val = 0;

        GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
        GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);

        /* Guest: writes to MSR_IA32_TSC affect both MSRs.  */
        val = 1ull * GUEST_STEP;
        wrmsr(MSR_IA32_TSC, val);
        GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
        GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);

        /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs.  */
        GUEST_SYNC(2);
        val = 2ull * GUEST_STEP;
        wrmsr(MSR_IA32_TSC_ADJUST, val);
        GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
        GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);

        /* Host: setting the TSC offset.  */
        GUEST_SYNC(3);
        GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
        GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);

        /*
         * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
         * host-side offset and affect both MSRs.
         */
        GUEST_SYNC(4);
        val = 3ull * GUEST_STEP;
        wrmsr(MSR_IA32_TSC_ADJUST, val);
        GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
        GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);

        /*
         * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
         * offset is now visible in MSR_IA32_TSC_ADJUST.
         */
        GUEST_SYNC(5);
        val = 4ull * GUEST_STEP;
        wrmsr(MSR_IA32_TSC, val);
        GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
        GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);

        GUEST_DONE();
}

static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage)
{
        struct ucall uc;

        vcpu_args_set(vm, vcpuid, 1, vcpuid);

        vcpu_ioctl(vm, vcpuid, KVM_RUN, NULL);

        switch (get_ucall(vm, vcpuid, &uc)) {
        case UCALL_SYNC:
                TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
                            uc.args[1] == stage + 1, "Stage %d: Unexpected register values vmexit, got %lx",
                            stage + 1, (ulong)uc.args[1]);
                return;
        case UCALL_DONE:
                return;
        case UCALL_ABORT:
                TEST_ASSERT(false, "%s at %s:%ld\n" \
                            "\tvalues: %#lx, %#lx", (const char *)uc.args[0],
                            __FILE__, uc.args[1], uc.args[2], uc.args[3]);
        default:
                TEST_ASSERT(false, "Unexpected exit: %s",
                            exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason));
        }
}

int main(void)
{
        struct kvm_vm *vm;
        uint64_t val;

        vm = vm_create_default(VCPU_ID, 0, guest_code);
        vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());

        val = 0;
        ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
        ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);

        /* Guest: writes to MSR_IA32_TSC affect both MSRs.  */
        run_vcpu(vm, VCPU_ID, 1);
        val = 1ull * GUEST_STEP;
        ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
        ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);

        /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs.  */
        run_vcpu(vm, VCPU_ID, 2);
        val = 2ull * GUEST_STEP;
        ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
        ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);

        /*
         * Host: writes to MSR_IA32_TSC set the host-side offset
         * and therefore do not change MSR_IA32_TSC_ADJUST.
         */
        vcpu_set_msr(vm, 0, MSR_IA32_TSC, HOST_ADJUST + val);
        ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
        ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
        run_vcpu(vm, VCPU_ID, 3);

        /* Host: writes to MSR_IA32_TSC_ADJUST do not modify the TSC.  */
        vcpu_set_msr(vm, 0, MSR_IA32_TSC_ADJUST, UNITY * 123456);
        ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
        ASSERT_EQ(vcpu_get_msr(vm, 0, MSR_IA32_TSC_ADJUST), UNITY * 123456);

        /* Restore previous value.  */
        vcpu_set_msr(vm, 0, MSR_IA32_TSC_ADJUST, val);
        ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
        ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);

        /*
         * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
         * host-side offset and affect both MSRs.
         */
        run_vcpu(vm, VCPU_ID, 4);
        val = 3ull * GUEST_STEP;
        ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
        ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);

        /*
         * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
         * offset is now visible in MSR_IA32_TSC_ADJUST.
         */
        run_vcpu(vm, VCPU_ID, 5);
        val = 4ull * GUEST_STEP;
        ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
        ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);

        kvm_vm_free(vm);

        return 0;
}
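
The selftest builds along with the other KVM selftests (for example via
make -C tools/testing/selftests/kvm) and produces an x86_64/tsc_msrs_test
binary; as the commit message notes, it passes both before and after this
patch, since it only checks the existing semantics.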