Merge branch 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 vdso updates from Ingo Molnar:
 "Add support for vDSO acceleration of the "Hyper-V TSC page", to speed up clock reading on Hyper-V guests"

* 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/vdso: Add VCLOCK_HVCLOCK vDSO clock read method
  x86/hyperv: Move TSC reading method to asm/mshyperv.h
  x86/hyperv: Implement hv_get_tsc_page()
This commit is contained in:
commit
aa2a4b6569
|
@ -17,6 +17,7 @@
|
|||
#include <asm/unistd.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/pvclock.h>
|
||||
#include <asm/mshyperv.h>
|
||||
#include <linux/math64.h>
|
||||
#include <linux/time.h>
|
||||
#include <linux/kernel.h>
|
||||
|
@ -32,6 +33,11 @@ extern u8 pvclock_page
|
|||
__attribute__((visibility("hidden")));
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_HYPERV_TSCPAGE
|
||||
extern u8 hvclock_page
|
||||
__attribute__((visibility("hidden")));
|
||||
#endif
|
||||
|
||||
#ifndef BUILD_VDSO32
|
||||
|
||||
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
|
||||
|
@ -141,6 +147,20 @@ static notrace u64 vread_pvclock(int *mode)
|
|||
return last;
|
||||
}
|
||||
#endif
|
||||
#ifdef CONFIG_HYPERV_TSCPAGE
|
||||
/*
 * Read the current tick count from the Hyper-V TSC page located at
 * hvclock_page.
 *
 * @mode: set to VCLOCK_NONE when the page read fails; left untouched
 *        otherwise.
 *
 * Returns the value produced by hv_read_tsc_page(), or 0 when that helper
 * reports failure by returning U64_MAX.
 */
static notrace u64 vread_hvclock(int *mode)
{
	const struct ms_hyperv_tsc_page *page =
		(const struct ms_hyperv_tsc_page *)&hvclock_page;
	u64 ticks = hv_read_tsc_page(page);

	if (ticks == U64_MAX) {
		/* The TSC page gave no reading: report that no vDSO clock is available. */
		*mode = VCLOCK_NONE;
		return 0;
	}

	return ticks;
}
|
||||
#endif
|
||||
|
||||
notrace static u64 vread_tsc(void)
|
||||
{
|
||||
|
@ -172,6 +192,10 @@ notrace static inline u64 vgetsns(int *mode)
|
|||
#ifdef CONFIG_PARAVIRT_CLOCK
|
||||
else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
|
||||
cycles = vread_pvclock(mode);
|
||||
#endif
|
||||
#ifdef CONFIG_HYPERV_TSCPAGE
|
||||
else if (gtod->vclock_mode == VCLOCK_HVCLOCK)
|
||||
cycles = vread_hvclock(mode);
|
||||
#endif
|
||||
else
|
||||
return 0;
|
||||
|
|
|
@ -25,7 +25,7 @@ SECTIONS
|
|||
* segment.
|
||||
*/
|
||||
|
||||
vvar_start = . - 2 * PAGE_SIZE;
|
||||
vvar_start = . - 3 * PAGE_SIZE;
|
||||
vvar_page = vvar_start;
|
||||
|
||||
/* Place all vvars at the offsets in asm/vvar.h. */
|
||||
|
@ -36,6 +36,7 @@ SECTIONS
|
|||
#undef EMIT_VVAR
|
||||
|
||||
pvclock_page = vvar_start + PAGE_SIZE;
|
||||
hvclock_page = vvar_start + 2 * PAGE_SIZE;
|
||||
|
||||
. = SIZEOF_HEADERS;
|
||||
|
||||
|
|
|
@ -74,6 +74,7 @@ enum {
|
|||
sym_vvar_page,
|
||||
sym_hpet_page,
|
||||
sym_pvclock_page,
|
||||
sym_hvclock_page,
|
||||
sym_VDSO_FAKE_SECTION_TABLE_START,
|
||||
sym_VDSO_FAKE_SECTION_TABLE_END,
|
||||
};
|
||||
|
@ -82,6 +83,7 @@ const int special_pages[] = {
|
|||
sym_vvar_page,
|
||||
sym_hpet_page,
|
||||
sym_pvclock_page,
|
||||
sym_hvclock_page,
|
||||
};
|
||||
|
||||
struct vdso_sym {
|
||||
|
@ -94,6 +96,7 @@ struct vdso_sym required_syms[] = {
|
|||
[sym_vvar_page] = {"vvar_page", true},
|
||||
[sym_hpet_page] = {"hpet_page", true},
|
||||
[sym_pvclock_page] = {"pvclock_page", true},
|
||||
[sym_hvclock_page] = {"hvclock_page", true},
|
||||
[sym_VDSO_FAKE_SECTION_TABLE_START] = {
|
||||
"VDSO_FAKE_SECTION_TABLE_START", false
|
||||
},
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include <asm/page.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/mshyperv.h>
|
||||
|
||||
#if defined(CONFIG_X86_64)
|
||||
unsigned int __read_mostly vdso64_enabled = 1;
|
||||
|
@ -121,6 +122,12 @@ static int vvar_fault(const struct vm_special_mapping *sm,
|
|||
vmf->address,
|
||||
__pa(pvti) >> PAGE_SHIFT);
|
||||
}
|
||||
} else if (sym_offset == image->sym_hvclock_page) {
|
||||
struct ms_hyperv_tsc_page *tsc_pg = hv_get_tsc_page();
|
||||
|
||||
if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK))
|
||||
ret = vm_insert_pfn(vma, vmf->address,
|
||||
vmalloc_to_pfn(tsc_pg));
|
||||
}
|
||||
|
||||
if (ret == 0 || ret == -EBUSY)
|
||||
|
|
|
@ -27,45 +27,22 @@
|
|||
#include <linux/clockchips.h>
|
||||
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#ifdef CONFIG_HYPERV_TSCPAGE
|
||||
|
||||
static struct ms_hyperv_tsc_page *tsc_pg;
|
||||
|
||||
struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
|
||||
{
|
||||
return tsc_pg;
|
||||
}
|
||||
|
||||
static u64 read_hv_clock_tsc(struct clocksource *arg)
|
||||
{
|
||||
u64 current_tick;
|
||||
u64 current_tick = hv_read_tsc_page(tsc_pg);
|
||||
|
||||
if (tsc_pg->tsc_sequence != 0) {
|
||||
/*
|
||||
* Use the tsc page to compute the value.
|
||||
*/
|
||||
if (current_tick == U64_MAX)
|
||||
rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
|
||||
|
||||
while (1) {
|
||||
u64 tmp;
|
||||
u32 sequence = tsc_pg->tsc_sequence;
|
||||
u64 cur_tsc;
|
||||
u64 scale = tsc_pg->tsc_scale;
|
||||
s64 offset = tsc_pg->tsc_offset;
|
||||
|
||||
rdtscll(cur_tsc);
|
||||
/* current_tick = ((cur_tsc *scale) >> 64) + offset */
|
||||
asm("mulq %3"
|
||||
: "=d" (current_tick), "=a" (tmp)
|
||||
: "a" (cur_tsc), "r" (scale));
|
||||
|
||||
current_tick += offset;
|
||||
if (tsc_pg->tsc_sequence == sequence)
|
||||
return current_tick;
|
||||
|
||||
if (tsc_pg->tsc_sequence != 0)
|
||||
continue;
|
||||
/*
|
||||
* Fallback using MSR method.
|
||||
*/
|
||||
break;
|
||||
}
|
||||
}
|
||||
rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
|
||||
return current_tick;
|
||||
}
|
||||
|
||||
|
@ -139,7 +116,7 @@ void hyperv_init(void)
|
|||
/*
|
||||
* Register Hyper-V specific clocksource.
|
||||
*/
|
||||
#ifdef CONFIG_X86_64
|
||||
#ifdef CONFIG_HYPERV_TSCPAGE
|
||||
if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) {
|
||||
union hv_x64_msr_hypercall_contents tsc_msr;
|
||||
|
||||
|
@ -155,6 +132,9 @@ void hyperv_init(void)
|
|||
tsc_msr.guest_physical_address = vmalloc_to_pfn(tsc_pg);
|
||||
|
||||
wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
|
||||
|
||||
hyperv_cs_tsc.archdata.vclock_mode = VCLOCK_HVCLOCK;
|
||||
|
||||
clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -6,7 +6,8 @@
|
|||
#define VCLOCK_NONE 0 /* No vDSO clock available. */
|
||||
#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
|
||||
#define VCLOCK_PVCLOCK 2 /* vDSO should use vread_pvclock. */
|
||||
#define VCLOCK_MAX 2
|
||||
#define VCLOCK_HVCLOCK 3 /* vDSO should use vread_hvclock. */
|
||||
#define VCLOCK_MAX 3
|
||||
|
||||
struct arch_clocksource_data {
|
||||
int vclock_mode;
|
||||
|
|
|
@ -176,4 +176,58 @@ void hyperv_report_panic(struct pt_regs *regs);
|
|||
bool hv_is_hypercall_page_setup(void);
|
||||
void hyperv_cleanup(void);
|
||||
#endif
|
||||
#ifdef CONFIG_HYPERV_TSCPAGE
|
||||
struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
|
||||
static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
|
||||
{
|
||||
u64 scale, offset, cur_tsc;
|
||||
u32 sequence;
|
||||
|
||||
/*
|
||||
* The protocol for reading Hyper-V TSC page is specified in Hypervisor
|
||||
* Top-Level Functional Specification ver. 3.0 and above. To get the
|
||||
* reference time we must do the following:
|
||||
* - READ ReferenceTscSequence
|
||||
* A special '0' value indicates the time source is unreliable and we
|
||||
* need to use something else. The currently published specification
|
||||
* versions (up to 4.0b) contain a mistake and wrongly claim '-1'
|
||||
* instead of '0' as the special value, see commit c35b82ef0294.
|
||||
* - ReferenceTime =
|
||||
* ((RDTSC() * ReferenceTscScale) >> 64) + ReferenceTscOffset
|
||||
* - READ ReferenceTscSequence again. In case its value has changed
|
||||
* since our first reading we need to discard ReferenceTime and repeat
|
||||
* the whole sequence as the hypervisor was updating the page in
|
||||
* between.
|
||||
*/
|
||||
do {
|
||||
sequence = READ_ONCE(tsc_pg->tsc_sequence);
|
||||
if (!sequence)
|
||||
return U64_MAX;
|
||||
/*
|
||||
* Make sure we read sequence before we read other values from
|
||||
* TSC page.
|
||||
*/
|
||||
smp_rmb();
|
||||
|
||||
scale = READ_ONCE(tsc_pg->tsc_scale);
|
||||
offset = READ_ONCE(tsc_pg->tsc_offset);
|
||||
cur_tsc = rdtsc_ordered();
|
||||
|
||||
/*
|
||||
* Make sure we read sequence after we read all other values
|
||||
* from TSC page.
|
||||
*/
|
||||
smp_rmb();
|
||||
|
||||
} while (READ_ONCE(tsc_pg->tsc_sequence) != sequence);
|
||||
|
||||
return mul_u64_u64_shr(cur_tsc, scale, 64) + offset;
|
||||
}
|
||||
|
||||
#else
|
||||
/*
 * Stub used when CONFIG_HYPERV_TSCPAGE is not defined: there is no TSC page
 * to hand out, so callers always get NULL.
 */
static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
{
	return NULL;
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -20,6 +20,7 @@ struct vdso_image {
|
|||
long sym_vvar_page;
|
||||
long sym_hpet_page;
|
||||
long sym_pvclock_page;
|
||||
long sym_hvclock_page;
|
||||
long sym_VDSO32_NOTE_MASK;
|
||||
long sym___kernel_sigreturn;
|
||||
long sym___kernel_rt_sigreturn;
|
||||
|
|
|
@ -7,6 +7,9 @@ config HYPERV
|
|||
Select this option to run Linux as a Hyper-V client operating
|
||||
system.
|
||||
|
||||
config HYPERV_TSCPAGE
|
||||
def_bool HYPERV && X86_64
|
||||
|
||||
config HYPERV_UTILS
|
||||
tristate "Microsoft Hyper-V Utilities driver"
|
||||
depends on HYPERV && CONNECTOR && NLS
|
||||
|
|
Loading…
Reference in New Issue