Merge branch 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 vdso updates from Ingo Molnar:
 "Add support for vDSO acceleration of the "Hyper-V TSC page", to speed
  up clock reading on Hyper-V guests"

* 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/vdso: Add VCLOCK_HVCLOCK vDSO clock read method
  x86/hyperv: Move TSC reading method to asm/mshyperv.h
  x86/hyperv: Implement hv_get_tsc_page()
This commit is contained in:
Linus Torvalds 2017-05-01 23:08:46 -07:00
commit aa2a4b6569
9 changed files with 109 additions and 35 deletions

View File

@ -17,6 +17,7 @@
#include <asm/unistd.h>
#include <asm/msr.h>
#include <asm/pvclock.h>
#include <asm/mshyperv.h>
#include <linux/math64.h>
#include <linux/time.h>
#include <linux/kernel.h>
@ -32,6 +33,11 @@ extern u8 pvclock_page
__attribute__((visibility("hidden")));
#endif
#ifdef CONFIG_HYPERV_TSCPAGE
extern u8 hvclock_page
__attribute__((visibility("hidden")));
#endif
#ifndef BUILD_VDSO32
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
@ -141,6 +147,20 @@ static notrace u64 vread_pvclock(int *mode)
return last;
}
#endif
#ifdef CONFIG_HYPERV_TSCPAGE
/*
 * Read the clock from the hvclock_page mapping via hv_read_tsc_page().
 *
 * @mode: set to VCLOCK_NONE when hv_read_tsc_page() reports U64_MAX,
 *        otherwise left untouched.
 *
 * Returns the value produced by hv_read_tsc_page(), or 0 when it reported
 * U64_MAX.
 */
static notrace u64 vread_hvclock(int *mode)
{
	const struct ms_hyperv_tsc_page *page =
		(const struct ms_hyperv_tsc_page *)&hvclock_page;
	u64 ticks = hv_read_tsc_page(page);

	if (ticks == U64_MAX) {
		/* No usable reading: tell the caller this vclock mode is off. */
		*mode = VCLOCK_NONE;
		return 0;
	}

	return ticks;
}
#endif
notrace static u64 vread_tsc(void)
{
@ -172,6 +192,10 @@ notrace static inline u64 vgetsns(int *mode)
#ifdef CONFIG_PARAVIRT_CLOCK
else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
cycles = vread_pvclock(mode);
#endif
#ifdef CONFIG_HYPERV_TSCPAGE
else if (gtod->vclock_mode == VCLOCK_HVCLOCK)
cycles = vread_hvclock(mode);
#endif
else
return 0;

View File

@ -25,7 +25,7 @@ SECTIONS
* segment.
*/
vvar_start = . - 2 * PAGE_SIZE;
vvar_start = . - 3 * PAGE_SIZE;
vvar_page = vvar_start;
/* Place all vvars at the offsets in asm/vvar.h. */
@ -36,6 +36,7 @@ SECTIONS
#undef EMIT_VVAR
pvclock_page = vvar_start + PAGE_SIZE;
hvclock_page = vvar_start + 2 * PAGE_SIZE;
. = SIZEOF_HEADERS;

View File

@ -74,6 +74,7 @@ enum {
sym_vvar_page,
sym_hpet_page,
sym_pvclock_page,
sym_hvclock_page,
sym_VDSO_FAKE_SECTION_TABLE_START,
sym_VDSO_FAKE_SECTION_TABLE_END,
};
@ -82,6 +83,7 @@ const int special_pages[] = {
sym_vvar_page,
sym_hpet_page,
sym_pvclock_page,
sym_hvclock_page,
};
struct vdso_sym {
@ -94,6 +96,7 @@ struct vdso_sym required_syms[] = {
[sym_vvar_page] = {"vvar_page", true},
[sym_hpet_page] = {"hpet_page", true},
[sym_pvclock_page] = {"pvclock_page", true},
[sym_hvclock_page] = {"hvclock_page", true},
[sym_VDSO_FAKE_SECTION_TABLE_START] = {
"VDSO_FAKE_SECTION_TABLE_START", false
},

View File

@ -22,6 +22,7 @@
#include <asm/page.h>
#include <asm/desc.h>
#include <asm/cpufeature.h>
#include <asm/mshyperv.h>
#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
@ -121,6 +122,12 @@ static int vvar_fault(const struct vm_special_mapping *sm,
vmf->address,
__pa(pvti) >> PAGE_SHIFT);
}
} else if (sym_offset == image->sym_hvclock_page) {
struct ms_hyperv_tsc_page *tsc_pg = hv_get_tsc_page();
if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK))
ret = vm_insert_pfn(vma, vmf->address,
vmalloc_to_pfn(tsc_pg));
}
if (ret == 0 || ret == -EBUSY)

View File

@ -27,45 +27,22 @@
#include <linux/clockchips.h>
#ifdef CONFIG_X86_64
#ifdef CONFIG_HYPERV_TSCPAGE
static struct ms_hyperv_tsc_page *tsc_pg;
/*
 * Return the file-scope tsc_pg pointer (the Hyper-V TSC page) as-is; no
 * check is made here on whether it has been set.
 */
struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
{
return tsc_pg;
}
/*
 * Read the Hyper-V reference time.
 *
 * NOTE(review): this hunk appears to interleave the removed open-coded
 * sequence loop with its replacement (current_tick is declared twice).
 * In the replacement, the value comes from hv_read_tsc_page(tsc_pg) and,
 * when that returns U64_MAX, from the HV_X64_MSR_TIME_REF_COUNT MSR
 * instead. The arg parameter is not used in the visible body.
 */
static u64 read_hv_clock_tsc(struct clocksource *arg)
{
u64 current_tick;
u64 current_tick = hv_read_tsc_page(tsc_pg);
if (tsc_pg->tsc_sequence != 0) {
/*
 * Use the tsc page to compute the value.
 */
if (current_tick == U64_MAX)
rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
while (1) {
u64 tmp;
u32 sequence = tsc_pg->tsc_sequence;
u64 cur_tsc;
u64 scale = tsc_pg->tsc_scale;
s64 offset = tsc_pg->tsc_offset;
rdtscll(cur_tsc);
/* current_tick = ((cur_tsc *scale) >> 64) + offset */
asm("mulq %3"
: "=d" (current_tick), "=a" (tmp)
: "a" (cur_tsc), "r" (scale));
current_tick += offset;
if (tsc_pg->tsc_sequence == sequence)
return current_tick;
if (tsc_pg->tsc_sequence != 0)
continue;
/*
 * Fallback using MSR method.
 */
break;
}
}
rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
return current_tick;
}
@ -139,7 +116,7 @@ void hyperv_init(void)
/*
* Register Hyper-V specific clocksource.
*/
#ifdef CONFIG_X86_64
#ifdef CONFIG_HYPERV_TSCPAGE
if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) {
union hv_x64_msr_hypercall_contents tsc_msr;
@ -155,6 +132,9 @@ void hyperv_init(void)
tsc_msr.guest_physical_address = vmalloc_to_pfn(tsc_pg);
wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
hyperv_cs_tsc.archdata.vclock_mode = VCLOCK_HVCLOCK;
clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
return;
}

View File

@ -6,7 +6,8 @@
#define VCLOCK_NONE 0 /* No vDSO clock available. */
#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
#define VCLOCK_PVCLOCK 2 /* vDSO should use vread_pvclock. */
#define VCLOCK_MAX 2
#define VCLOCK_HVCLOCK 3 /* vDSO should use vread_hvclock. */
#define VCLOCK_MAX 3
struct arch_clocksource_data {
int vclock_mode;

View File

@ -176,4 +176,58 @@ void hyperv_report_panic(struct pt_regs *regs);
bool hv_is_hypercall_page_setup(void);
void hyperv_cleanup(void);
#endif
#ifdef CONFIG_HYPERV_TSCPAGE
struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
/*
 * hv_read_tsc_page - compute the reference time from a Hyper-V TSC page.
 * @tsc_pg: TSC page to read; dereferenced without a NULL check.
 *
 * Returns mul_u64_u64_shr(tsc, scale, 64) + offset, using a TSC sample and
 * scale/offset values taken between two matching reads of tsc_sequence.
 * Returns U64_MAX when tsc_sequence reads as 0, so the caller can switch to
 * another time source.
 */
static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
{
u64 scale, offset, cur_tsc;
u32 sequence;
/*
* The protocol for reading Hyper-V TSC page is specified in Hypervisor
* Top-Level Functional Specification ver. 3.0 and above. To get the
* reference time we must do the following:
* - READ ReferenceTscSequence
* A special '0' value indicates the time source is unreliable and we
* need to use something else. The currently published specification
* versions (up to 4.0b) contain a mistake and wrongly claim '-1'
* instead of '0' as the special value, see commit c35b82ef0294.
* - ReferenceTime =
* ((RDTSC() * ReferenceTscScale) >> 64) + ReferenceTscOffset
* - READ ReferenceTscSequence again. In case its value has changed
* since our first reading we need to discard ReferenceTime and repeat
* the whole sequence as the hypervisor was updating the page in
* between.
*/
do {
sequence = READ_ONCE(tsc_pg->tsc_sequence);
if (!sequence)
return U64_MAX;
/*
* Make sure we read sequence before we read other values from
* TSC page.
*/
smp_rmb();
scale = READ_ONCE(tsc_pg->tsc_scale);
offset = READ_ONCE(tsc_pg->tsc_offset);
cur_tsc = rdtsc_ordered();
/*
* Make sure we read sequence after we read all other values
* from TSC page.
*/
smp_rmb();
} while (READ_ONCE(tsc_pg->tsc_sequence) != sequence);
/* Only reached with a consistent snapshot: sequence was unchanged. */
return mul_u64_u64_shr(cur_tsc, scale, 64) + offset;
}
#else
/*
 * Stub used when CONFIG_HYPERV_TSCPAGE is not defined: always returns NULL,
 * so callers that test the result skip the TSC-page path.
 */
static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
{
return NULL;
}
#endif
#endif

View File

@ -20,6 +20,7 @@ struct vdso_image {
long sym_vvar_page;
long sym_hpet_page;
long sym_pvclock_page;
long sym_hvclock_page;
long sym_VDSO32_NOTE_MASK;
long sym___kernel_sigreturn;
long sym___kernel_rt_sigreturn;

View File

@ -7,6 +7,9 @@ config HYPERV
Select this option to run Linux as a Hyper-V client operating
system.
config HYPERV_TSCPAGE
def_bool HYPERV && X86_64
config HYPERV_UTILS
tristate "Microsoft Hyper-V Utilities driver"
depends on HYPERV && CONNECTOR && NLS