Merge branch 'x86-hyperv-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 hyperv updates from Ingo Molnar:
 "Misc updates to the hyperv guest code:

   - Rework clockevents initialization to better support hibernation

   - Allow guests to enable InvariantTSC

   - Micro-optimize send_ipi_one"

* 'x86-hyperv-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/hyperv: Initialize clockevents earlier in CPU onlining
  x86/hyperv: Allow guests to enable InvariantTSC
  x86/hyperv: Micro-optimize send_ipi_one()
This commit is contained in:
commit
64d6a12094
|
@ -194,10 +194,20 @@ do_ex_hypercall:
|
|||
|
||||
static bool __send_ipi_one(int cpu, int vector)
|
||||
{
|
||||
struct cpumask mask = CPU_MASK_NONE;
|
||||
int vp = hv_cpu_number_to_vp_number(cpu);
|
||||
|
||||
cpumask_set_cpu(cpu, &mask);
|
||||
return __send_ipi_mask(&mask, vector);
|
||||
trace_hyperv_send_ipi_one(cpu, vector);
|
||||
|
||||
if (!hv_hypercall_pg || (vp == VP_INVAL))
|
||||
return false;
|
||||
|
||||
if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
|
||||
return false;
|
||||
|
||||
if (vp >= 64)
|
||||
return __send_ipi_mask_ex(cpumask_of(cpu), vector);
|
||||
|
||||
return !hv_do_fast_hypercall16(HVCALL_SEND_IPI, vector, BIT_ULL(vp));
|
||||
}
|
||||
|
||||
static void hv_send_ipi(int cpu, int vector)
|
||||
|
|
|
@ -311,6 +311,12 @@ void __init hyperv_init(void)
|
|||
hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
|
||||
wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
|
||||
|
||||
/*
|
||||
* Ignore any errors in setting up stimer clockevents
|
||||
* as we can run with the LAPIC timer as a fallback.
|
||||
*/
|
||||
(void)hv_stimer_alloc();
|
||||
|
||||
hv_apic_init();
|
||||
|
||||
x86_init.pci.arch_init = hv_pci_init;
|
||||
|
|
|
@ -86,6 +86,8 @@
|
|||
#define HV_X64_ACCESS_FREQUENCY_MSRS BIT(11)
|
||||
/* AccessReenlightenmentControls privilege */
|
||||
#define HV_X64_ACCESS_REENLIGHTENMENT BIT(13)
|
||||
/* AccessTscInvariantControls privilege */
|
||||
#define HV_X64_ACCESS_TSC_INVARIANT BIT(15)
|
||||
|
||||
/*
|
||||
* Feature identification: indicates which flags were specified at partition
|
||||
|
@ -278,6 +280,9 @@
|
|||
#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
|
||||
#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108
|
||||
|
||||
/* TSC invariant control */
|
||||
#define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118
|
||||
|
||||
/*
|
||||
* Declare the MSR used to setup pages used to communicate with the hypervisor.
|
||||
*/
|
||||
|
|
|
@ -71,6 +71,21 @@ TRACE_EVENT(hyperv_send_ipi_mask,
|
|||
__entry->ncpus, __entry->vector)
|
||||
);
|
||||
|
||||
TRACE_EVENT(hyperv_send_ipi_one,
|
||||
TP_PROTO(int cpu,
|
||||
int vector),
|
||||
TP_ARGS(cpu, vector),
|
||||
TP_STRUCT__entry(
|
||||
__field(int, cpu)
|
||||
__field(int, vector)
|
||||
),
|
||||
TP_fast_assign(__entry->cpu = cpu;
|
||||
__entry->vector = vector;
|
||||
),
|
||||
TP_printk("cpu %d vector %x",
|
||||
__entry->cpu, __entry->vector)
|
||||
);
|
||||
|
||||
#endif /* CONFIG_HYPERV */
|
||||
|
||||
#undef TRACE_INCLUDE_PATH
|
||||
|
|
|
@ -290,7 +290,12 @@ static void __init ms_hyperv_init_platform(void)
|
|||
machine_ops.shutdown = hv_machine_shutdown;
|
||||
machine_ops.crash_shutdown = hv_machine_crash_shutdown;
|
||||
#endif
|
||||
mark_tsc_unstable("running on Hyper-V");
|
||||
if (ms_hyperv.features & HV_X64_ACCESS_TSC_INVARIANT) {
|
||||
wrmsrl(HV_X64_MSR_TSC_INVARIANT_CONTROL, 0x1);
|
||||
setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
|
||||
} else {
|
||||
mark_tsc_unstable("running on Hyper-V");
|
||||
}
|
||||
|
||||
/*
|
||||
* Generation 2 instances don't support reading the NMI status from
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include <linux/clocksource.h>
|
||||
#include <linux/sched_clock.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/cpuhotplug.h>
|
||||
#include <clocksource/hyperv_timer.h>
|
||||
#include <asm/hyperv-tlfs.h>
|
||||
#include <asm/mshyperv.h>
|
||||
|
@ -30,6 +31,15 @@ static u64 hv_sched_clock_offset __ro_after_init;
|
|||
* mechanism is used when running on older versions of Hyper-V
|
||||
* that don't support Direct Mode. While Hyper-V provides
|
||||
* four stimers per CPU, Linux uses only stimer0.
|
||||
*
|
||||
* Because Direct Mode does not require processing a VMbus
|
||||
* message, stimer interrupts can be enabled earlier in the
|
||||
* process of booting a CPU, and consistent with when timer
|
||||
* interrupts are enabled for other clocksource drivers.
|
||||
* However, for legacy versions of Hyper-V when Direct Mode
|
||||
* is not enabled, setting up stimer interrupts must be
|
||||
* delayed until VMbus is initialized and can process the
|
||||
* interrupt message.
|
||||
*/
|
||||
static bool direct_mode_enabled;
|
||||
|
||||
|
@ -102,17 +112,12 @@ static int hv_ce_set_oneshot(struct clock_event_device *evt)
|
|||
/*
|
||||
* hv_stimer_init - Per-cpu initialization of the clockevent
|
||||
*/
|
||||
void hv_stimer_init(unsigned int cpu)
|
||||
static int hv_stimer_init(unsigned int cpu)
|
||||
{
|
||||
struct clock_event_device *ce;
|
||||
|
||||
/*
|
||||
* Synthetic timers are always available except on old versions of
|
||||
* Hyper-V on x86. In that case, just return as Linux will use a
|
||||
* clocksource based on emulated PIT or LAPIC timer hardware.
|
||||
*/
|
||||
if (!(ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE))
|
||||
return;
|
||||
if (!hv_clock_event)
|
||||
return 0;
|
||||
|
||||
ce = per_cpu_ptr(hv_clock_event, cpu);
|
||||
ce->name = "Hyper-V clockevent";
|
||||
|
@ -127,28 +132,55 @@ void hv_stimer_init(unsigned int cpu)
|
|||
HV_CLOCK_HZ,
|
||||
HV_MIN_DELTA_TICKS,
|
||||
HV_MAX_MAX_DELTA_TICKS);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hv_stimer_init);
|
||||
|
||||
/*
|
||||
* hv_stimer_cleanup - Per-cpu cleanup of the clockevent
|
||||
*/
|
||||
void hv_stimer_cleanup(unsigned int cpu)
|
||||
int hv_stimer_cleanup(unsigned int cpu)
|
||||
{
|
||||
struct clock_event_device *ce;
|
||||
|
||||
/* Turn off clockevent device */
|
||||
if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) {
|
||||
ce = per_cpu_ptr(hv_clock_event, cpu);
|
||||
if (!hv_clock_event)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* In the legacy case where Direct Mode is not enabled
|
||||
* (which can only be on x86/64), stimer cleanup happens
|
||||
* relatively early in the CPU offlining process. We
|
||||
* must unbind the stimer-based clockevent device so
|
||||
* that the LAPIC timer can take over until clockevents
|
||||
* are no longer needed in the offlining process. Note
|
||||
* that clockevents_unbind_device() eventually calls
|
||||
* hv_ce_shutdown().
|
||||
*
|
||||
* The unbind should not be done when Direct Mode is
|
||||
* enabled because we may be on an architecture where
|
||||
* there are no other clockevent devices to fall back to.
|
||||
*/
|
||||
ce = per_cpu_ptr(hv_clock_event, cpu);
|
||||
if (direct_mode_enabled)
|
||||
hv_ce_shutdown(ce);
|
||||
}
|
||||
else
|
||||
clockevents_unbind_device(ce, cpu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hv_stimer_cleanup);
|
||||
|
||||
/* hv_stimer_alloc - Global initialization of the clockevent and stimer0 */
|
||||
int hv_stimer_alloc(int sint)
|
||||
int hv_stimer_alloc(void)
|
||||
{
|
||||
int ret;
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
* Synthetic timers are always available except on old versions of
|
||||
* Hyper-V on x86. In that case, return an error, as Linux will use a
|
||||
* clockevent based on emulated LAPIC timer hardware.
|
||||
*/
|
||||
if (!(ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE))
|
||||
return -EINVAL;
|
||||
|
||||
hv_clock_event = alloc_percpu(struct clock_event_device);
|
||||
if (!hv_clock_event)
|
||||
|
@ -159,22 +191,78 @@ int hv_stimer_alloc(int sint)
|
|||
if (direct_mode_enabled) {
|
||||
ret = hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector,
|
||||
hv_stimer0_isr);
|
||||
if (ret) {
|
||||
free_percpu(hv_clock_event);
|
||||
hv_clock_event = NULL;
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
if (ret)
|
||||
goto free_percpu;
|
||||
|
||||
stimer0_message_sint = sint;
|
||||
return 0;
|
||||
/*
|
||||
* Since we are in Direct Mode, stimer initialization
|
||||
* can be done now with a CPUHP value in the same range
|
||||
* as other clockevent devices.
|
||||
*/
|
||||
ret = cpuhp_setup_state(CPUHP_AP_HYPERV_TIMER_STARTING,
|
||||
"clockevents/hyperv/stimer:starting",
|
||||
hv_stimer_init, hv_stimer_cleanup);
|
||||
if (ret < 0)
|
||||
goto free_stimer0_irq;
|
||||
}
|
||||
return ret;
|
||||
|
||||
free_stimer0_irq:
|
||||
hv_remove_stimer0_irq(stimer0_irq);
|
||||
stimer0_irq = 0;
|
||||
free_percpu:
|
||||
free_percpu(hv_clock_event);
|
||||
hv_clock_event = NULL;
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hv_stimer_alloc);
|
||||
|
||||
/*
|
||||
* hv_stimer_legacy_init -- Called from the VMbus driver to handle
|
||||
* the case when Direct Mode is not enabled, and the stimer
|
||||
* must be initialized late in the CPU onlining process.
|
||||
*
|
||||
*/
|
||||
void hv_stimer_legacy_init(unsigned int cpu, int sint)
|
||||
{
|
||||
if (direct_mode_enabled)
|
||||
return;
|
||||
|
||||
/*
|
||||
* This function gets called by each vCPU, so setting the
|
||||
* global stimer0_message_sint value each time is conceptually
|
||||
* not ideal, but the value passed in is always the same and
|
||||
* it avoids introducing yet another interface into this
|
||||
* clocksource driver just to set the sint in the legacy case.
|
||||
*/
|
||||
stimer0_message_sint = sint;
|
||||
(void)hv_stimer_init(cpu);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hv_stimer_legacy_init);
|
||||
|
||||
/*
|
||||
* hv_stimer_legacy_cleanup -- Called from the VMbus driver to
|
||||
* handle the case when Direct Mode is not enabled, and the
|
||||
* stimer must be cleaned up early in the CPU offlining
|
||||
* process.
|
||||
*/
|
||||
void hv_stimer_legacy_cleanup(unsigned int cpu)
|
||||
{
|
||||
if (direct_mode_enabled)
|
||||
return;
|
||||
(void)hv_stimer_cleanup(cpu);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hv_stimer_legacy_cleanup);
|
||||
|
||||
|
||||
/* hv_stimer_free - Free global resources allocated by hv_stimer_alloc() */
|
||||
void hv_stimer_free(void)
|
||||
{
|
||||
if (direct_mode_enabled && (stimer0_irq != 0)) {
|
||||
if (!hv_clock_event)
|
||||
return;
|
||||
|
||||
if (direct_mode_enabled) {
|
||||
cpuhp_remove_state(CPUHP_AP_HYPERV_TIMER_STARTING);
|
||||
hv_remove_stimer0_irq(stimer0_irq);
|
||||
stimer0_irq = 0;
|
||||
}
|
||||
|
@ -190,14 +278,20 @@ EXPORT_SYMBOL_GPL(hv_stimer_free);
|
|||
void hv_stimer_global_cleanup(void)
|
||||
{
|
||||
int cpu;
|
||||
struct clock_event_device *ce;
|
||||
|
||||
if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) {
|
||||
for_each_present_cpu(cpu) {
|
||||
ce = per_cpu_ptr(hv_clock_event, cpu);
|
||||
clockevents_unbind_device(ce, cpu);
|
||||
}
|
||||
/*
|
||||
* hv_stimer_legacy_cleanup() will stop the stimer if Direct
|
||||
* Mode is not enabled, and fall back to the LAPIC timer.
|
||||
*/
|
||||
for_each_present_cpu(cpu) {
|
||||
hv_stimer_legacy_cleanup(cpu);
|
||||
}
|
||||
|
||||
/*
|
||||
* If Direct Mode is enabled, the cpuhp teardown callback
|
||||
* (hv_stimer_cleanup) will be run on all CPUs to stop the
|
||||
* stimers.
|
||||
*/
|
||||
hv_stimer_free();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup);
|
||||
|
|
|
@ -202,7 +202,7 @@ int hv_synic_init(unsigned int cpu)
|
|||
{
|
||||
hv_synic_enable_regs(cpu);
|
||||
|
||||
hv_stimer_init(cpu);
|
||||
hv_stimer_legacy_init(cpu, VMBUS_MESSAGE_SINT);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -277,7 +277,7 @@ int hv_synic_cleanup(unsigned int cpu)
|
|||
if (channel_found && vmbus_connection.conn_state == CONNECTED)
|
||||
return -EBUSY;
|
||||
|
||||
hv_stimer_cleanup(cpu);
|
||||
hv_stimer_legacy_cleanup(cpu);
|
||||
|
||||
hv_synic_disable_regs(cpu);
|
||||
|
||||
|
|
|
@ -1340,10 +1340,6 @@ static int vmbus_bus_init(void)
|
|||
if (ret)
|
||||
goto err_alloc;
|
||||
|
||||
ret = hv_stimer_alloc(VMBUS_MESSAGE_SINT);
|
||||
if (ret < 0)
|
||||
goto err_alloc;
|
||||
|
||||
/*
|
||||
* Initialize the per-cpu interrupt state and stimer state.
|
||||
* Then connect to the host.
|
||||
|
@ -1400,9 +1396,8 @@ static int vmbus_bus_init(void)
|
|||
err_connect:
|
||||
cpuhp_remove_state(hyperv_cpuhp_online);
|
||||
err_cpuhp:
|
||||
hv_stimer_free();
|
||||
err_alloc:
|
||||
hv_synic_free();
|
||||
err_alloc:
|
||||
hv_remove_vmbus_irq();
|
||||
|
||||
bus_unregister(&hv_bus);
|
||||
|
@ -2315,20 +2310,23 @@ static void hv_crash_handler(struct pt_regs *regs)
|
|||
static int hv_synic_suspend(void)
|
||||
{
|
||||
/*
|
||||
* When we reach here, all the non-boot CPUs have been offlined, and
|
||||
* the stimers on them have been unbound in hv_synic_cleanup() ->
|
||||
* When we reach here, all the non-boot CPUs have been offlined.
|
||||
* If we're in a legacy configuration where stimer Direct Mode is
|
||||
* not enabled, the stimers on the non-boot CPUs have been unbound
|
||||
* in hv_synic_cleanup() -> hv_stimer_legacy_cleanup() ->
|
||||
* hv_stimer_cleanup() -> clockevents_unbind_device().
|
||||
*
|
||||
* hv_synic_suspend() only runs on CPU0 with interrupts disabled. Here
|
||||
* we do not unbind the stimer on CPU0 because: 1) it's unnecessary
|
||||
* because the interrupts remain disabled between syscore_suspend()
|
||||
* and syscore_resume(): see create_image() and resume_target_kernel();
|
||||
* hv_synic_suspend() only runs on CPU0 with interrupts disabled.
|
||||
* Here we do not call hv_stimer_legacy_cleanup() on CPU0 because:
|
||||
* 1) it's unnecessary as interrupts remain disabled between
|
||||
* syscore_suspend() and syscore_resume(): see create_image() and
|
||||
* resume_target_kernel()
|
||||
* 2) the stimer on CPU0 is automatically disabled later by
|
||||
* syscore_suspend() -> timekeeping_suspend() -> tick_suspend() -> ...
|
||||
* -> clockevents_shutdown() -> ... -> hv_ce_shutdown(); 3) a warning
|
||||
* would be triggered if we call clockevents_unbind_device(), which
|
||||
* may sleep, in an interrupts-disabled context. So, we intentionally
|
||||
* don't call hv_stimer_cleanup(0) here.
|
||||
* -> clockevents_shutdown() -> ... -> hv_ce_shutdown()
|
||||
* 3) a warning would be triggered if we call
|
||||
* clockevents_unbind_device(), which may sleep, in an
|
||||
* interrupts-disabled context.
|
||||
*/
|
||||
|
||||
hv_synic_disable_regs(0);
|
||||
|
|
|
@ -21,10 +21,11 @@
|
|||
#define HV_MIN_DELTA_TICKS 1
|
||||
|
||||
/* Routines called by the VMbus driver */
|
||||
extern int hv_stimer_alloc(int sint);
|
||||
extern int hv_stimer_alloc(void);
|
||||
extern void hv_stimer_free(void);
|
||||
extern void hv_stimer_init(unsigned int cpu);
|
||||
extern void hv_stimer_cleanup(unsigned int cpu);
|
||||
extern int hv_stimer_cleanup(unsigned int cpu);
|
||||
extern void hv_stimer_legacy_init(unsigned int cpu, int sint);
|
||||
extern void hv_stimer_legacy_cleanup(unsigned int cpu);
|
||||
extern void hv_stimer_global_cleanup(void);
|
||||
extern void hv_stimer0_isr(void);
|
||||
|
||||
|
|
|
@ -129,6 +129,7 @@ enum cpuhp_state {
|
|||
CPUHP_AP_ARC_TIMER_STARTING,
|
||||
CPUHP_AP_RISCV_TIMER_STARTING,
|
||||
CPUHP_AP_CSKY_TIMER_STARTING,
|
||||
CPUHP_AP_HYPERV_TIMER_STARTING,
|
||||
CPUHP_AP_KVM_STARTING,
|
||||
CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING,
|
||||
CPUHP_AP_KVM_ARM_VGIC_STARTING,
|
||||
|
|
Loading…
Reference in New Issue