545 lines
15 KiB
C
545 lines
15 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
/*
|
|
* Architecture neutral utility routines for interacting with
|
|
* Hyper-V. This file is specifically for code that must be
|
|
* built-in to the kernel image when CONFIG_HYPERV is set
|
|
* (vs. being in a module) because it is called from architecture
|
|
* specific code under arch/.
|
|
*
|
|
* Copyright (C) 2021, Microsoft, Inc.
|
|
*
|
|
* Author : Michael Kelley <mikelley@microsoft.com>
|
|
*/
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/acpi.h>
|
|
#include <linux/export.h>
|
|
#include <linux/bitfield.h>
|
|
#include <linux/cpumask.h>
|
|
#include <linux/sched/task_stack.h>
|
|
#include <linux/panic_notifier.h>
|
|
#include <linux/ptrace.h>
|
|
#include <linux/kdebug.h>
|
|
#include <linux/kmsg_dump.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/dma-map-ops.h>
|
|
#include <asm/hyperv-tlfs.h>
|
|
#include <asm/mshyperv.h>
|
|
|
|
/*
|
|
* hv_root_partition, ms_hyperv and hv_nested are defined here with other
|
|
* Hyper-V specific globals so they are shared across all architectures and are
|
|
* built only when CONFIG_HYPERV is defined. But on x86,
|
|
* ms_hyperv_init_platform() is built even when CONFIG_HYPERV is not
|
|
* defined, and it uses these three variables. So mark them as __weak
|
|
* here, allowing for an overriding definition in the module containing
|
|
* ms_hyperv_init_platform().
|
|
*/
|
|
bool __weak hv_root_partition;
|
|
EXPORT_SYMBOL_GPL(hv_root_partition);
|
|
|
|
bool __weak hv_nested;
|
|
EXPORT_SYMBOL_GPL(hv_nested);
|
|
|
|
struct ms_hyperv_info __weak ms_hyperv;
|
|
EXPORT_SYMBOL_GPL(ms_hyperv);
|
|
|
|
u32 *hv_vp_index;
|
|
EXPORT_SYMBOL_GPL(hv_vp_index);
|
|
|
|
u32 hv_max_vp_index;
|
|
EXPORT_SYMBOL_GPL(hv_max_vp_index);
|
|
|
|
void * __percpu *hyperv_pcpu_input_arg;
|
|
EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg);
|
|
|
|
void * __percpu *hyperv_pcpu_output_arg;
|
|
EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg);
|
|
|
|
static void hv_kmsg_dump_unregister(void);
|
|
|
|
static struct ctl_table_header *hv_ctl_table_hdr;
|
|
|
|
/*
|
|
* Hyper-V specific initialization and shutdown code that is
|
|
* common across all architectures. Called from architecture
|
|
* specific initialization functions.
|
|
*/
|
|
|
|
void __init hv_common_free(void)
|
|
{
|
|
unregister_sysctl_table(hv_ctl_table_hdr);
|
|
hv_ctl_table_hdr = NULL;
|
|
|
|
if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE)
|
|
hv_kmsg_dump_unregister();
|
|
|
|
kfree(hv_vp_index);
|
|
hv_vp_index = NULL;
|
|
|
|
free_percpu(hyperv_pcpu_output_arg);
|
|
hyperv_pcpu_output_arg = NULL;
|
|
|
|
free_percpu(hyperv_pcpu_input_arg);
|
|
hyperv_pcpu_input_arg = NULL;
|
|
}
|
|
|
|
/*
|
|
* Functions for allocating and freeing memory with size and
|
|
* alignment HV_HYP_PAGE_SIZE. These functions are needed because
|
|
* the guest page size may not be the same as the Hyper-V page
|
|
* size. We depend upon kmalloc() aligning power-of-two size
|
|
* allocations to the allocation size boundary, so that the
|
|
* allocated memory appears to Hyper-V as a page of the size
|
|
* it expects.
|
|
*/
|
|
|
|
void *hv_alloc_hyperv_page(void)
|
|
{
|
|
BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE);
|
|
|
|
if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
|
|
return (void *)__get_free_page(GFP_KERNEL);
|
|
else
|
|
return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
|
|
}
|
|
EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page);
|
|
|
|
void *hv_alloc_hyperv_zeroed_page(void)
|
|
{
|
|
if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
|
|
return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
|
|
else
|
|
return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
|
|
}
|
|
EXPORT_SYMBOL_GPL(hv_alloc_hyperv_zeroed_page);
|
|
|
|
void hv_free_hyperv_page(unsigned long addr)
|
|
{
|
|
if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
|
|
free_page(addr);
|
|
else
|
|
kfree((void *)addr);
|
|
}
|
|
EXPORT_SYMBOL_GPL(hv_free_hyperv_page);
|
|
|
|
static void *hv_panic_page;
|
|
|
|
/*
|
|
* Boolean to control whether to report panic messages over Hyper-V.
|
|
*
|
|
* It can be set via /proc/sys/kernel/hyperv_record_panic_msg
|
|
*/
|
|
static int sysctl_record_panic_msg = 1;
|
|
|
|
/*
|
|
* sysctl option to allow the user to control whether kmsg data should be
|
|
* reported to Hyper-V on panic.
|
|
*/
|
|
static struct ctl_table hv_ctl_table[] = {
|
|
{
|
|
.procname = "hyperv_record_panic_msg",
|
|
.data = &sysctl_record_panic_msg,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec_minmax,
|
|
.extra1 = SYSCTL_ZERO,
|
|
.extra2 = SYSCTL_ONE
|
|
},
|
|
{}
|
|
};
|
|
|
|
static int hv_die_panic_notify_crash(struct notifier_block *self,
|
|
unsigned long val, void *args);
|
|
|
|
static struct notifier_block hyperv_die_report_block = {
|
|
.notifier_call = hv_die_panic_notify_crash,
|
|
};
|
|
|
|
static struct notifier_block hyperv_panic_report_block = {
|
|
.notifier_call = hv_die_panic_notify_crash,
|
|
};
|
|
|
|
/*
|
|
* The following callback works both as die and panic notifier; its
|
|
* goal is to provide panic information to the hypervisor unless the
|
|
* kmsg dumper is used [see hv_kmsg_dump()], which provides more
|
|
* information but isn't always available.
|
|
*
|
|
* Notice that both the panic/die report notifiers are registered only
|
|
* if we have the capability HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE set.
|
|
*/
|
|
static int hv_die_panic_notify_crash(struct notifier_block *self,
|
|
unsigned long val, void *args)
|
|
{
|
|
struct pt_regs *regs;
|
|
bool is_die;
|
|
|
|
/* Don't notify Hyper-V unless we have a die oops event or panic. */
|
|
if (self == &hyperv_panic_report_block) {
|
|
is_die = false;
|
|
regs = current_pt_regs();
|
|
} else { /* die event */
|
|
if (val != DIE_OOPS)
|
|
return NOTIFY_DONE;
|
|
|
|
is_die = true;
|
|
regs = ((struct die_args *)args)->regs;
|
|
}
|
|
|
|
/*
|
|
* Hyper-V should be notified only once about a panic/die. If we will
|
|
* be calling hv_kmsg_dump() later with kmsg data, don't do the
|
|
* notification here.
|
|
*/
|
|
if (!sysctl_record_panic_msg || !hv_panic_page)
|
|
hyperv_report_panic(regs, val, is_die);
|
|
|
|
return NOTIFY_DONE;
|
|
}
|
|
|
|
/*
|
|
* Callback from kmsg_dump. Grab as much as possible from the end of the kmsg
|
|
* buffer and call into Hyper-V to transfer the data.
|
|
*/
|
|
static void hv_kmsg_dump(struct kmsg_dumper *dumper,
|
|
enum kmsg_dump_reason reason)
|
|
{
|
|
struct kmsg_dump_iter iter;
|
|
size_t bytes_written;
|
|
|
|
/* We are only interested in panics. */
|
|
if (reason != KMSG_DUMP_PANIC || !sysctl_record_panic_msg)
|
|
return;
|
|
|
|
/*
|
|
* Write dump contents to the page. No need to synchronize; panic should
|
|
* be single-threaded.
|
|
*/
|
|
kmsg_dump_rewind(&iter);
|
|
kmsg_dump_get_buffer(&iter, false, hv_panic_page, HV_HYP_PAGE_SIZE,
|
|
&bytes_written);
|
|
if (!bytes_written)
|
|
return;
|
|
/*
|
|
* P3 to contain the physical address of the panic page & P4 to
|
|
* contain the size of the panic data in that page. Rest of the
|
|
* registers are no-op when the NOTIFY_MSG flag is set.
|
|
*/
|
|
hv_set_register(HV_REGISTER_CRASH_P0, 0);
|
|
hv_set_register(HV_REGISTER_CRASH_P1, 0);
|
|
hv_set_register(HV_REGISTER_CRASH_P2, 0);
|
|
hv_set_register(HV_REGISTER_CRASH_P3, virt_to_phys(hv_panic_page));
|
|
hv_set_register(HV_REGISTER_CRASH_P4, bytes_written);
|
|
|
|
/*
|
|
* Let Hyper-V know there is crash data available along with
|
|
* the panic message.
|
|
*/
|
|
hv_set_register(HV_REGISTER_CRASH_CTL,
|
|
(HV_CRASH_CTL_CRASH_NOTIFY |
|
|
HV_CRASH_CTL_CRASH_NOTIFY_MSG));
|
|
}
|
|
|
|
static struct kmsg_dumper hv_kmsg_dumper = {
|
|
.dump = hv_kmsg_dump,
|
|
};
|
|
|
|
static void hv_kmsg_dump_unregister(void)
|
|
{
|
|
kmsg_dump_unregister(&hv_kmsg_dumper);
|
|
unregister_die_notifier(&hyperv_die_report_block);
|
|
atomic_notifier_chain_unregister(&panic_notifier_list,
|
|
&hyperv_panic_report_block);
|
|
|
|
hv_free_hyperv_page((unsigned long)hv_panic_page);
|
|
hv_panic_page = NULL;
|
|
}
|
|
|
|
static void hv_kmsg_dump_register(void)
|
|
{
|
|
int ret;
|
|
|
|
hv_panic_page = hv_alloc_hyperv_zeroed_page();
|
|
if (!hv_panic_page) {
|
|
pr_err("Hyper-V: panic message page memory allocation failed\n");
|
|
return;
|
|
}
|
|
|
|
ret = kmsg_dump_register(&hv_kmsg_dumper);
|
|
if (ret) {
|
|
pr_err("Hyper-V: kmsg dump register error 0x%x\n", ret);
|
|
hv_free_hyperv_page((unsigned long)hv_panic_page);
|
|
hv_panic_page = NULL;
|
|
}
|
|
}
|
|
|
|
/*
 * hv_common_init - architecture neutral Hyper-V initialization.
 *
 * Sets up crash/panic reporting when the guest crash MSRs are available,
 * allocates the per-CPU hypercall argument pointers and allocates the
 * CPU number -> VP index table with every entry set to VP_INVAL.
 *
 * Return: 0 on success, -ENOMEM if the VP index table cannot be allocated,
 * in which case everything set up so far is released via hv_common_free().
 * Failure to allocate the per-CPU argument pointers is fatal (BUG).
 */
int __init hv_common_init(void)
{
	unsigned int i;

	if (hv_is_isolation_supported())
		sysctl_record_panic_msg = 0;

	/*
	 * Hyper-V expects to get crash register data or kmsg when
	 * crash enlightenment is available and the system crashes. Set
	 * crash_kexec_post_notifiers to true to make sure that the
	 * crash enlightenment interface is called before running the
	 * kdump kernel.
	 */
	if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
		u64 hyperv_crash_ctl;

		crash_kexec_post_notifiers = true;
		pr_info("Hyper-V: enabling crash_kexec_post_notifiers\n");

		/*
		 * Panic message recording (sysctl_record_panic_msg)
		 * is enabled by default in non-isolated guests and
		 * disabled by default in isolated guests; the panic
		 * message recording won't be available in isolated
		 * guests should the following registration fail.
		 */
		hv_ctl_table_hdr = register_sysctl("kernel", hv_ctl_table);
		if (!hv_ctl_table_hdr)
			pr_err("Hyper-V: sysctl table register error\n");

		/*
		 * Register for panic kmsg callback only if the right
		 * capability is supported by the hypervisor.
		 */
		hyperv_crash_ctl = hv_get_register(HV_REGISTER_CRASH_CTL);
		if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG)
			hv_kmsg_dump_register();

		register_die_notifier(&hyperv_die_report_block);
		atomic_notifier_chain_register(&panic_notifier_list,
					       &hyperv_panic_report_block);
	}

	/*
	 * Allocate the per-CPU state for the hypercall input arg.
	 * If this allocation fails, we will not be able to setup
	 * (per-CPU) hypercall input page and thus this failure is
	 * fatal on Hyper-V.
	 */
	hyperv_pcpu_input_arg = alloc_percpu(void *);
	BUG_ON(!hyperv_pcpu_input_arg);

	/* Allocate the per-CPU state for output arg for root */
	if (hv_root_partition) {
		hyperv_pcpu_output_arg = alloc_percpu(void *);
		BUG_ON(!hyperv_pcpu_output_arg);
	}

	/*
	 * hv_vp_index[] is indexed by Linux CPU number, so it must have
	 * nr_cpu_ids entries. num_possible_cpus() only counts the bits set
	 * in cpu_possible_mask and is too small when that mask is sparse,
	 * which would let hv_common_cpu_init() write past the array.
	 */
	hv_vp_index = kmalloc_array(nr_cpu_ids, sizeof(*hv_vp_index),
				    GFP_KERNEL);
	if (!hv_vp_index) {
		hv_common_free();
		return -ENOMEM;
	}

	for (i = 0; i < nr_cpu_ids; i++)
		hv_vp_index[i] = VP_INVAL;

	return 0;
}
|
|
|
|
/*
|
|
* Hyper-V specific initialization and die code for
|
|
* individual CPUs that is common across all architectures.
|
|
* Called by the CPU hotplug mechanism.
|
|
*/
|
|
|
|
int hv_common_cpu_init(unsigned int cpu)
|
|
{
|
|
void **inputarg, **outputarg;
|
|
u64 msr_vp_index;
|
|
gfp_t flags;
|
|
int pgcount = hv_root_partition ? 2 : 1;
|
|
|
|
/* hv_cpu_init() can be called with IRQs disabled from hv_resume() */
|
|
flags = irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL;
|
|
|
|
inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
|
|
|
|
/*
|
|
* hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory is already
|
|
* allocated if this CPU was previously online and then taken offline
|
|
*/
|
|
if (!*inputarg) {
|
|
*inputarg = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
|
|
if (!(*inputarg))
|
|
return -ENOMEM;
|
|
|
|
if (hv_root_partition) {
|
|
outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
|
|
*outputarg = (char *)(*inputarg) + HV_HYP_PAGE_SIZE;
|
|
}
|
|
}
|
|
|
|
msr_vp_index = hv_get_register(HV_REGISTER_VP_INDEX);
|
|
|
|
hv_vp_index[cpu] = msr_vp_index;
|
|
|
|
if (msr_vp_index > hv_max_vp_index)
|
|
hv_max_vp_index = msr_vp_index;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * hv_common_cpu_die - per-CPU Hyper-V teardown.
 * @cpu: Linux CPU number going offline (unused)
 *
 * Deliberately does nothing. The hyperv_pcpu_input_arg and
 * hyperv_pcpu_output_arg memory is not freed when the CPU goes offline,
 * because hyperv_pcpu_input_arg may be used by the Hyper-V vPCI driver in
 * reassigning interrupts as part of the offlining process. The interrupt
 * reassignment happens *after* the CPUHP_AP_HYPERV_ONLINE state has run
 * and called this function.
 *
 * If a previously offlined CPU is brought back online again, the
 * originally allocated memory is reused in hv_common_cpu_init().
 *
 * Return: always 0.
 */
int hv_common_cpu_die(unsigned int cpu)
{
	return 0;
}
|
|
|
|
/* Bit mask of the extended capability to query: see HV_EXT_CAPABILITY_xxx */
|
|
bool hv_query_ext_cap(u64 cap_query)
|
|
{
|
|
/*
|
|
* The address of the 'hv_extended_cap' variable will be used as an
|
|
* output parameter to the hypercall below and so it should be
|
|
* compatible with 'virt_to_phys'. Which means, it's address should be
|
|
* directly mapped. Use 'static' to keep it compatible; stack variables
|
|
* can be virtually mapped, making them incompatible with
|
|
* 'virt_to_phys'.
|
|
* Hypercall input/output addresses should also be 8-byte aligned.
|
|
*/
|
|
static u64 hv_extended_cap __aligned(8);
|
|
static bool hv_extended_cap_queried;
|
|
u64 status;
|
|
|
|
/*
|
|
* Querying extended capabilities is an extended hypercall. Check if the
|
|
* partition supports extended hypercall, first.
|
|
*/
|
|
if (!(ms_hyperv.priv_high & HV_ENABLE_EXTENDED_HYPERCALLS))
|
|
return false;
|
|
|
|
/* Extended capabilities do not change at runtime. */
|
|
if (hv_extended_cap_queried)
|
|
return hv_extended_cap & cap_query;
|
|
|
|
status = hv_do_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, NULL,
|
|
&hv_extended_cap);
|
|
|
|
/*
|
|
* The query extended capabilities hypercall should not fail under
|
|
* any normal circumstances. Avoid repeatedly making the hypercall, on
|
|
* error.
|
|
*/
|
|
hv_extended_cap_queried = true;
|
|
if (!hv_result_success(status)) {
|
|
pr_err("Hyper-V: Extended query capabilities hypercall failed 0x%llx\n",
|
|
status);
|
|
return false;
|
|
}
|
|
|
|
return hv_extended_cap & cap_query;
|
|
}
|
|
EXPORT_SYMBOL_GPL(hv_query_ext_cap);
|
|
|
|
/*
 * hv_setup_dma_ops - set up DMA operations for a device.
 * @dev:      device to configure
 * @coherent: passed through to arch_setup_dma_ops() unchanged
 */
void hv_setup_dma_ops(struct device *dev, bool coherent)
{
	/*
	 * Hyper-V does not offer a vIOMMU in the guest
	 * VM, so pass 0/NULL for the IOMMU settings
	 */
	arch_setup_dma_ops(dev, 0, 0, NULL, coherent);
}
EXPORT_SYMBOL_GPL(hv_setup_dma_ops);
|
|
|
|
bool hv_is_hibernation_supported(void)
|
|
{
|
|
return !hv_root_partition && acpi_sleep_state_supported(ACPI_STATE_S4);
|
|
}
|
|
EXPORT_SYMBOL_GPL(hv_is_hibernation_supported);
|
|
|
|
/*
|
|
* Default function to read the Hyper-V reference counter, independent
|
|
* of whether Hyper-V enlightened clocks/timers are being used. But on
|
|
* architectures where it is used, Hyper-V enlightenment code in
|
|
* hyperv_timer.c may override this function.
|
|
*/
|
|
static u64 __hv_read_ref_counter(void)
|
|
{
|
|
return hv_get_register(HV_REGISTER_TIME_REF_COUNT);
|
|
}
|
|
|
|
u64 (*hv_read_reference_counter)(void) = __hv_read_ref_counter;
|
|
EXPORT_SYMBOL_GPL(hv_read_reference_counter);
|
|
|
|
/* These __weak functions provide default "no-op" behavior and
|
|
* may be overridden by architecture specific versions. Architectures
|
|
* for which the default "no-op" behavior is sufficient can leave
|
|
* them unimplemented and not be cluttered with a bunch of stub
|
|
* functions in arch-specific code.
|
|
*/
|
|
|
|
bool __weak hv_is_isolation_supported(void)
|
|
{
|
|
return false;
|
|
}
|
|
EXPORT_SYMBOL_GPL(hv_is_isolation_supported);
|
|
|
|
/* Default __weak stub: the isolation type is reported as not SNP. */
bool __weak hv_isolation_type_snp(void)
{
	return false;
}
EXPORT_SYMBOL_GPL(hv_isolation_type_snp);
|
|
|
|
/* Default __weak stub: @handler is ignored and nothing is installed. */
void __weak hv_setup_vmbus_handler(void (*handler)(void))
{
}
EXPORT_SYMBOL_GPL(hv_setup_vmbus_handler);
|
|
|
|
/* Default __weak stub: nothing to remove. */
void __weak hv_remove_vmbus_handler(void)
{
}
EXPORT_SYMBOL_GPL(hv_remove_vmbus_handler);
|
|
|
|
/* Default __weak stub: @handler is ignored and nothing is installed. */
void __weak hv_setup_kexec_handler(void (*handler)(void))
{
}
EXPORT_SYMBOL_GPL(hv_setup_kexec_handler);
|
|
|
|
/* Default __weak stub: nothing to remove. */
void __weak hv_remove_kexec_handler(void)
{
}
EXPORT_SYMBOL_GPL(hv_remove_kexec_handler);
|
|
|
|
/* Default __weak stub: @handler is ignored and nothing is installed. */
void __weak hv_setup_crash_handler(void (*handler)(struct pt_regs *regs))
{
}
EXPORT_SYMBOL_GPL(hv_setup_crash_handler);
|
|
|
|
/* Default __weak stub: nothing to remove. */
void __weak hv_remove_crash_handler(void)
{
}
EXPORT_SYMBOL_GPL(hv_remove_crash_handler);
|
|
|
|
/* Default __weak stub: no cleanup is performed. */
void __weak hyperv_cleanup(void)
{
}
EXPORT_SYMBOL_GPL(hyperv_cleanup);
|
|
|
|
/*
 * Default __weak stub: all arguments are ignored and no hypercall is made.
 *
 * Return: HV_STATUS_INVALID_PARAMETER, always.
 */
u64 __weak hv_ghcb_hypercall(u64 control, void *input, void *output,
			     u32 input_size)
{
	return HV_STATUS_INVALID_PARAMETER;
}
EXPORT_SYMBOL_GPL(hv_ghcb_hypercall);
|