From 2a22ee6c3ab1d761bc9c04f1e4117edd55b82f09 Mon Sep 17 00:00:00 2001 From: Simon Gaiser Date: Thu, 15 Mar 2018 03:43:20 +0100 Subject: [PATCH 1/7] xen: xenbus_dev_frontend: Fix XS_TRANSACTION_END handling Commit fd8aa9095a95 ("xen: optimize xenbus driver for multiple concurrent xenstore accesses") made a subtle change to the semantic of xenbus_dev_request_and_reply() and xenbus_transaction_end(). Before on an error response to XS_TRANSACTION_END xenbus_dev_request_and_reply() would not decrement the active transaction counter. But xenbus_transaction_end() has always counted the transaction as finished regardless of the response. The new behavior is that xenbus_dev_request_and_reply() and xenbus_transaction_end() will always count the transaction as finished regardless the response code (handled in xs_request_exit()). But xenbus_dev_frontend tries to end a transaction on closing of the device if the XS_TRANSACTION_END failed before. Trying to close the transaction twice corrupts the reference count. So fix this by also considering a transaction closed if we have sent XS_TRANSACTION_END once regardless of the return code. Cc: # 4.11 Fixes: fd8aa9095a95 ("xen: optimize xenbus driver for multiple concurrent xenstore accesses") Signed-off-by: Simon Gaiser Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky --- drivers/xen/xenbus/xenbus_dev_frontend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index a493e99bed21..81a84b3c1c50 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -365,7 +365,7 @@ void xenbus_dev_queue_reply(struct xb_req_data *req) if (WARN_ON(rc)) goto out; } - } else if (req->msg.type == XS_TRANSACTION_END) { + } else if (req->type == XS_TRANSACTION_END) { trans = xenbus_get_transaction(u, req->msg.tx_id); if (WARN_ON(!trans)) goto out; From b93008d1ac657dc67819330c5995e65e7c3e7978 Mon Sep 17 00:00:00 2001 From: Simon Gaiser Date: Thu, 15 Mar 2018 03:43:21 +0100 Subject: [PATCH 2/7] xen: xenbus: Catch closing of non existent transactions Users of the xenbus functions should never close a non existent transaction (for example by trying to closing the same transaction twice) but better catch it in xs_request_exit() than to corrupt the reference counter. Signed-off-by: Simon Gaiser Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky --- drivers/xen/xenbus/xenbus_xs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index 3f3b29398ab8..49a3874ae6bb 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -140,7 +140,9 @@ void xs_request_exit(struct xb_req_data *req) spin_lock(&xs_state_lock); xs_state_users--; if ((req->type == XS_TRANSACTION_START && req->msg.type == XS_ERROR) || - req->type == XS_TRANSACTION_END) + (req->type == XS_TRANSACTION_END && + !WARN_ON_ONCE(req->msg.type == XS_ERROR && + !strcmp(req->body, "ENOENT")))) xs_state_users--; spin_unlock(&xs_state_lock); From 8fe5ab411209ac6e2c7021131e622fd004506d1a Mon Sep 17 00:00:00 2001 From: Simon Gaiser Date: Thu, 15 Mar 2018 03:43:22 +0100 Subject: [PATCH 3/7] xen: xenbus_dev_frontend: Verify body of XS_TRANSACTION_END By guaranteeing that the argument of XS_TRANSACTION_END is valid we can assume that the transaction has been closed when we get an XS_ERROR response from xenstore (Note that we already verify that it's a valid transaction id). Signed-off-by: Simon Gaiser Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky --- drivers/xen/xenbus/xenbus_dev_frontend.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index 81a84b3c1c50..0d6d9264d6a9 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -429,6 +429,10 @@ static int xenbus_write_transaction(unsigned msg_type, { int rc; struct xenbus_transaction_holder *trans = NULL; + struct { + struct xsd_sockmsg hdr; + char body[]; + } *msg = (void *)u->u.buffer; if (msg_type == XS_TRANSACTION_START) { trans = kzalloc(sizeof(*trans), GFP_KERNEL); @@ -437,11 +441,15 @@ static int xenbus_write_transaction(unsigned msg_type, goto out; } list_add(&trans->list, &u->transactions); - } else if (u->u.msg.tx_id != 0 && - !xenbus_get_transaction(u, u->u.msg.tx_id)) + } else if (msg->hdr.tx_id != 0 && + !xenbus_get_transaction(u, msg->hdr.tx_id)) return xenbus_command_reply(u, XS_ERROR, "ENOENT"); + else if (msg_type == XS_TRANSACTION_END && + !(msg->hdr.len == 2 && + (!strcmp(msg->body, "T") || !strcmp(msg->body, "F")))) + return xenbus_command_reply(u, XS_ERROR, "EINVAL"); - rc = xenbus_dev_request_and_reply(&u->u.msg, u); + rc = xenbus_dev_request_and_reply(&msg->hdr, u); if (rc && trans) { list_del(&trans->list); kfree(trans); From 36104cb9012a82e73c32a3b709257766b16bcd1d Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Mon, 19 Mar 2018 12:58:04 -0400 Subject: [PATCH 4/7] x86/xen: Delay get_cpu_cap until stack canary is established Commit 2cc42bac1c79 ("x86-64/Xen: eliminate W+X mappings") introduced a call to get_cpu_cap, which is fstack-protected. This is works on x86-64 as commit 4f277295e54c ("x86/xen: init %gs very early to avoid page faults with stack protector") ensures the stack protector is configured, but it it did not cover x86-32. Delay calling get_cpu_cap until after xen_setup_gdt has initialized the stack canary. Without this, a 32bit PV machine crashes early in boot. (XEN) Domain 0 (vcpu#0) crashed on cpu#0: (XEN) ----[ Xen-4.6.6-xc x86_64 debug=n Tainted: C ]---- (XEN) CPU: 0 (XEN) RIP: e019:[<00000000c10362f8>] And the PV kernel IP corresponds to init_scattered_cpuid_features 0xc10362f8 <+24>: mov %gs:0x14,%eax Fixes 2cc42bac1c79 ("x86-64/Xen: eliminate W+X mappings") Signed-off-by: Jason Andryuk Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- arch/x86/xen/enlighten_pv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 3c2c2530737e..c36d23aa6c35 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1259,10 +1259,6 @@ asmlinkage __visible void __init xen_start_kernel(void) */ __userpte_alloc_gfp &= ~__GFP_HIGHMEM; - /* Work out if we support NX */ - get_cpu_cap(&boot_cpu_data); - x86_configure_nx(); - /* Get mfn list */ xen_build_dynamic_phys_to_machine(); @@ -1272,6 +1268,10 @@ asmlinkage __visible void __init xen_start_kernel(void) */ xen_setup_gdt(0); + /* Work out if we support NX */ + get_cpu_cap(&boot_cpu_data); + x86_configure_nx(); + xen_init_irq_ops(); /* Let's presume PV guests always boot on vCPU with id 0. */ From 4d0f1ce6955913c490263359eadd392574cf9fe3 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Thu, 15 Mar 2018 14:22:05 +0000 Subject: [PATCH 5/7] xen/acpi: upload _PSD info for non Dom0 CPUs too All uploaded PM data from non-dom0 CPUs takes the info from vCPU 0 and changing only the acpi_id. For processors which P-state coordination type is HW_ALL (0xFD) it is OK to upload bogus P-state dependency information (_PSD), because Xen will ignore any cpufreq domains created for past CPUs. Albeit for platforms which expose coordination types as SW_ANY or SW_ALL, this will have some unintended side effects. Effectively, it will look at the P-state domain existence and *if it already exists* it will skip the acpi-cpufreq initialization and thus inherit the policy from the first CPU in the cpufreq domain. This will finally lead to the original cpu not changing target freq to P0 other than the first in the domain. Which will make turbo boost not getting enabled (e.g. for 'performance' governor) for all cpus. This patch fixes that, by also evaluating _PSD when we enumerate all ACPI processors and thus always uploading the correct info to Xen. We export acpi_processor_get_psd() for that this purpose, but change signature to not assume an existent of acpi_processor given that ACPI isn't creating an acpi_processor for non-dom0 CPUs. Signed-off-by: Joao Martins Reviewed-by: Boris Ostrovsky Acked-by: Rafael J. Wysocki Signed-off-by: Boris Ostrovsky --- drivers/acpi/processor_perflib.c | 11 +++++------ drivers/xen/xen-acpi-processor.c | 24 ++++++++++++++++++++++++ include/acpi/processor.h | 2 ++ 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index c7cf48ad5cb9..a651ab3490d8 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -533,7 +533,7 @@ int acpi_processor_notify_smm(struct module *calling_module) EXPORT_SYMBOL(acpi_processor_notify_smm); -static int acpi_processor_get_psd(struct acpi_processor *pr) +int acpi_processor_get_psd(acpi_handle handle, struct acpi_psd_package *pdomain) { int result = 0; acpi_status status = AE_OK; @@ -541,9 +541,8 @@ static int acpi_processor_get_psd(struct acpi_processor *pr) struct acpi_buffer format = {sizeof("NNNNN"), "NNNNN"}; struct acpi_buffer state = {0, NULL}; union acpi_object *psd = NULL; - struct acpi_psd_package *pdomain; - status = acpi_evaluate_object(pr->handle, "_PSD", NULL, &buffer); + status = acpi_evaluate_object(handle, "_PSD", NULL, &buffer); if (ACPI_FAILURE(status)) { return -ENODEV; } @@ -561,8 +560,6 @@ static int acpi_processor_get_psd(struct acpi_processor *pr) goto end; } - pdomain = &(pr->performance->domain_info); - state.length = sizeof(struct acpi_psd_package); state.pointer = pdomain; @@ -597,6 +594,7 @@ end: kfree(buffer.pointer); return result; } +EXPORT_SYMBOL(acpi_processor_get_psd); int acpi_processor_preregister_performance( struct acpi_processor_performance __percpu *performance) @@ -645,7 +643,8 @@ int acpi_processor_preregister_performance( pr->performance = per_cpu_ptr(performance, i); cpumask_set_cpu(i, pr->performance->shared_cpu_map); - if (acpi_processor_get_psd(pr)) { + pdomain = &(pr->performance->domain_info); + if (acpi_processor_get_psd(pr->handle, pdomain)) { retval = -EINVAL; continue; } diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c index 23e391d3ec01..c80195e8fbd1 100644 --- a/drivers/xen/xen-acpi-processor.c +++ b/drivers/xen/xen-acpi-processor.c @@ -53,6 +53,8 @@ static unsigned long *acpi_ids_done; static unsigned long *acpi_id_present; /* And if there is an _CST definition (or a PBLK) for the ACPI IDs */ static unsigned long *acpi_id_cst_present; +/* Which ACPI P-State dependencies for a enumerated processor */ +static struct acpi_psd_package *acpi_psd; static int push_cxx_to_hypervisor(struct acpi_processor *_pr) { @@ -372,6 +374,13 @@ read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv) pr_debug("ACPI CPU%u w/ PBLK:0x%lx\n", acpi_id, (unsigned long)pblk); + /* It has P-state dependencies */ + if (!acpi_processor_get_psd(handle, &acpi_psd[acpi_id])) { + pr_debug("ACPI CPU%u w/ PST:coord_type = %llu domain = %llu\n", + acpi_id, acpi_psd[acpi_id].coord_type, + acpi_psd[acpi_id].domain); + } + status = acpi_evaluate_object(handle, "_CST", NULL, &buffer); if (ACPI_FAILURE(status)) { if (!pblk) @@ -405,6 +414,14 @@ static int check_acpi_ids(struct acpi_processor *pr_backup) return -ENOMEM; } + acpi_psd = kcalloc(nr_acpi_bits, sizeof(struct acpi_psd_package), + GFP_KERNEL); + if (!acpi_psd) { + kfree(acpi_id_present); + kfree(acpi_id_cst_present); + return -ENOMEM; + } + acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, read_acpi_id, NULL, NULL, NULL); @@ -417,6 +434,12 @@ upload: pr_backup->acpi_id = i; /* Mask out C-states if there are no _CST or PBLK */ pr_backup->flags.power = test_bit(i, acpi_id_cst_present); + /* num_entries is non-zero if we evaluated _PSD */ + if (acpi_psd[i].num_entries) { + memcpy(&pr_backup->performance->domain_info, + &acpi_psd[i], + sizeof(struct acpi_psd_package)); + } (void)upload_pm_data(pr_backup); } } @@ -566,6 +589,7 @@ static void __exit xen_acpi_processor_exit(void) kfree(acpi_ids_done); kfree(acpi_id_present); kfree(acpi_id_cst_present); + kfree(acpi_psd); for_each_possible_cpu(i) acpi_processor_unregister_performance(i); diff --git a/include/acpi/processor.h b/include/acpi/processor.h index d591bb77f592..40a916efd7c0 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -254,6 +254,8 @@ int acpi_processor_pstate_control(void); /* note: this locks both the calling module and the processor module if a _PPC object exists, rmmod is disallowed then */ int acpi_processor_notify_smm(struct module *calling_module); +int acpi_processor_get_psd(acpi_handle handle, + struct acpi_psd_package *pdomain); /* parsing the _P* objects. */ extern int acpi_processor_get_performance_info(struct acpi_processor *pr); From c37a3c94775855567b90f91775b9691e10bd2806 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 29 Mar 2018 12:01:53 +0300 Subject: [PATCH 6/7] xen/acpi: off by one in read_acpi_id() If acpi_id is == nr_acpi_bits, then we access one element beyond the end of the acpi_psd[] array or we set one bit beyond the end of the bit map when we do __set_bit(acpi_id, acpi_id_present); Fixes: 59a568029181 ("xen/acpi-processor: C and P-state driver that uploads said data to hypervisor.") Signed-off-by: Dan Carpenter Reviewed-by: Joao Martins Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky --- drivers/xen/xen-acpi-processor.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c index c80195e8fbd1..b29f4e40851f 100644 --- a/drivers/xen/xen-acpi-processor.c +++ b/drivers/xen/xen-acpi-processor.c @@ -364,9 +364,9 @@ read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv) } /* There are more ACPI Processor objects than in x2APIC or MADT. * This can happen with incorrect ACPI SSDT declerations. */ - if (acpi_id > nr_acpi_bits) { - pr_debug("We only have %u, trying to set %u\n", - nr_acpi_bits, acpi_id); + if (acpi_id >= nr_acpi_bits) { + pr_debug("max acpi id %u, trying to set %u\n", + nr_acpi_bits - 1, acpi_id); return AE_OK; } /* OK, There is a ACPI Processor object */ From a5a18ae73bcac37010a7b37e942add0f25b9d503 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Mon, 9 Apr 2018 14:51:44 -0400 Subject: [PATCH 7/7] xen/pvh: Indicate XENFEAT_linux_rsdp_unrestricted to Xen Pre-4.17 kernels ignored start_info's rsdp_paddr pointer and instead relied on finding RSDP in standard location in BIOS RO memory. This has worked since that's where Xen used to place it. However, with recent Xen change (commit 4a5733771e6f ("libxl: put RSDP for PVH guest near 4GB")) it prefers to keep RSDP at a "non-standard" address. Even though as of commit b17d9d1df3c3 ("x86/xen: Add pvh specific rsdp address retrieval function") Linux is able to find RSDP, for back-compatibility reasons we need to indicate to Xen that we can handle this, an we do so by setting XENFEAT_linux_rsdp_unrestricted flag in ELF notes. (Also take this opportunity and sync features.h header file with Xen) Signed-off-by: Boris Ostrovsky Reviewed-by: Juergen Gross Reviewed-by: Wei Liu --- arch/x86/xen/xen-head.S | 4 +++- include/xen/interface/features.h | 23 +++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 96f26e026783..5077ead5e59c 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -89,7 +89,9 @@ END(hypercall_page) ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .ascii "!writable_page_tables|pae_pgdir_above_4gb") ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES, - .long (1 << XENFEAT_writable_page_tables) | (1 << XENFEAT_dom0)) + .long (1 << XENFEAT_writable_page_tables) | \ + (1 << XENFEAT_dom0) | \ + (1 << XENFEAT_linux_rsdp_unrestricted)) ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, diff --git a/include/xen/interface/features.h b/include/xen/interface/features.h index 9b0eb574f0d1..6d1384abfbdf 100644 --- a/include/xen/interface/features.h +++ b/include/xen/interface/features.h @@ -42,6 +42,9 @@ /* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */ #define XENFEAT_mmu_pt_update_preserve_ad 5 +/* x86: Does this Xen host support the MMU_{CLEAR,COPY}_PAGE hypercall? */ +#define XENFEAT_highmem_assist 6 + /* * If set, GNTTABOP_map_grant_ref honors flags to be placed into guest kernel * available pte bits. @@ -60,6 +63,26 @@ /* operation as Dom0 is supported */ #define XENFEAT_dom0 11 +/* Xen also maps grant references at pfn = mfn. + * This feature flag is deprecated and should not be used. +#define XENFEAT_grant_map_identity 12 + */ + +/* Guest can use XENMEMF_vnode to specify virtual node for memory op. */ +#define XENFEAT_memory_op_vnode_supported 13 + +/* arm: Hypervisor supports ARM SMC calling convention. */ +#define XENFEAT_ARM_SMCCC_supported 14 + +/* + * x86/PVH: If set, ACPI RSDP can be placed at any address. Otherwise RSDP + * must be located in lower 1MB, as required by ACPI Specification for IA-PC + * systems. + * This feature flag is only consulted if XEN_ELFNOTE_GUEST_OS contains + * the "linux" string. + */ +#define XENFEAT_linux_rsdp_unrestricted 15 + #define XENFEAT_NR_SUBMAPS 1 #endif /* __XEN_PUBLIC_FEATURES_H__ */