system/xen: Updated for version 4.17.0.

Signed-off-by: Mario Preksavec <mario@slackware.hr>
Signed-off-by: Willy Sudiarto Raharjo <willysr@slackbuilds.org>

parent 45f679b708
commit 4c5d49121f
@@ -57,7 +57,7 @@ kernel-xen.sh: This script builds the Linux Kernel for a Xen Hypervisor.
* To make things a bit easier, a copy of Xen EFI binary can be found here:

http://slackware.hr/~mario/xen/xen-4.16.1.efi.gz
http://slackware.hr/~mario/xen/xen-4.17.0.efi.gz

!!! Make sure to understand what are you doing at this point, you could
easily lose your data. Always create backups !!!
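For reference, a minimal sketch of fetching and installing that prebuilt EFI binary could look like the following; the download URL comes from the README text above, while the ESP mount point and destination path are assumptions that depend on the local EFI layout:

  # Fetch and unpack the prebuilt hypervisor binary referenced above.
  wget http://slackware.hr/~mario/xen/xen-4.17.0.efi.gz
  gunzip xen-4.17.0.efi.gz
  # Copy it onto the EFI system partition; the destination below is only an
  # example, adjust it to wherever your boot loader expects the Xen image.
  cp xen-4.17.0.efi /boot/efi/EFI/Slackware/xen-4.17.0.efi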
@@ -1,6 +1,6 @@
#
# Automatically generated file; DO NOT EDIT.
# Linux/x86 5.15.27 Kernel Configuration
# Linux/x86 5.15.80 Kernel Configuration
#
CONFIG_CC_VERSION_TEXT="gcc (GCC) 11.2.0"
CONFIG_CC_IS_GCC=y

@@ -15,6 +15,7 @@ CONFIG_CC_CAN_LINK=y
CONFIG_CC_CAN_LINK_STATIC=y
CONFIG_CC_HAS_ASM_GOTO=y
CONFIG_CC_HAS_ASM_GOTO_OUTPUT=y
CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT=y
CONFIG_CC_HAS_ASM_INLINE=y
CONFIG_CC_HAS_NO_PROFILE_FN_ATTR=y
CONFIG_IRQ_WORK=y

@@ -348,7 +349,6 @@ CONFIG_X86_FEATURE_NAMES=y
CONFIG_X86_X2APIC=y
CONFIG_X86_MPPARSE=y
# CONFIG_GOLDFISH is not set
CONFIG_RETPOLINE=y
CONFIG_X86_CPU_RESCTRL=y
# CONFIG_X86_EXTENDED_PLATFORM is not set
CONFIG_X86_INTEL_LPSS=y

@@ -517,6 +517,14 @@ CONFIG_HAVE_LIVEPATCH=y
CONFIG_LIVEPATCH=y
# end of Processor type and features

CONFIG_CC_HAS_RETURN_THUNK=y
CONFIG_SPECULATION_MITIGATIONS=y
CONFIG_PAGE_TABLE_ISOLATION=y
CONFIG_RETPOLINE=y
CONFIG_RETHUNK=y
CONFIG_CPU_UNRET_ENTRY=y
CONFIG_CPU_IBPB_ENTRY=y
CONFIG_CPU_IBRS_ENTRY=y
CONFIG_ARCH_HAS_ADD_PAGES=y
CONFIG_ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE=y
CONFIG_USE_PERCPU_NUMA_NODE_ID=y
@@ -741,6 +749,7 @@ CONFIG_HAVE_KPROBES_ON_FTRACE=y
CONFIG_HAVE_FUNCTION_ERROR_INJECTION=y
CONFIG_HAVE_NMI=y
CONFIG_TRACE_IRQFLAGS_SUPPORT=y
CONFIG_TRACE_IRQFLAGS_NMI_SUPPORT=y
CONFIG_HAVE_ARCH_TRACEHOOK=y
CONFIG_HAVE_DMA_CONTIGUOUS=y
CONFIG_GENERIC_SMP_IDLE_THREAD=y

@@ -2470,6 +2479,7 @@ CONFIG_PNPACPI=y
CONFIG_BLK_DEV=y
CONFIG_BLK_DEV_NULL_BLK=m
CONFIG_BLK_DEV_FD=y
# CONFIG_BLK_DEV_FD_RAWCMD is not set
CONFIG_CDROM=y
CONFIG_PARIDE=m

@@ -3131,6 +3141,7 @@ CONFIG_ATL1=m
CONFIG_ATL1E=m
CONFIG_ATL1C=m
CONFIG_ALX=m
CONFIG_CX_ECAT=m
CONFIG_NET_VENDOR_BROADCOM=y
CONFIG_B44=m
CONFIG_B44_PCI_AUTOSELECT=y

@@ -3148,8 +3159,6 @@ CONFIG_BNXT=m
CONFIG_BNXT_SRIOV=y
CONFIG_BNXT_FLOWER_OFFLOAD=y
CONFIG_BNXT_HWMON=y
CONFIG_NET_VENDOR_BROCADE=y
CONFIG_BNA=m
CONFIG_NET_VENDOR_CADENCE=y
CONFIG_MACB=m
CONFIG_MACB_USE_HWSTAMP=y

@@ -3174,7 +3183,6 @@ CONFIG_CHELSIO_IPSEC_INLINE=m
CONFIG_NET_VENDOR_CISCO=y
CONFIG_ENIC=m
# CONFIG_NET_VENDOR_CORTINA is not set
CONFIG_CX_ECAT=m
CONFIG_DNET=m
CONFIG_NET_VENDOR_DEC=y
CONFIG_NET_TULIP=y

@@ -3229,8 +3237,6 @@ CONFIG_I40EVF=m
CONFIG_ICE=m
CONFIG_FM10K=m
CONFIG_IGC=m
CONFIG_NET_VENDOR_MICROSOFT=y
CONFIG_MICROSOFT_MANA=m
CONFIG_JME=m
CONFIG_NET_VENDOR_LITEX=y
CONFIG_NET_VENDOR_MARVELL=y

@@ -3280,10 +3286,13 @@ CONFIG_KS8851_MLL=m
CONFIG_KSZ884X_PCI=m
# CONFIG_NET_VENDOR_MICROCHIP is not set
# CONFIG_NET_VENDOR_MICROSEMI is not set
CONFIG_NET_VENDOR_MICROSOFT=y
CONFIG_MICROSOFT_MANA=m
CONFIG_NET_VENDOR_MYRI=y
CONFIG_MYRI10GE=m
CONFIG_MYRI10GE_DCA=y
CONFIG_FEALNX=m
# CONFIG_NET_VENDOR_NI is not set
CONFIG_NET_VENDOR_NATSEMI=y
CONFIG_NATSEMI=m
CONFIG_NS83820=m

@@ -3296,7 +3305,6 @@ CONFIG_NFP=m
CONFIG_NFP_APP_FLOWER=y
CONFIG_NFP_APP_ABM_NIC=y
# CONFIG_NFP_DEBUG is not set
# CONFIG_NET_VENDOR_NI is not set
CONFIG_NET_VENDOR_8390=y
CONFIG_PCMCIA_AXNET=m
CONFIG_NE2K_PCI=m

@@ -3324,6 +3332,8 @@ CONFIG_QED_RDMA=y
CONFIG_QED_ISCSI=y
CONFIG_QED_FCOE=y
CONFIG_QED_OOO=y
CONFIG_NET_VENDOR_BROCADE=y
CONFIG_BNA=m
CONFIG_NET_VENDOR_QUALCOMM=y
# CONFIG_QCOM_EMAC is not set
# CONFIG_RMNET is not set

@@ -3344,6 +3354,11 @@ CONFIG_ROCKER=m
CONFIG_NET_VENDOR_SAMSUNG=y
CONFIG_SXGBE_ETH=m
CONFIG_NET_VENDOR_SEEQ=y
CONFIG_NET_VENDOR_SILAN=y
CONFIG_SC92031=m
CONFIG_NET_VENDOR_SIS=y
CONFIG_SIS900=m
CONFIG_SIS190=m
CONFIG_NET_VENDOR_SOLARFLARE=y
CONFIG_SFC=m
CONFIG_SFC_MTD=y
@@ -3352,11 +3367,6 @@ CONFIG_SFC_SRIOV=y
# CONFIG_SFC_MCDI_LOGGING is not set
CONFIG_SFC_FALCON=m
CONFIG_SFC_FALCON_MTD=y
CONFIG_NET_VENDOR_SILAN=y
CONFIG_SC92031=m
CONFIG_NET_VENDOR_SIS=y
CONFIG_SIS900=m
CONFIG_SIS190=m
CONFIG_NET_VENDOR_SMSC=y
CONFIG_PCMCIA_SMC91C92=m
CONFIG_EPIC100=m

@@ -7471,6 +7481,7 @@ CONFIG_TYPEC_TCPCI=m
CONFIG_TYPEC_RT1711H=m
CONFIG_TYPEC_TCPCI_MAXIM=m
CONFIG_TYPEC_FUSB302=m
CONFIG_TYPEC_WCOVE=m
CONFIG_TYPEC_UCSI=m
CONFIG_UCSI_CCG=m
CONFIG_UCSI_ACPI=m

@@ -9502,7 +9513,6 @@ CONFIG_SECURITY_DMESG_RESTRICT=y
CONFIG_SECURITY=y
CONFIG_SECURITYFS=y
CONFIG_SECURITY_NETWORK=y
CONFIG_PAGE_TABLE_ISOLATION=y
CONFIG_SECURITY_INFINIBAND=y
CONFIG_SECURITY_NETWORK_XFRM=y
# CONFIG_SECURITY_PATH is not set

@@ -9648,8 +9658,7 @@ CONFIG_CRYPTO_CRC32=m
CONFIG_CRYPTO_CRC32_PCLMUL=m
CONFIG_CRYPTO_XXHASH=m
CONFIG_CRYPTO_BLAKE2B=y
CONFIG_CRYPTO_BLAKE2S=m
CONFIG_CRYPTO_BLAKE2S_X86=m
CONFIG_CRYPTO_BLAKE2S_X86=y
CONFIG_CRYPTO_CRCT10DIF=y
CONFIG_CRYPTO_CRCT10DIF_PCLMUL=m
CONFIG_CRYPTO_GHASH=y

@@ -9741,29 +9750,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_CRYPTO_USER_API_ENABLE_OBSOLETE=y
CONFIG_CRYPTO_STATS=y
CONFIG_CRYPTO_HASH_INFO=y

#
# Crypto library routines
#
CONFIG_CRYPTO_LIB_AES=y
CONFIG_CRYPTO_LIB_ARC4=y
CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S=m
CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC=m
CONFIG_CRYPTO_LIB_BLAKE2S=m
CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA=m
CONFIG_CRYPTO_LIB_CHACHA_GENERIC=m
CONFIG_CRYPTO_LIB_CHACHA=m
CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519=m
CONFIG_CRYPTO_LIB_CURVE25519_GENERIC=m
CONFIG_CRYPTO_LIB_CURVE25519=m
CONFIG_CRYPTO_LIB_DES=y
CONFIG_CRYPTO_LIB_POLY1305_RSIZE=11
CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305=m
CONFIG_CRYPTO_LIB_POLY1305_GENERIC=m
CONFIG_CRYPTO_LIB_POLY1305=m
CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
CONFIG_CRYPTO_LIB_SHA256=y
CONFIG_CRYPTO_LIB_SM4=m
CONFIG_CRYPTO_HW=y
CONFIG_CRYPTO_DEV_PADLOCK=m
CONFIG_CRYPTO_DEV_PADLOCK_AES=m
@@ -9835,6 +9821,31 @@ CONFIG_GENERIC_IOMAP=y
CONFIG_ARCH_USE_CMPXCHG_LOCKREF=y
CONFIG_ARCH_HAS_FAST_MULTIPLIER=y
CONFIG_ARCH_USE_SYM_ANNOTATIONS=y

#
# Crypto library routines
#
CONFIG_CRYPTO_LIB_AES=y
CONFIG_CRYPTO_LIB_ARC4=y
CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S=y
CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC=y
CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA=m
CONFIG_CRYPTO_LIB_CHACHA_GENERIC=m
CONFIG_CRYPTO_LIB_CHACHA=m
CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519=m
CONFIG_CRYPTO_LIB_CURVE25519_GENERIC=m
CONFIG_CRYPTO_LIB_CURVE25519=m
CONFIG_CRYPTO_LIB_DES=y
CONFIG_CRYPTO_LIB_POLY1305_RSIZE=11
CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305=m
CONFIG_CRYPTO_LIB_POLY1305_GENERIC=m
CONFIG_CRYPTO_LIB_POLY1305=m
CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
CONFIG_CRYPTO_LIB_SHA256=y
CONFIG_CRYPTO_LIB_SM4=m
# end of Crypto library routines

CONFIG_LIB_MEMNEQ=y
CONFIG_CRC_CCITT=m
CONFIG_CRC16=y
CONFIG_CRC_T10DIF=y

@@ -9985,6 +9996,8 @@ CONFIG_SYMBOLIC_ERRNAME=y
CONFIG_DEBUG_BUGVERBOSE=y
# end of printk and dmesg options

CONFIG_AS_HAS_NON_CONST_LEB128=y

#
# Compile-time checks and compiler options
#

@@ -10209,7 +10222,6 @@ CONFIG_STRICT_DEVMEM=y
#
# x86 Debugging
#
CONFIG_TRACE_IRQFLAGS_NMI_SUPPORT=y
CONFIG_EARLY_PRINTK_USB=y
CONFIG_X86_VERBOSE_BOOTUP=y
CONFIG_EARLY_PRINTK=y
@@ -5,8 +5,8 @@
# Written by Chris Abela <chris.abela@maltats.com>, 20100515
# Modified by Mario Preksavec <mario@slackware.hr>

KERNEL=${KERNEL:-5.15.27}
XEN=${XEN:-4.16.1}
KERNEL=${KERNEL:-5.15.80}
XEN=${XEN:-4.17.0}

ROOTMOD=${ROOTMOD:-ext4}
ROOTFS=${ROOTFS:-ext4}
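Because kernel-xen.sh reads both values with ${VAR:-default} expansion, the kernel and Xen versions can be overridden from the environment instead of editing the script; an illustrative invocation (the values shown are simply the new defaults) would be:

  # Build the dom0 kernel against explicit kernel/Xen versions.
  KERNEL=5.15.80 XEN=4.17.0 ./kernel-xen.sh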
@@ -7,7 +7,7 @@
set -e

KERNEL=${KERNEL:-5.15.27}
KERNEL=${KERNEL:-5.15.80}

# Build an image for the root file system and another for the swap
# Default values : 8GB and 500MB resepectively.
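The image-creation step that comment describes amounts to preallocating two files and putting a filesystem and a swap signature on them; a rough sketch with the stated defaults (8 GB root, 500 MB swap) and illustrative file names, not the script's actual code, is:

  dd if=/dev/zero of=domU-root.img bs=1M count=8192   # 8GB root image
  mkfs -t ext4 domU-root.img                          # ext4, matching the ROOTFS default in these scripts
  dd if=/dev/zero of=domU-swap.img bs=1M count=500    # 500MB swap image
  mkswap domU-swap.img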
@@ -25,12 +25,12 @@
cd $(dirname $0) ; CWD=$(pwd)

PRGNAM=xen
VERSION=${VERSION:-4.16.2}
VERSION=${VERSION:-4.17.0}
BUILD=${BUILD:-1}
TAG=${TAG:-_SBo}
PKGTYPE=${PKGTYPE:-tgz}

SEABIOS=${SEABIOS:-1.14.0}
SEABIOS=${SEABIOS:-1.16.0}
OVMF=${OVMF:-20210824_7b4a99be8a}
IPXE=${IPXE:-3c040ad387099483102708bb1839110bc788cefb}
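As with the other scripts, every variable in this block uses ${VAR:-default} expansion, so individual builds can override them without touching xen.SlackBuild; for example (purely illustrative values):

  # Rebuild the package with a custom tag and a .txz package type.
  TAG=_custom PKGTYPE=txz ./xen.SlackBuild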
@@ -1,9 +1,9 @@
PRGNAM="xen"
VERSION="4.16.2"
VERSION="4.17.0"
HOMEPAGE="http://www.xenproject.org/"
DOWNLOAD="UNSUPPORTED"
MD5SUM=""
DOWNLOAD_x86_64="http://mirror.slackware.hr/sources/xen/xen-4.16.2.tar.gz \
DOWNLOAD_x86_64="http://mirror.slackware.hr/sources/xen/xen-4.17.0.tar.gz \
http://mirror.slackware.hr/sources/xen-extfiles/ipxe-git-3c040ad387099483102708bb1839110bc788cefb.tar.gz \
http://mirror.slackware.hr/sources/xen-extfiles/lwip-1.3.0.tar.gz \
http://mirror.slackware.hr/sources/xen-extfiles/zlib-1.2.3.tar.gz \

@@ -13,9 +13,9 @@ DOWNLOAD_x86_64="http://mirror.slackware.hr/sources/xen/xen-4.16.2.tar.gz \
http://mirror.slackware.hr/sources/xen-extfiles/polarssl-1.1.4-gpl.tgz \
http://mirror.slackware.hr/sources/xen-extfiles/gmp-4.3.2.tar.bz2 \
http://mirror.slackware.hr/sources/xen-extfiles/tpm_emulator-0.7.4.tar.gz \
http://mirror.slackware.hr/sources/xen-seabios/seabios-1.14.0.tar.gz \
http://mirror.slackware.hr/sources/xen-seabios/seabios-1.16.0.tar.gz \
http://mirror.slackware.hr/sources/xen-ovmf/xen-ovmf-20210824_7b4a99be8a.tar.bz2"
MD5SUM_x86_64="6bd720f53e3c34a35cb8a8897a561e18 \
MD5SUM_x86_64="b215062ff053378eed41e4a3e05081df \
23ba00d5e2c5b4343d12665af73e1cb5 \
36cc57650cffda9a0269493be2a169bb \
debc62758716a169df9f62e6ab2bc634 \

@@ -25,7 +25,7 @@ MD5SUM_x86_64="6bd720f53e3c34a35cb8a8897a561e18 \
7b72caf22b01464ee7d6165f2fd85f44 \
dd60683d7057917e34630b4a787932e8 \
e26becb8a6a2b6695f6b3e8097593db8 \
9df3b7de6376850d09161137e7a9b61f \
1411e7647ef93424fe88fea5d0ef9a82 \
322d42a3378394b5486acc1564651a4f"
REQUIRES="acpica yajl"
MAINTAINER="Mario Preksavec"
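The .info file is plain shell variable assignments, so the download list and checksums above can be checked by sourcing it; a minimal sketch (not the exact helper any SlackBuilds tool uses) is:

  # Source the metadata, fetch each x86_64 source, and print checksums for
  # manual comparison against MD5SUM_x86_64.
  . ./xen.info
  for url in $DOWNLOAD_x86_64; do wget -nc "$url"; done
  for url in $DOWNLOAD_x86_64; do md5sum "$(basename "$url")"; done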
@ -1,59 +0,0 @@
|
|||
From 4b4359122a414cc15156e13e3805988b71ff9da0 Mon Sep 17 00:00:00 2001
|
||||
From: Julien Grall <jgrall@amazon.com>
|
||||
Date: Mon, 6 Jun 2022 06:17:25 +0000
|
||||
Subject: [PATCH 1/2] xen/arm: p2m: Prevent adding mapping when domain is dying
|
||||
|
||||
During the domain destroy process, the domain will still be accessible
|
||||
until it is fully destroyed. So does the P2M because we don't bail
|
||||
out early if is_dying is non-zero. If a domain has permission to
|
||||
modify the other domain's P2M (i.e. dom0, or a stubdomain), then
|
||||
foreign mapping can be added past relinquish_p2m_mapping().
|
||||
|
||||
Therefore, we need to prevent mapping to be added when the domain
|
||||
is dying. This commit prevents such adding of mapping by adding the
|
||||
d->is_dying check to p2m_set_entry(). Also this commit enhances the
|
||||
check in relinquish_p2m_mapping() to make sure that no mappings can
|
||||
be added in the P2M after the P2M lock is released.
|
||||
|
||||
This is part of CVE-2022-33746 / XSA-410.
|
||||
|
||||
Signed-off-by: Julien Grall <jgrall@amazon.com>
|
||||
Signed-off-by: Henry Wang <Henry.Wang@arm.com>
|
||||
Tested-by: Henry Wang <Henry.Wang@arm.com>
|
||||
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
|
||||
---
|
||||
xen/arch/arm/p2m.c | 11 +++++++++++
|
||||
1 file changed, 11 insertions(+)
|
||||
|
||||
diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
|
||||
index fb71fa4c1c90..cbeff90f4371 100644
|
||||
--- a/xen/arch/arm/p2m.c
|
||||
+++ b/xen/arch/arm/p2m.c
|
||||
@@ -1093,6 +1093,15 @@ int p2m_set_entry(struct p2m_domain *p2m,
|
||||
{
|
||||
int rc = 0;
|
||||
|
||||
+ /*
|
||||
+ * Any reference taken by the P2M mappings (e.g. foreign mapping) will
|
||||
+ * be dropped in relinquish_p2m_mapping(). As the P2M will still
|
||||
+ * be accessible after, we need to prevent mapping to be added when the
|
||||
+ * domain is dying.
|
||||
+ */
|
||||
+ if ( unlikely(p2m->domain->is_dying) )
|
||||
+ return -ENOMEM;
|
||||
+
|
||||
while ( nr )
|
||||
{
|
||||
unsigned long mask;
|
||||
@@ -1610,6 +1619,8 @@ int relinquish_p2m_mapping(struct domain *d)
|
||||
unsigned int order;
|
||||
gfn_t start, end;
|
||||
|
||||
+ BUG_ON(!d->is_dying);
|
||||
+ /* No mappings can be added in the P2M after the P2M lock is released. */
|
||||
p2m_write_lock(p2m);
|
||||
|
||||
start = p2m->lowest_mapped_gfn;
|
||||
--
|
||||
2.37.1
|
||||
|
|
@ -1,165 +0,0 @@
|
|||
From 0d5846490348fa09a0d0915d7c795685a016ce10 Mon Sep 17 00:00:00 2001
|
||||
From: Julien Grall <jgrall@amazon.com>
|
||||
Date: Mon, 6 Jun 2022 06:17:26 +0000
|
||||
Subject: [PATCH 2/2] xen/arm: p2m: Handle preemption when freeing intermediate
|
||||
page tables
|
||||
|
||||
At the moment the P2M page tables will be freed when the domain structure
|
||||
is freed without any preemption. As the P2M is quite large, iterating
|
||||
through this may take more time than it is reasonable without intermediate
|
||||
preemption (to run softirqs and perhaps scheduler).
|
||||
|
||||
Split p2m_teardown() in two parts: one preemptible and called when
|
||||
relinquishing the resources, the other one non-preemptible and called
|
||||
when freeing the domain structure.
|
||||
|
||||
As we are now freeing the P2M pages early, we also need to prevent
|
||||
further allocation if someone call p2m_set_entry() past p2m_teardown()
|
||||
(I wasn't able to prove this will never happen). This is done by
|
||||
the checking domain->is_dying from previous patch in p2m_set_entry().
|
||||
|
||||
Similarly, we want to make sure that no-one can accessed the free
|
||||
pages. Therefore the root is cleared before freeing pages.
|
||||
|
||||
This is part of CVE-2022-33746 / XSA-410.
|
||||
|
||||
Signed-off-by: Julien Grall <jgrall@amazon.com>
|
||||
Signed-off-by: Henry Wang <Henry.Wang@arm.com>
|
||||
Tested-by: Henry Wang <Henry.Wang@arm.com>
|
||||
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
|
||||
---
|
||||
xen/arch/arm/domain.c | 10 +++++++--
|
||||
xen/arch/arm/p2m.c | 47 ++++++++++++++++++++++++++++++++++++---
|
||||
xen/include/asm-arm/p2m.h | 13 +++++++++--
|
||||
3 files changed, 63 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
|
||||
index 96e1b235501d..2694c39127c5 100644
|
||||
--- a/xen/arch/arm/domain.c
|
||||
+++ b/xen/arch/arm/domain.c
|
||||
@@ -789,10 +789,10 @@ fail:
|
||||
void arch_domain_destroy(struct domain *d)
|
||||
{
|
||||
/* IOMMU page table is shared with P2M, always call
|
||||
- * iommu_domain_destroy() before p2m_teardown().
|
||||
+ * iommu_domain_destroy() before p2m_final_teardown().
|
||||
*/
|
||||
iommu_domain_destroy(d);
|
||||
- p2m_teardown(d);
|
||||
+ p2m_final_teardown(d);
|
||||
domain_vgic_free(d);
|
||||
domain_vuart_free(d);
|
||||
free_xenheap_page(d->shared_info);
|
||||
@@ -996,6 +996,7 @@ enum {
|
||||
PROG_xen,
|
||||
PROG_page,
|
||||
PROG_mapping,
|
||||
+ PROG_p2m,
|
||||
PROG_done,
|
||||
};
|
||||
|
||||
@@ -1056,6 +1057,11 @@ int domain_relinquish_resources(struct domain *d)
|
||||
if ( ret )
|
||||
return ret;
|
||||
|
||||
+ PROGRESS(p2m):
|
||||
+ ret = p2m_teardown(d);
|
||||
+ if ( ret )
|
||||
+ return ret;
|
||||
+
|
||||
PROGRESS(done):
|
||||
break;
|
||||
|
||||
diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
|
||||
index cbeff90f4371..3bcd1e897e88 100644
|
||||
--- a/xen/arch/arm/p2m.c
|
||||
+++ b/xen/arch/arm/p2m.c
|
||||
@@ -1527,17 +1527,58 @@ static void p2m_free_vmid(struct domain *d)
|
||||
spin_unlock(&vmid_alloc_lock);
|
||||
}
|
||||
|
||||
-void p2m_teardown(struct domain *d)
|
||||
+int p2m_teardown(struct domain *d)
|
||||
{
|
||||
struct p2m_domain *p2m = p2m_get_hostp2m(d);
|
||||
+ unsigned long count = 0;
|
||||
struct page_info *pg;
|
||||
+ unsigned int i;
|
||||
+ int rc = 0;
|
||||
+
|
||||
+ p2m_write_lock(p2m);
|
||||
+
|
||||
+ /*
|
||||
+ * We are about to free the intermediate page-tables, so clear the
|
||||
+ * root to prevent any walk to use them.
|
||||
+ */
|
||||
+ for ( i = 0; i < P2M_ROOT_PAGES; i++ )
|
||||
+ clear_and_clean_page(p2m->root + i);
|
||||
+
|
||||
+ /*
|
||||
+ * The domain will not be scheduled anymore, so in theory we should
|
||||
+ * not need to flush the TLBs. Do it for safety purpose.
|
||||
+ *
|
||||
+ * Note that all the devices have already been de-assigned. So we don't
|
||||
+ * need to flush the IOMMU TLB here.
|
||||
+ */
|
||||
+ p2m_force_tlb_flush_sync(p2m);
|
||||
+
|
||||
+ while ( (pg = page_list_remove_head(&p2m->pages)) )
|
||||
+ {
|
||||
+ free_domheap_page(pg);
|
||||
+ count++;
|
||||
+ /* Arbitrarily preempt every 512 iterations */
|
||||
+ if ( !(count % 512) && hypercall_preempt_check() )
|
||||
+ {
|
||||
+ rc = -ERESTART;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ p2m_write_unlock(p2m);
|
||||
+
|
||||
+ return rc;
|
||||
+}
|
||||
+
|
||||
+void p2m_final_teardown(struct domain *d)
|
||||
+{
|
||||
+ struct p2m_domain *p2m = p2m_get_hostp2m(d);
|
||||
|
||||
/* p2m not actually initialized */
|
||||
if ( !p2m->domain )
|
||||
return;
|
||||
|
||||
- while ( (pg = page_list_remove_head(&p2m->pages)) )
|
||||
- free_domheap_page(pg);
|
||||
+ ASSERT(page_list_empty(&p2m->pages));
|
||||
|
||||
if ( p2m->root )
|
||||
free_domheap_pages(p2m->root, P2M_ROOT_ORDER);
|
||||
diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
|
||||
index 8f11d9c97b5d..b3ba83283e11 100644
|
||||
--- a/xen/include/asm-arm/p2m.h
|
||||
+++ b/xen/include/asm-arm/p2m.h
|
||||
@@ -192,8 +192,17 @@ void setup_virt_paging(void);
|
||||
/* Init the datastructures for later use by the p2m code */
|
||||
int p2m_init(struct domain *d);
|
||||
|
||||
-/* Return all the p2m resources to Xen. */
|
||||
-void p2m_teardown(struct domain *d);
|
||||
+/*
|
||||
+ * The P2M resources are freed in two parts:
|
||||
+ * - p2m_teardown() will be called when relinquish the resources. It
|
||||
+ * will free large resources (e.g. intermediate page-tables) that
|
||||
+ * requires preemption.
|
||||
+ * - p2m_final_teardown() will be called when domain struct is been
|
||||
+ * freed. This *cannot* be preempted and therefore one small
|
||||
+ * resources should be freed here.
|
||||
+ */
|
||||
+int p2m_teardown(struct domain *d);
|
||||
+void p2m_final_teardown(struct domain *d);
|
||||
|
||||
/*
|
||||
* Remove mapping refcount on each mapping page in the p2m
|
||||
--
|
||||
2.37.1
|
||||
|
|
@ -1,113 +0,0 @@
|
|||
From: Roger Pau Monné <roger.pau@citrix.com>
|
||||
Subject: x86/p2m: add option to skip root pagetable removal in p2m_teardown()
|
||||
|
||||
Add a new parameter to p2m_teardown() in order to select whether the
|
||||
root page table should also be freed. Note that all users are
|
||||
adjusted to pass the parameter to remove the root page tables, so
|
||||
behavior is not modified.
|
||||
|
||||
No functional change intended.
|
||||
|
||||
This is part of CVE-2022-33746 / XSA-410.
|
||||
|
||||
Suggested-by: Julien Grall <julien@xen.org>
|
||||
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
|
||||
Reviewed-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Tim Deegan <tim@xen.org>
|
||||
|
||||
--- a/xen/include/asm-x86/p2m.h
|
||||
+++ b/xen/include/asm-x86/p2m.h
|
||||
@@ -574,7 +574,7 @@ int p2m_init(struct domain *d);
|
||||
int p2m_alloc_table(struct p2m_domain *p2m);
|
||||
|
||||
/* Return all the p2m resources to Xen. */
|
||||
-void p2m_teardown(struct p2m_domain *p2m);
|
||||
+void p2m_teardown(struct p2m_domain *p2m, bool remove_root);
|
||||
void p2m_final_teardown(struct domain *d);
|
||||
|
||||
/* Add a page to a domain's p2m table */
|
||||
--- a/xen/arch/x86/mm/hap/hap.c
|
||||
+++ b/xen/arch/x86/mm/hap/hap.c
|
||||
@@ -541,18 +541,18 @@ void hap_final_teardown(struct domain *d
|
||||
}
|
||||
|
||||
for ( i = 0; i < MAX_ALTP2M; i++ )
|
||||
- p2m_teardown(d->arch.altp2m_p2m[i]);
|
||||
+ p2m_teardown(d->arch.altp2m_p2m[i], true);
|
||||
}
|
||||
|
||||
/* Destroy nestedp2m's first */
|
||||
for (i = 0; i < MAX_NESTEDP2M; i++) {
|
||||
- p2m_teardown(d->arch.nested_p2m[i]);
|
||||
+ p2m_teardown(d->arch.nested_p2m[i], true);
|
||||
}
|
||||
|
||||
if ( d->arch.paging.hap.total_pages != 0 )
|
||||
hap_teardown(d, NULL);
|
||||
|
||||
- p2m_teardown(p2m_get_hostp2m(d));
|
||||
+ p2m_teardown(p2m_get_hostp2m(d), true);
|
||||
/* Free any memory that the p2m teardown released */
|
||||
paging_lock(d);
|
||||
hap_set_allocation(d, 0, NULL);
|
||||
--- a/xen/arch/x86/mm/p2m.c
|
||||
+++ b/xen/arch/x86/mm/p2m.c
|
||||
@@ -749,11 +749,11 @@ int p2m_alloc_table(struct p2m_domain *p
|
||||
* hvm fixme: when adding support for pvh non-hardware domains, this path must
|
||||
* cleanup any foreign p2m types (release refcnts on them).
|
||||
*/
|
||||
-void p2m_teardown(struct p2m_domain *p2m)
|
||||
+void p2m_teardown(struct p2m_domain *p2m, bool remove_root)
|
||||
/* Return all the p2m pages to Xen.
|
||||
* We know we don't have any extra mappings to these pages */
|
||||
{
|
||||
- struct page_info *pg;
|
||||
+ struct page_info *pg, *root_pg = NULL;
|
||||
struct domain *d;
|
||||
|
||||
if (p2m == NULL)
|
||||
@@ -763,10 +763,22 @@ void p2m_teardown(struct p2m_domain *p2m
|
||||
|
||||
p2m_lock(p2m);
|
||||
ASSERT(atomic_read(&d->shr_pages) == 0);
|
||||
- p2m->phys_table = pagetable_null();
|
||||
+
|
||||
+ if ( remove_root )
|
||||
+ p2m->phys_table = pagetable_null();
|
||||
+ else if ( !pagetable_is_null(p2m->phys_table) )
|
||||
+ {
|
||||
+ root_pg = pagetable_get_page(p2m->phys_table);
|
||||
+ clear_domain_page(pagetable_get_mfn(p2m->phys_table));
|
||||
+ }
|
||||
|
||||
while ( (pg = page_list_remove_head(&p2m->pages)) )
|
||||
- d->arch.paging.free_page(d, pg);
|
||||
+ if ( pg != root_pg )
|
||||
+ d->arch.paging.free_page(d, pg);
|
||||
+
|
||||
+ if ( root_pg )
|
||||
+ page_list_add(root_pg, &p2m->pages);
|
||||
+
|
||||
p2m_unlock(p2m);
|
||||
}
|
||||
|
||||
--- a/xen/arch/x86/mm/shadow/common.c
|
||||
+++ b/xen/arch/x86/mm/shadow/common.c
|
||||
@@ -2701,7 +2701,7 @@ int shadow_enable(struct domain *d, u32
|
||||
paging_unlock(d);
|
||||
out_unlocked:
|
||||
if ( rv != 0 && !pagetable_is_null(p2m_get_pagetable(p2m)) )
|
||||
- p2m_teardown(p2m);
|
||||
+ p2m_teardown(p2m, true);
|
||||
if ( rv != 0 && pg != NULL )
|
||||
{
|
||||
pg->count_info &= ~PGC_count_mask;
|
||||
@@ -2866,7 +2866,7 @@ void shadow_final_teardown(struct domain
|
||||
shadow_teardown(d, NULL);
|
||||
|
||||
/* It is now safe to pull down the p2m map. */
|
||||
- p2m_teardown(p2m_get_hostp2m(d));
|
||||
+ p2m_teardown(p2m_get_hostp2m(d), true);
|
||||
/* Free any shadow memory that the p2m teardown released */
|
||||
paging_lock(d);
|
||||
shadow_set_allocation(d, 0, NULL);
|
|
@ -1,62 +0,0 @@
|
|||
From: Jan Beulich <jbeulich@suse.com>
|
||||
Subject: x86/HAP: adjust monitor table related error handling
|
||||
|
||||
hap_make_monitor_table() will return INVALID_MFN if it encounters an
|
||||
error condition, but hap_update_paging_modes() wasn’t handling this
|
||||
value, resulting in an inappropriate value being stored in
|
||||
monitor_table. This would subsequently misguide at least
|
||||
hap_vcpu_teardown(). Avoid this by bailing early.
|
||||
|
||||
Further, when a domain has/was already crashed or (perhaps less
|
||||
important as there's no such path known to lead here) is already dying,
|
||||
avoid calling domain_crash() on it again - that's at best confusing.
|
||||
|
||||
This is part of CVE-2022-33746 / XSA-410.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
|
||||
|
||||
--- a/xen/arch/x86/mm/hap/hap.c
|
||||
+++ b/xen/arch/x86/mm/hap/hap.c
|
||||
@@ -39,6 +39,7 @@
|
||||
#include <asm/domain.h>
|
||||
#include <xen/numa.h>
|
||||
#include <asm/hvm/nestedhvm.h>
|
||||
+#include <public/sched.h>
|
||||
|
||||
#include "private.h"
|
||||
|
||||
@@ -405,8 +406,13 @@ static mfn_t hap_make_monitor_table(stru
|
||||
return m4mfn;
|
||||
|
||||
oom:
|
||||
- printk(XENLOG_G_ERR "out of memory building monitor pagetable\n");
|
||||
- domain_crash(d);
|
||||
+ if ( !d->is_dying &&
|
||||
+ (!d->is_shutting_down || d->shutdown_code != SHUTDOWN_crash) )
|
||||
+ {
|
||||
+ printk(XENLOG_G_ERR "%pd: out of memory building monitor pagetable\n",
|
||||
+ d);
|
||||
+ domain_crash(d);
|
||||
+ }
|
||||
return INVALID_MFN;
|
||||
}
|
||||
|
||||
@@ -766,6 +772,9 @@ static void hap_update_paging_modes(stru
|
||||
if ( pagetable_is_null(v->arch.hvm.monitor_table) )
|
||||
{
|
||||
mfn_t mmfn = hap_make_monitor_table(v);
|
||||
+
|
||||
+ if ( mfn_eq(mmfn, INVALID_MFN) )
|
||||
+ goto unlock;
|
||||
v->arch.hvm.monitor_table = pagetable_from_mfn(mmfn);
|
||||
make_cr3(v, mmfn);
|
||||
hvm_update_host_cr3(v);
|
||||
@@ -774,6 +783,7 @@ static void hap_update_paging_modes(stru
|
||||
/* CR3 is effectively updated by a mode change. Flush ASIDs, etc. */
|
||||
hap_update_cr3(v, 0, false);
|
||||
|
||||
+ unlock:
|
||||
paging_unlock(d);
|
||||
put_gfn(d, cr3_gfn);
|
||||
}
|
|
@ -1,60 +0,0 @@
|
|||
From: Jan Beulich <jbeulich@suse.com>
|
||||
Subject: x86/shadow: tolerate failure of sh_set_toplevel_shadow()
|
||||
|
||||
Subsequently sh_set_toplevel_shadow() will be adjusted to install a
|
||||
blank entry in case prealloc fails. There are, in fact, pre-existing
|
||||
error paths which would put in place a blank entry. The 4- and 2-level
|
||||
code in sh_update_cr3(), however, assume the top level entry to be
|
||||
valid.
|
||||
|
||||
Hence bail from the function in the unlikely event that it's not. Note
|
||||
that 3-level logic works differently: In particular a guest is free to
|
||||
supply a PDPTR pointing at 4 non-present (or otherwise deemed invalid)
|
||||
entries. The guest will crash, but we already cope with that.
|
||||
|
||||
Really mfn_valid() is likely wrong to use in sh_set_toplevel_shadow(),
|
||||
and it should instead be !mfn_eq(gmfn, INVALID_MFN). Avoid such a change
|
||||
in security context, but add a respective assertion.
|
||||
|
||||
This is part of CVE-2022-33746 / XSA-410.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Tim Deegan <tim@xen.org>
|
||||
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
|
||||
|
||||
--- a/xen/arch/x86/mm/shadow/common.c
|
||||
+++ b/xen/arch/x86/mm/shadow/common.c
|
||||
@@ -2516,6 +2516,7 @@ void sh_set_toplevel_shadow(struct vcpu
|
||||
/* Now figure out the new contents: is this a valid guest MFN? */
|
||||
if ( !mfn_valid(gmfn) )
|
||||
{
|
||||
+ ASSERT(mfn_eq(gmfn, INVALID_MFN));
|
||||
new_entry = pagetable_null();
|
||||
goto install_new_entry;
|
||||
}
|
||||
--- a/xen/arch/x86/mm/shadow/multi.c
|
||||
+++ b/xen/arch/x86/mm/shadow/multi.c
|
||||
@@ -3312,6 +3312,11 @@ sh_update_cr3(struct vcpu *v, int do_loc
|
||||
if ( sh_remove_write_access(d, gmfn, 4, 0) != 0 )
|
||||
guest_flush_tlb_mask(d, d->dirty_cpumask);
|
||||
sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow, sh_make_shadow);
|
||||
+ if ( unlikely(pagetable_is_null(v->arch.paging.shadow.shadow_table[0])) )
|
||||
+ {
|
||||
+ ASSERT(d->is_dying || d->is_shutting_down);
|
||||
+ return;
|
||||
+ }
|
||||
if ( !shadow_mode_external(d) && !is_pv_32bit_domain(d) )
|
||||
{
|
||||
mfn_t smfn = pagetable_get_mfn(v->arch.paging.shadow.shadow_table[0]);
|
||||
@@ -3370,6 +3375,11 @@ sh_update_cr3(struct vcpu *v, int do_loc
|
||||
if ( sh_remove_write_access(d, gmfn, 2, 0) != 0 )
|
||||
guest_flush_tlb_mask(d, d->dirty_cpumask);
|
||||
sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l2_shadow, sh_make_shadow);
|
||||
+ if ( unlikely(pagetable_is_null(v->arch.paging.shadow.shadow_table[0])) )
|
||||
+ {
|
||||
+ ASSERT(d->is_dying || d->is_shutting_down);
|
||||
+ return;
|
||||
+ }
|
||||
#else
|
||||
#error This should never happen
|
||||
#endif
|
|
@ -1,255 +0,0 @@
|
|||
From: Roger Pau Monné <roger.pau@citrix.com>
|
||||
Subject: x86/shadow: tolerate failure in shadow_prealloc()
|
||||
|
||||
Prevent _shadow_prealloc() from calling BUG() when unable to fulfill
|
||||
the pre-allocation and instead return true/false. Modify
|
||||
shadow_prealloc() to crash the domain on allocation failure (if the
|
||||
domain is not already dying), as shadow cannot operate normally after
|
||||
that. Modify callers to also gracefully handle {_,}shadow_prealloc()
|
||||
failing to fulfill the request.
|
||||
|
||||
Note this in turn requires adjusting the callers of
|
||||
sh_make_monitor_table() also to handle it returning INVALID_MFN.
|
||||
sh_update_paging_modes() is also modified to add additional error
|
||||
paths in case of allocation failure, some of those will return with
|
||||
null monitor page tables (and the domain likely crashed). This is no
|
||||
different that current error paths, but the newly introduced ones are
|
||||
more likely to trigger.
|
||||
|
||||
The now added failure points in sh_update_paging_modes() also require
|
||||
that on some error return paths the previous structures are cleared,
|
||||
and thus monitor table is null.
|
||||
|
||||
While there adjust the 'type' parameter type of shadow_prealloc() to
|
||||
unsigned int rather than u32.
|
||||
|
||||
This is part of CVE-2022-33746 / XSA-410.
|
||||
|
||||
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Tim Deegan <tim@xen.org>
|
||||
|
||||
--- a/xen/arch/x86/mm/shadow/common.c
|
||||
+++ b/xen/arch/x86/mm/shadow/common.c
|
||||
@@ -36,6 +36,7 @@
|
||||
#include <asm/flushtlb.h>
|
||||
#include <asm/shadow.h>
|
||||
#include <xen/numa.h>
|
||||
+#include <public/sched.h>
|
||||
#include "private.h"
|
||||
|
||||
DEFINE_PER_CPU(uint32_t,trace_shadow_path_flags);
|
||||
@@ -928,14 +929,15 @@ static inline void trace_shadow_prealloc
|
||||
|
||||
/* Make sure there are at least count order-sized pages
|
||||
* available in the shadow page pool. */
|
||||
-static void _shadow_prealloc(struct domain *d, unsigned int pages)
|
||||
+static bool __must_check _shadow_prealloc(struct domain *d, unsigned int pages)
|
||||
{
|
||||
struct vcpu *v;
|
||||
struct page_info *sp, *t;
|
||||
mfn_t smfn;
|
||||
int i;
|
||||
|
||||
- if ( d->arch.paging.shadow.free_pages >= pages ) return;
|
||||
+ if ( d->arch.paging.shadow.free_pages >= pages )
|
||||
+ return true;
|
||||
|
||||
/* Shouldn't have enabled shadows if we've no vcpus. */
|
||||
ASSERT(d->vcpu && d->vcpu[0]);
|
||||
@@ -951,7 +953,8 @@ static void _shadow_prealloc(struct doma
|
||||
sh_unpin(d, smfn);
|
||||
|
||||
/* See if that freed up enough space */
|
||||
- if ( d->arch.paging.shadow.free_pages >= pages ) return;
|
||||
+ if ( d->arch.paging.shadow.free_pages >= pages )
|
||||
+ return true;
|
||||
}
|
||||
|
||||
/* Stage two: all shadow pages are in use in hierarchies that are
|
||||
@@ -974,7 +977,7 @@ static void _shadow_prealloc(struct doma
|
||||
if ( d->arch.paging.shadow.free_pages >= pages )
|
||||
{
|
||||
guest_flush_tlb_mask(d, d->dirty_cpumask);
|
||||
- return;
|
||||
+ return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -987,7 +990,12 @@ static void _shadow_prealloc(struct doma
|
||||
d->arch.paging.shadow.total_pages,
|
||||
d->arch.paging.shadow.free_pages,
|
||||
d->arch.paging.shadow.p2m_pages);
|
||||
- BUG();
|
||||
+
|
||||
+ ASSERT(d->is_dying);
|
||||
+
|
||||
+ guest_flush_tlb_mask(d, d->dirty_cpumask);
|
||||
+
|
||||
+ return false;
|
||||
}
|
||||
|
||||
/* Make sure there are at least count pages of the order according to
|
||||
@@ -995,9 +1003,19 @@ static void _shadow_prealloc(struct doma
|
||||
* This must be called before any calls to shadow_alloc(). Since this
|
||||
* will free existing shadows to make room, it must be called early enough
|
||||
* to avoid freeing shadows that the caller is currently working on. */
|
||||
-void shadow_prealloc(struct domain *d, u32 type, unsigned int count)
|
||||
+bool shadow_prealloc(struct domain *d, unsigned int type, unsigned int count)
|
||||
{
|
||||
- return _shadow_prealloc(d, shadow_size(type) * count);
|
||||
+ bool ret = _shadow_prealloc(d, shadow_size(type) * count);
|
||||
+
|
||||
+ if ( !ret && !d->is_dying &&
|
||||
+ (!d->is_shutting_down || d->shutdown_code != SHUTDOWN_crash) )
|
||||
+ /*
|
||||
+ * Failing to allocate memory required for shadow usage can only result in
|
||||
+ * a domain crash, do it here rather that relying on every caller to do it.
|
||||
+ */
|
||||
+ domain_crash(d);
|
||||
+
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
/* Deliberately free all the memory we can: this will tear down all of
|
||||
@@ -1218,7 +1236,7 @@ void shadow_free(struct domain *d, mfn_t
|
||||
static struct page_info *
|
||||
shadow_alloc_p2m_page(struct domain *d)
|
||||
{
|
||||
- struct page_info *pg;
|
||||
+ struct page_info *pg = NULL;
|
||||
|
||||
/* This is called both from the p2m code (which never holds the
|
||||
* paging lock) and the log-dirty code (which always does). */
|
||||
@@ -1236,16 +1254,18 @@ shadow_alloc_p2m_page(struct domain *d)
|
||||
d->arch.paging.shadow.p2m_pages,
|
||||
shadow_min_acceptable_pages(d));
|
||||
}
|
||||
- paging_unlock(d);
|
||||
- return NULL;
|
||||
+ goto out;
|
||||
}
|
||||
|
||||
- shadow_prealloc(d, SH_type_p2m_table, 1);
|
||||
+ if ( !shadow_prealloc(d, SH_type_p2m_table, 1) )
|
||||
+ goto out;
|
||||
+
|
||||
pg = mfn_to_page(shadow_alloc(d, SH_type_p2m_table, 0));
|
||||
d->arch.paging.shadow.p2m_pages++;
|
||||
d->arch.paging.shadow.total_pages--;
|
||||
ASSERT(!page_get_owner(pg) && !(pg->count_info & PGC_count_mask));
|
||||
|
||||
+ out:
|
||||
paging_unlock(d);
|
||||
|
||||
return pg;
|
||||
@@ -1336,7 +1356,9 @@ int shadow_set_allocation(struct domain
|
||||
else if ( d->arch.paging.shadow.total_pages > pages )
|
||||
{
|
||||
/* Need to return memory to domheap */
|
||||
- _shadow_prealloc(d, 1);
|
||||
+ if ( !_shadow_prealloc(d, 1) )
|
||||
+ return -ENOMEM;
|
||||
+
|
||||
sp = page_list_remove_head(&d->arch.paging.shadow.freelist);
|
||||
ASSERT(sp);
|
||||
/*
|
||||
@@ -2334,12 +2356,13 @@ static void sh_update_paging_modes(struc
|
||||
if ( mfn_eq(v->arch.paging.shadow.oos_snapshot[0], INVALID_MFN) )
|
||||
{
|
||||
int i;
|
||||
+
|
||||
+ if ( !shadow_prealloc(d, SH_type_oos_snapshot, SHADOW_OOS_PAGES) )
|
||||
+ return;
|
||||
+
|
||||
for(i = 0; i < SHADOW_OOS_PAGES; i++)
|
||||
- {
|
||||
- shadow_prealloc(d, SH_type_oos_snapshot, 1);
|
||||
v->arch.paging.shadow.oos_snapshot[i] =
|
||||
shadow_alloc(d, SH_type_oos_snapshot, 0);
|
||||
- }
|
||||
}
|
||||
#endif /* OOS */
|
||||
|
||||
@@ -2403,6 +2426,9 @@ static void sh_update_paging_modes(struc
|
||||
mfn_t mmfn = sh_make_monitor_table(
|
||||
v, v->arch.paging.mode->shadow.shadow_levels);
|
||||
|
||||
+ if ( mfn_eq(mmfn, INVALID_MFN) )
|
||||
+ return;
|
||||
+
|
||||
v->arch.hvm.monitor_table = pagetable_from_mfn(mmfn);
|
||||
make_cr3(v, mmfn);
|
||||
hvm_update_host_cr3(v);
|
||||
@@ -2441,6 +2467,12 @@ static void sh_update_paging_modes(struc
|
||||
v->arch.hvm.monitor_table = pagetable_null();
|
||||
new_mfn = sh_make_monitor_table(
|
||||
v, v->arch.paging.mode->shadow.shadow_levels);
|
||||
+ if ( mfn_eq(new_mfn, INVALID_MFN) )
|
||||
+ {
|
||||
+ sh_destroy_monitor_table(v, old_mfn,
|
||||
+ old_mode->shadow.shadow_levels);
|
||||
+ return;
|
||||
+ }
|
||||
v->arch.hvm.monitor_table = pagetable_from_mfn(new_mfn);
|
||||
SHADOW_PRINTK("new monitor table %"PRI_mfn "\n",
|
||||
mfn_x(new_mfn));
|
||||
@@ -2526,7 +2558,12 @@ void sh_set_toplevel_shadow(struct vcpu
|
||||
if ( !mfn_valid(smfn) )
|
||||
{
|
||||
/* Make sure there's enough free shadow memory. */
|
||||
- shadow_prealloc(d, root_type, 1);
|
||||
+ if ( !shadow_prealloc(d, root_type, 1) )
|
||||
+ {
|
||||
+ new_entry = pagetable_null();
|
||||
+ goto install_new_entry;
|
||||
+ }
|
||||
+
|
||||
/* Shadow the page. */
|
||||
smfn = make_shadow(v, gmfn, root_type);
|
||||
}
|
||||
--- a/xen/arch/x86/mm/shadow/hvm.c
|
||||
+++ b/xen/arch/x86/mm/shadow/hvm.c
|
||||
@@ -700,7 +700,9 @@ mfn_t sh_make_monitor_table(const struct
|
||||
ASSERT(!pagetable_get_pfn(v->arch.hvm.monitor_table));
|
||||
|
||||
/* Guarantee we can get the memory we need */
|
||||
- shadow_prealloc(d, SH_type_monitor_table, CONFIG_PAGING_LEVELS);
|
||||
+ if ( !shadow_prealloc(d, SH_type_monitor_table, CONFIG_PAGING_LEVELS) )
|
||||
+ return INVALID_MFN;
|
||||
+
|
||||
m4mfn = shadow_alloc(d, SH_type_monitor_table, 0);
|
||||
mfn_to_page(m4mfn)->shadow_flags = 4;
|
||||
|
||||
--- a/xen/arch/x86/mm/shadow/multi.c
|
||||
+++ b/xen/arch/x86/mm/shadow/multi.c
|
||||
@@ -2440,9 +2440,14 @@ static int sh_page_fault(struct vcpu *v,
|
||||
* Preallocate shadow pages *before* removing writable accesses
|
||||
* otherwhise an OOS L1 might be demoted and promoted again with
|
||||
* writable mappings. */
|
||||
- shadow_prealloc(d,
|
||||
- SH_type_l1_shadow,
|
||||
- GUEST_PAGING_LEVELS < 4 ? 1 : GUEST_PAGING_LEVELS - 1);
|
||||
+ if ( !shadow_prealloc(d, SH_type_l1_shadow,
|
||||
+ GUEST_PAGING_LEVELS < 4
|
||||
+ ? 1 : GUEST_PAGING_LEVELS - 1) )
|
||||
+ {
|
||||
+ paging_unlock(d);
|
||||
+ put_gfn(d, gfn_x(gfn));
|
||||
+ return 0;
|
||||
+ }
|
||||
|
||||
rc = gw_remove_write_accesses(v, va, &gw);
|
||||
|
||||
--- a/xen/arch/x86/mm/shadow/private.h
|
||||
+++ b/xen/arch/x86/mm/shadow/private.h
|
||||
@@ -383,7 +383,8 @@ void shadow_promote(struct domain *d, mf
|
||||
void shadow_demote(struct domain *d, mfn_t gmfn, u32 type);
|
||||
|
||||
/* Shadow page allocation functions */
|
||||
-void shadow_prealloc(struct domain *d, u32 shadow_type, unsigned int count);
|
||||
+bool __must_check shadow_prealloc(struct domain *d, unsigned int shadow_type,
|
||||
+ unsigned int count);
|
||||
mfn_t shadow_alloc(struct domain *d,
|
||||
u32 shadow_type,
|
||||
unsigned long backpointer);
|
|
@ -1,82 +0,0 @@
|
|||
From: Roger Pau Monné <roger.pau@citrix.com>
|
||||
Subject: x86/p2m: refuse new allocations for dying domains
|
||||
|
||||
This will in particular prevent any attempts to add entries to the p2m,
|
||||
once - in a subsequent change - non-root entries have been removed.
|
||||
|
||||
This is part of CVE-2022-33746 / XSA-410.
|
||||
|
||||
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Tim Deegan <tim@xen.org>
|
||||
|
||||
--- a/xen/arch/x86/mm/hap/hap.c
|
||||
+++ b/xen/arch/x86/mm/hap/hap.c
|
||||
@@ -245,6 +245,9 @@ static struct page_info *hap_alloc(struc
|
||||
|
||||
ASSERT(paging_locked_by_me(d));
|
||||
|
||||
+ if ( unlikely(d->is_dying) )
|
||||
+ return NULL;
|
||||
+
|
||||
pg = page_list_remove_head(&d->arch.paging.hap.freelist);
|
||||
if ( unlikely(!pg) )
|
||||
return NULL;
|
||||
@@ -281,7 +284,7 @@ static struct page_info *hap_alloc_p2m_p
|
||||
d->arch.paging.hap.p2m_pages++;
|
||||
ASSERT(!page_get_owner(pg) && !(pg->count_info & PGC_count_mask));
|
||||
}
|
||||
- else if ( !d->arch.paging.p2m_alloc_failed )
|
||||
+ else if ( !d->arch.paging.p2m_alloc_failed && !d->is_dying )
|
||||
{
|
||||
d->arch.paging.p2m_alloc_failed = 1;
|
||||
dprintk(XENLOG_ERR, "d%i failed to allocate from HAP pool\n",
|
||||
--- a/xen/arch/x86/mm/shadow/common.c
|
||||
+++ b/xen/arch/x86/mm/shadow/common.c
|
||||
@@ -939,6 +939,10 @@ static bool __must_check _shadow_preallo
|
||||
if ( d->arch.paging.shadow.free_pages >= pages )
|
||||
return true;
|
||||
|
||||
+ if ( unlikely(d->is_dying) )
|
||||
+ /* No reclaim when the domain is dying, teardown will take care of it. */
|
||||
+ return false;
|
||||
+
|
||||
/* Shouldn't have enabled shadows if we've no vcpus. */
|
||||
ASSERT(d->vcpu && d->vcpu[0]);
|
||||
|
||||
@@ -991,7 +995,7 @@ static bool __must_check _shadow_preallo
|
||||
d->arch.paging.shadow.free_pages,
|
||||
d->arch.paging.shadow.p2m_pages);
|
||||
|
||||
- ASSERT(d->is_dying);
|
||||
+ ASSERT_UNREACHABLE();
|
||||
|
||||
guest_flush_tlb_mask(d, d->dirty_cpumask);
|
||||
|
||||
@@ -1005,10 +1009,13 @@ static bool __must_check _shadow_preallo
|
||||
* to avoid freeing shadows that the caller is currently working on. */
|
||||
bool shadow_prealloc(struct domain *d, unsigned int type, unsigned int count)
|
||||
{
|
||||
- bool ret = _shadow_prealloc(d, shadow_size(type) * count);
|
||||
+ bool ret;
|
||||
+
|
||||
+ if ( unlikely(d->is_dying) )
|
||||
+ return false;
|
||||
|
||||
- if ( !ret && !d->is_dying &&
|
||||
- (!d->is_shutting_down || d->shutdown_code != SHUTDOWN_crash) )
|
||||
+ ret = _shadow_prealloc(d, shadow_size(type) * count);
|
||||
+ if ( !ret && (!d->is_shutting_down || d->shutdown_code != SHUTDOWN_crash) )
|
||||
/*
|
||||
* Failing to allocate memory required for shadow usage can only result in
|
||||
* a domain crash, do it here rather that relying on every caller to do it.
|
||||
@@ -1238,6 +1245,9 @@ shadow_alloc_p2m_page(struct domain *d)
|
||||
{
|
||||
struct page_info *pg = NULL;
|
||||
|
||||
+ if ( unlikely(d->is_dying) )
|
||||
+ return NULL;
|
||||
+
|
||||
/* This is called both from the p2m code (which never holds the
|
||||
* paging lock) and the log-dirty code (which always does). */
|
||||
paging_lock_recursive(d);
|
|
@ -1,96 +0,0 @@
|
|||
From: Roger Pau Monné <roger.pau@citrix.com>
|
||||
Subject: x86/p2m: truly free paging pool memory for dying domains
|
||||
|
||||
Modify {hap,shadow}_free to free the page immediately if the domain is
|
||||
dying, so that pages don't accumulate in the pool when
|
||||
{shadow,hap}_final_teardown() get called. This is to limit the amount of
|
||||
work which needs to be done there (in a non-preemptable manner).
|
||||
|
||||
Note the call to shadow_free() in shadow_free_p2m_page() is moved after
|
||||
increasing total_pages, so that the decrease done in shadow_free() in
|
||||
case the domain is dying doesn't underflow the counter, even if just for
|
||||
a short interval.
|
||||
|
||||
This is part of CVE-2022-33746 / XSA-410.
|
||||
|
||||
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Tim Deegan <tim@xen.org>
|
||||
|
||||
--- a/xen/arch/x86/mm/hap/hap.c
|
||||
+++ b/xen/arch/x86/mm/hap/hap.c
|
||||
@@ -265,6 +265,18 @@ static void hap_free(struct domain *d, m
|
||||
|
||||
ASSERT(paging_locked_by_me(d));
|
||||
|
||||
+ /*
|
||||
+ * For dying domains, actually free the memory here. This way less work is
|
||||
+ * left to hap_final_teardown(), which cannot easily have preemption checks
|
||||
+ * added.
|
||||
+ */
|
||||
+ if ( unlikely(d->is_dying) )
|
||||
+ {
|
||||
+ free_domheap_page(pg);
|
||||
+ d->arch.paging.hap.total_pages--;
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
d->arch.paging.hap.free_pages++;
|
||||
page_list_add_tail(pg, &d->arch.paging.hap.freelist);
|
||||
}
|
||||
--- a/xen/arch/x86/mm/shadow/common.c
|
||||
+++ b/xen/arch/x86/mm/shadow/common.c
|
||||
@@ -1187,6 +1187,7 @@ mfn_t shadow_alloc(struct domain *d,
|
||||
void shadow_free(struct domain *d, mfn_t smfn)
|
||||
{
|
||||
struct page_info *next = NULL, *sp = mfn_to_page(smfn);
|
||||
+ bool dying = ACCESS_ONCE(d->is_dying);
|
||||
struct page_list_head *pin_list;
|
||||
unsigned int pages;
|
||||
u32 shadow_type;
|
||||
@@ -1229,11 +1230,32 @@ void shadow_free(struct domain *d, mfn_t
|
||||
* just before the allocator hands the page out again. */
|
||||
page_set_tlbflush_timestamp(sp);
|
||||
perfc_decr(shadow_alloc_count);
|
||||
- page_list_add_tail(sp, &d->arch.paging.shadow.freelist);
|
||||
+
|
||||
+ /*
|
||||
+ * For dying domains, actually free the memory here. This way less
|
||||
+ * work is left to shadow_final_teardown(), which cannot easily have
|
||||
+ * preemption checks added.
|
||||
+ */
|
||||
+ if ( unlikely(dying) )
|
||||
+ {
|
||||
+ /*
|
||||
+ * The backpointer field (sh.back) used by shadow code aliases the
|
||||
+ * domain owner field, unconditionally clear it here to avoid
|
||||
+ * free_domheap_page() attempting to parse it.
|
||||
+ */
|
||||
+ page_set_owner(sp, NULL);
|
||||
+ free_domheap_page(sp);
|
||||
+ }
|
||||
+ else
|
||||
+ page_list_add_tail(sp, &d->arch.paging.shadow.freelist);
|
||||
+
|
||||
sp = next;
|
||||
}
|
||||
|
||||
- d->arch.paging.shadow.free_pages += pages;
|
||||
+ if ( unlikely(dying) )
|
||||
+ d->arch.paging.shadow.total_pages -= pages;
|
||||
+ else
|
||||
+ d->arch.paging.shadow.free_pages += pages;
|
||||
}
|
||||
|
||||
/* Divert a page from the pool to be used by the p2m mapping.
|
||||
@@ -1303,9 +1325,9 @@ shadow_free_p2m_page(struct domain *d, s
|
||||
* paging lock) and the log-dirty code (which always does). */
|
||||
paging_lock_recursive(d);
|
||||
|
||||
- shadow_free(d, page_to_mfn(pg));
|
||||
d->arch.paging.shadow.p2m_pages--;
|
||||
d->arch.paging.shadow.total_pages++;
|
||||
+ shadow_free(d, page_to_mfn(pg));
|
||||
|
||||
paging_unlock(d);
|
||||
}
|
|
@ -1,159 +0,0 @@
|
|||
From: Roger Pau Monné <roger.pau@citrix.com>
|
||||
Subject: x86/p2m: free the paging memory pool preemptively
|
||||
|
||||
The paging memory pool is currently freed in two different places:
|
||||
from {shadow,hap}_teardown() via domain_relinquish_resources() and
|
||||
from {shadow,hap}_final_teardown() via complete_domain_destroy().
|
||||
While the former does handle preemption, the later doesn't.
|
||||
|
||||
Attempt to move as much p2m related freeing as possible to happen
|
||||
before the call to {shadow,hap}_teardown(), so that most memory can be
|
||||
freed in a preemptive way. In order to avoid causing issues to
|
||||
existing callers leave the root p2m page tables set and free them in
|
||||
{hap,shadow}_final_teardown(). Also modify {hap,shadow}_free to free
|
||||
the page immediately if the domain is dying, so that pages don't
|
||||
accumulate in the pool when {shadow,hap}_final_teardown() get called.
|
||||
|
||||
Move altp2m_vcpu_disable_ve() to be done in hap_teardown(), as that's
|
||||
the place where altp2m_active gets disabled now.
|
||||
|
||||
This is part of CVE-2022-33746 / XSA-410.
|
||||
|
||||
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Tim Deegan <tim@xen.org>
|
||||
|
||||
--- a/xen/arch/x86/domain.c
|
||||
+++ b/xen/arch/x86/domain.c
|
||||
@@ -38,7 +38,6 @@
|
||||
#include <xen/livepatch.h>
|
||||
#include <public/sysctl.h>
|
||||
#include <public/hvm/hvm_vcpu.h>
|
||||
-#include <asm/altp2m.h>
|
||||
#include <asm/regs.h>
|
||||
#include <asm/mc146818rtc.h>
|
||||
#include <asm/system.h>
|
||||
@@ -2381,12 +2380,6 @@ int domain_relinquish_resources(struct d
|
||||
vpmu_destroy(v);
|
||||
}
|
||||
|
||||
- if ( altp2m_active(d) )
|
||||
- {
|
||||
- for_each_vcpu ( d, v )
|
||||
- altp2m_vcpu_disable_ve(v);
|
||||
- }
|
||||
-
|
||||
if ( is_pv_domain(d) )
|
||||
{
|
||||
for_each_vcpu ( d, v )
|
||||
--- a/xen/arch/x86/mm/hap/hap.c
|
||||
+++ b/xen/arch/x86/mm/hap/hap.c
|
||||
@@ -28,6 +28,7 @@
|
||||
#include <xen/domain_page.h>
|
||||
#include <xen/guest_access.h>
|
||||
#include <xen/keyhandler.h>
|
||||
+#include <asm/altp2m.h>
|
||||
#include <asm/event.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/current.h>
|
||||
@@ -546,24 +547,8 @@ void hap_final_teardown(struct domain *d
|
||||
unsigned int i;
|
||||
|
||||
if ( hvm_altp2m_supported() )
|
||||
- {
|
||||
- d->arch.altp2m_active = 0;
|
||||
-
|
||||
- if ( d->arch.altp2m_eptp )
|
||||
- {
|
||||
- free_xenheap_page(d->arch.altp2m_eptp);
|
||||
- d->arch.altp2m_eptp = NULL;
|
||||
- }
|
||||
-
|
||||
- if ( d->arch.altp2m_visible_eptp )
|
||||
- {
|
||||
- free_xenheap_page(d->arch.altp2m_visible_eptp);
|
||||
- d->arch.altp2m_visible_eptp = NULL;
|
||||
- }
|
||||
-
|
||||
for ( i = 0; i < MAX_ALTP2M; i++ )
|
||||
p2m_teardown(d->arch.altp2m_p2m[i], true);
|
||||
- }
|
||||
|
||||
/* Destroy nestedp2m's first */
|
||||
for (i = 0; i < MAX_NESTEDP2M; i++) {
|
||||
@@ -578,6 +563,8 @@ void hap_final_teardown(struct domain *d
|
||||
paging_lock(d);
|
||||
hap_set_allocation(d, 0, NULL);
|
||||
ASSERT(d->arch.paging.hap.p2m_pages == 0);
|
||||
+ ASSERT(d->arch.paging.hap.free_pages == 0);
|
||||
+ ASSERT(d->arch.paging.hap.total_pages == 0);
|
||||
paging_unlock(d);
|
||||
}
|
||||
|
||||
@@ -603,6 +590,7 @@ void hap_vcpu_teardown(struct vcpu *v)
|
||||
void hap_teardown(struct domain *d, bool *preempted)
|
||||
{
|
||||
struct vcpu *v;
|
||||
+ unsigned int i;
|
||||
|
||||
ASSERT(d->is_dying);
|
||||
ASSERT(d != current->domain);
|
||||
@@ -611,6 +599,28 @@ void hap_teardown(struct domain *d, bool
|
||||
for_each_vcpu ( d, v )
|
||||
hap_vcpu_teardown(v);
|
||||
|
||||
+ /* Leave the root pt in case we get further attempts to modify the p2m. */
|
||||
+ if ( hvm_altp2m_supported() )
|
||||
+ {
|
||||
+ if ( altp2m_active(d) )
|
||||
+ for_each_vcpu ( d, v )
|
||||
+ altp2m_vcpu_disable_ve(v);
|
||||
+
|
||||
+ d->arch.altp2m_active = 0;
|
||||
+
|
||||
+ FREE_XENHEAP_PAGE(d->arch.altp2m_eptp);
|
||||
+ FREE_XENHEAP_PAGE(d->arch.altp2m_visible_eptp);
|
||||
+
|
||||
+ for ( i = 0; i < MAX_ALTP2M; i++ )
|
||||
+ p2m_teardown(d->arch.altp2m_p2m[i], false);
|
||||
+ }
|
||||
+
|
||||
+ /* Destroy nestedp2m's after altp2m. */
|
||||
+ for ( i = 0; i < MAX_NESTEDP2M; i++ )
|
||||
+ p2m_teardown(d->arch.nested_p2m[i], false);
|
||||
+
|
||||
+ p2m_teardown(p2m_get_hostp2m(d), false);
|
||||
+
|
||||
paging_lock(d); /* Keep various asserts happy */
|
||||
|
||||
if ( d->arch.paging.hap.total_pages != 0 )
|
||||
--- a/xen/arch/x86/mm/shadow/common.c
|
||||
+++ b/xen/arch/x86/mm/shadow/common.c
|
||||
@@ -2824,8 +2824,17 @@ void shadow_teardown(struct domain *d, b
|
||||
for_each_vcpu ( d, v )
|
||||
shadow_vcpu_teardown(v);
|
||||
|
||||
+ p2m_teardown(p2m_get_hostp2m(d), false);
|
||||
+
|
||||
paging_lock(d);
|
||||
|
||||
+ /*
|
||||
+ * Reclaim all shadow memory so that shadow_set_allocation() doesn't find
|
||||
+ * in-use pages, as _shadow_prealloc() will no longer try to reclaim pages
|
||||
+ * because the domain is dying.
|
||||
+ */
|
||||
+ shadow_blow_tables(d);
|
||||
+
|
||||
#if (SHADOW_OPTIMIZATIONS & (SHOPT_VIRTUAL_TLB|SHOPT_OUT_OF_SYNC))
|
||||
/* Free the virtual-TLB array attached to each vcpu */
|
||||
for_each_vcpu(d, v)
|
||||
@@ -2946,6 +2955,9 @@ void shadow_final_teardown(struct domain
|
||||
d->arch.paging.shadow.total_pages,
|
||||
d->arch.paging.shadow.free_pages,
|
||||
d->arch.paging.shadow.p2m_pages);
|
||||
+ ASSERT(!d->arch.paging.shadow.total_pages);
|
||||
+ ASSERT(!d->arch.paging.shadow.free_pages);
|
||||
+ ASSERT(!d->arch.paging.shadow.p2m_pages);
|
||||
paging_unlock(d);
|
||||
}
|
||||
|
|
@ -1,171 +0,0 @@
|
|||
From: Julien Grall <jgrall@amazon.com>
|
||||
Subject: xen/x86: p2m: Add preemption in p2m_teardown()
|
||||
|
||||
The list p2m->pages contain all the pages used by the P2M. On large
|
||||
instance this can be quite large and the time spent to call
|
||||
d->arch.paging.free_page() will take more than 1ms for a 80GB guest
|
||||
on a Xen running in nested environment on a c5.metal.
|
||||
|
||||
By extrapolation, it would take > 100ms for a 8TB guest (what we
|
||||
current security support). So add some preemption in p2m_teardown()
|
||||
and propagate to the callers. Note there are 3 places where
|
||||
the preemption is not enabled:
|
||||
- hap_final_teardown()/shadow_final_teardown(): We are
|
||||
preventing update the P2M once the domain is dying (so
|
||||
no more pages could be allocated) and most of the P2M pages
|
||||
will be freed in preemptive manneer when relinquishing the
|
||||
resources. So this is fine to disable preemption.
|
||||
- shadow_enable(): This is fine because it will undo the allocation
|
||||
that may have been made by p2m_alloc_table() (so only the root
|
||||
page table).
|
||||
|
||||
The preemption is arbitrarily checked every 1024 iterations.
|
||||
|
||||
Note that with the current approach, Xen doesn't keep track on whether
|
||||
the alt/nested P2Ms have been cleared. So there are some redundant work.
|
||||
However, this is not expected to incurr too much overhead (the P2M lock
|
||||
shouldn't be contended during teardown). So this is optimization is
|
||||
left outside of the security event.
|
||||
|
||||
This is part of CVE-2022-33746 / XSA-410.
|
||||
|
||||
Signed-off-by: Julien Grall <jgrall@amazon.com>
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
--- a/xen/include/asm-x86/p2m.h
|
||||
+++ b/xen/include/asm-x86/p2m.h
|
||||
@@ -574,7 +574,7 @@ int p2m_init(struct domain *d);
|
||||
int p2m_alloc_table(struct p2m_domain *p2m);
|
||||
|
||||
/* Return all the p2m resources to Xen. */
|
||||
-void p2m_teardown(struct p2m_domain *p2m, bool remove_root);
|
||||
+void p2m_teardown(struct p2m_domain *p2m, bool remove_root, bool *preempted);
|
||||
void p2m_final_teardown(struct domain *d);
|
||||
|
||||
/* Add a page to a domain's p2m table */
|
||||
--- a/xen/arch/x86/mm/hap/hap.c
|
||||
+++ b/xen/arch/x86/mm/hap/hap.c
|
||||
@@ -548,17 +548,17 @@ void hap_final_teardown(struct domain *d
|
||||
|
||||
if ( hvm_altp2m_supported() )
|
||||
for ( i = 0; i < MAX_ALTP2M; i++ )
|
||||
- p2m_teardown(d->arch.altp2m_p2m[i], true);
|
||||
+ p2m_teardown(d->arch.altp2m_p2m[i], true, NULL);
|
||||
|
||||
/* Destroy nestedp2m's first */
|
||||
for (i = 0; i < MAX_NESTEDP2M; i++) {
|
||||
- p2m_teardown(d->arch.nested_p2m[i], true);
|
||||
+ p2m_teardown(d->arch.nested_p2m[i], true, NULL);
|
||||
}
|
||||
|
||||
if ( d->arch.paging.hap.total_pages != 0 )
|
||||
hap_teardown(d, NULL);
|
||||
|
||||
- p2m_teardown(p2m_get_hostp2m(d), true);
|
||||
+ p2m_teardown(p2m_get_hostp2m(d), true, NULL);
|
||||
/* Free any memory that the p2m teardown released */
|
||||
paging_lock(d);
|
||||
hap_set_allocation(d, 0, NULL);
|
||||
@@ -612,14 +612,24 @@ void hap_teardown(struct domain *d, bool
|
||||
FREE_XENHEAP_PAGE(d->arch.altp2m_visible_eptp);
|
||||
|
||||
for ( i = 0; i < MAX_ALTP2M; i++ )
|
||||
- p2m_teardown(d->arch.altp2m_p2m[i], false);
|
||||
+ {
|
||||
+ p2m_teardown(d->arch.altp2m_p2m[i], false, preempted);
|
||||
+ if ( preempted && *preempted )
|
||||
+ return;
|
||||
+ }
|
||||
}
|
||||
|
||||
/* Destroy nestedp2m's after altp2m. */
|
||||
for ( i = 0; i < MAX_NESTEDP2M; i++ )
|
||||
- p2m_teardown(d->arch.nested_p2m[i], false);
|
||||
+ {
|
||||
+ p2m_teardown(d->arch.nested_p2m[i], false, preempted);
|
||||
+ if ( preempted && *preempted )
|
||||
+ return;
|
||||
+ }
|
||||
|
||||
- p2m_teardown(p2m_get_hostp2m(d), false);
|
||||
+ p2m_teardown(p2m_get_hostp2m(d), false, preempted);
|
||||
+ if ( preempted && *preempted )
|
||||
+ return;
|
||||
|
||||
paging_lock(d); /* Keep various asserts happy */
|
||||
|
||||
--- a/xen/arch/x86/mm/p2m.c
|
||||
+++ b/xen/arch/x86/mm/p2m.c
|
||||
@@ -749,12 +749,13 @@ int p2m_alloc_table(struct p2m_domain *p
|
||||
* hvm fixme: when adding support for pvh non-hardware domains, this path must
|
||||
* cleanup any foreign p2m types (release refcnts on them).
|
||||
*/
|
||||
-void p2m_teardown(struct p2m_domain *p2m, bool remove_root)
|
||||
+void p2m_teardown(struct p2m_domain *p2m, bool remove_root, bool *preempted)
|
||||
/* Return all the p2m pages to Xen.
|
||||
* We know we don't have any extra mappings to these pages */
|
||||
{
|
||||
struct page_info *pg, *root_pg = NULL;
|
||||
struct domain *d;
|
||||
+ unsigned int i = 0;
|
||||
|
||||
if (p2m == NULL)
|
||||
return;
|
||||
@@ -773,8 +774,19 @@ void p2m_teardown(struct p2m_domain *p2m
|
||||
}
|
||||
|
||||
while ( (pg = page_list_remove_head(&p2m->pages)) )
|
||||
- if ( pg != root_pg )
|
||||
- d->arch.paging.free_page(d, pg);
|
||||
+ {
|
||||
+ if ( pg == root_pg )
|
||||
+ continue;
|
||||
+
|
||||
+ d->arch.paging.free_page(d, pg);
|
||||
+
|
||||
+ /* Arbitrarily check preemption every 1024 iterations */
|
||||
+ if ( preempted && !(++i % 1024) && general_preempt_check() )
|
||||
+ {
|
||||
+ *preempted = true;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
|
||||
if ( root_pg )
|
||||
page_list_add(root_pg, &p2m->pages);
|
||||
--- a/xen/arch/x86/mm/shadow/common.c
|
||||
+++ b/xen/arch/x86/mm/shadow/common.c
|
||||
@@ -2770,8 +2770,12 @@ int shadow_enable(struct domain *d, u32
|
||||
out_locked:
|
||||
paging_unlock(d);
|
||||
out_unlocked:
|
||||
+ /*
|
||||
+ * This is fine to ignore the preemption here because only the root
|
||||
+ * will be allocated by p2m_alloc_table().
|
||||
+ */
|
||||
if ( rv != 0 && !pagetable_is_null(p2m_get_pagetable(p2m)) )
|
||||
- p2m_teardown(p2m, true);
|
||||
+ p2m_teardown(p2m, true, NULL);
|
||||
if ( rv != 0 && pg != NULL )
|
||||
{
|
||||
pg->count_info &= ~PGC_count_mask;
|
||||
@@ -2824,7 +2828,9 @@ void shadow_teardown(struct domain *d, b
|
||||
for_each_vcpu ( d, v )
|
||||
shadow_vcpu_teardown(v);
|
||||
|
||||
- p2m_teardown(p2m_get_hostp2m(d), false);
|
||||
+ p2m_teardown(p2m_get_hostp2m(d), false, preempted);
|
||||
+ if ( preempted && *preempted )
|
||||
+ return;
|
||||
|
||||
paging_lock(d);
|
||||
|
||||
@@ -2945,7 +2951,7 @@ void shadow_final_teardown(struct domain
|
||||
shadow_teardown(d, NULL);
|
||||
|
||||
/* It is now safe to pull down the p2m map. */
|
||||
- p2m_teardown(p2m_get_hostp2m(d), true);
|
||||
+ p2m_teardown(p2m_get_hostp2m(d), true, NULL);
|
||||
/* Free any shadow memory that the p2m teardown released */
|
||||
paging_lock(d);
|
||||
shadow_set_allocation(d, 0, NULL);
|
|
@ -1,55 +0,0 @@
|
|||
From: Jan Beulich <jbeulich@suse.com>
|
||||
Subject: gnttab: correct locking on transitive grant copy error path
|
||||
|
||||
While the comment next to the lock dropping in preparation of
|
||||
recursively calling acquire_grant_for_copy() mistakenly talks about the
|
||||
rd == td case (excluded a few lines further up), the same concerns apply
|
||||
to the calling of release_grant_for_copy() on a subsequent error path.
|
||||
|
||||
This is CVE-2022-33748 / XSA-411.
|
||||
|
||||
Fixes: ad48fb963dbf ("gnttab: fix transitive grant handling")
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
---
|
||||
v2: Extend code comment.
|
||||
|
||||
--- a/xen/common/grant_table.c
|
||||
+++ b/xen/common/grant_table.c
|
||||
@@ -2622,9 +2622,8 @@ acquire_grant_for_copy(
|
||||
trans_domid);
|
||||
|
||||
/*
|
||||
- * acquire_grant_for_copy() could take the lock on the
|
||||
- * remote table (if rd == td), so we have to drop the lock
|
||||
- * here and reacquire.
|
||||
+ * acquire_grant_for_copy() will take the lock on the remote table,
|
||||
+ * so we have to drop the lock here and reacquire.
|
||||
*/
|
||||
active_entry_release(act);
|
||||
grant_read_unlock(rgt);
|
||||
@@ -2661,11 +2660,25 @@ acquire_grant_for_copy(
|
||||
act->trans_gref != trans_gref ||
|
||||
!act->is_sub_page)) )
|
||||
{
|
||||
+ /*
|
||||
+ * Like above for acquire_grant_for_copy() we need to drop and then
|
||||
+ * re-acquire the locks here to prevent lock order inversion issues.
|
||||
+ * Unlike for acquire_grant_for_copy() we don't need to re-check
|
||||
+ * anything, as release_grant_for_copy() doesn't depend on the grant
|
||||
+ * table entry: It only updates internal state and the status flags.
|
||||
+ */
|
||||
+ active_entry_release(act);
|
||||
+ grant_read_unlock(rgt);
|
||||
+
|
||||
release_grant_for_copy(td, trans_gref, readonly);
|
||||
rcu_unlock_domain(td);
|
||||
+
|
||||
+ grant_read_lock(rgt);
|
||||
+ act = active_entry_acquire(rgt, gref);
|
||||
reduce_status_for_pin(rd, act, status, readonly);
|
||||
active_entry_release(act);
|
||||
grant_read_unlock(rgt);
|
||||
+
|
||||
put_page(*page);
|
||||
*page = NULL;
|
||||
return ERESTART;
|