Merge 5.16-rc4 into tty-next
We need the tty/serial driver fixes in here as well.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
commit f5bced9f34
@@ -53,11 +53,10 @@ The number of bits that the PAC occupies in a pointer is 55 minus the
 virtual address size configured by the kernel. For example, with a
 virtual address size of 48, the PAC is 7 bits wide.

 Recent versions of GCC can compile code with APIAKey-based return
 address protection when passed the -msign-return-address option. This
 uses instructions in the HINT space (unless -march=armv8.3-a or higher
 is also passed), and such code can run on systems without the pointer
 authentication extension.

 When ARM64_PTR_AUTH_KERNEL is selected, the kernel will be compiled
 with HINT space pointer authentication instructions protecting
 function returns. Kernels built with this option will work on hardware
 with or without pointer authentication support.

 In addition to exec(), keys can also be reinitialized to random values
 using the PR_PAC_RESET_KEYS prctl. A bitmask of PR_PAC_APIAKEY,
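The PR_PAC_RESET_KEYS interface mentioned in the hunk above can be exercised from userspace; a minimal sketch (constants from <linux/prctl.h>, error handling reduced to perror)::

    #include <stdio.h>
    #include <sys/prctl.h>
    #include <linux/prctl.h>

    int main(void)
    {
            /* Reinitialize the instruction A key to a fresh random value. */
            if (prctl(PR_PAC_RESET_KEYS, PR_PAC_APIAKEY, 0, 0, 0) < 0)
                    perror("PR_PAC_RESET_KEYS");

            /* A bitmask of 0 is treated as "reset all keys". */
            if (prctl(PR_PAC_RESET_KEYS, 0, 0, 0, 0) < 0)
                    perror("PR_PAC_RESET_KEYS (all keys)");

            return 0;
    }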
@@ -73,12 +73,12 @@ CPUFREQ_POSTCHANGE.
 The third argument is a struct cpufreq_freqs with the following
 values:

-=====	===========================
-cpu	number of the affected CPU
+======	======================================
+policy	a pointer to the struct cpufreq_policy
 old	old frequency
 new	new frequency
 flags	flags of the cpufreq driver
-=====	===========================
+======	======================================

 3. CPUFreq Table Generation with Operating Performance Point (OPP)
 ==================================================================
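A transition notifier that consumes these fields might look like the sketch below (a hypothetical driver-side callback; only the fields listed in the table are touched)::

    #include <linux/cpufreq.h>
    #include <linux/notifier.h>
    #include <linux/printk.h>

    /* Hypothetical example: log every completed frequency transition. */
    static int demo_cpufreq_notifier(struct notifier_block *nb,
                                     unsigned long action, void *data)
    {
            struct cpufreq_freqs *freqs = data;

            if (action == CPUFREQ_POSTCHANGE)
                    pr_info("cpufreq: CPU%u: %u kHz -> %u kHz\n",
                            freqs->policy->cpu, freqs->old, freqs->new);

            return NOTIFY_OK;
    }

    static struct notifier_block demo_nb = {
            .notifier_call = demo_cpufreq_notifier,
    };

    /* Registered from an init path with:
     * cpufreq_register_notifier(&demo_nb, CPUFREQ_TRANSITION_NOTIFIER);
     */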
@@ -17,9 +17,10 @@ properties:
     oneOf:
       - enum:
           - fsl,imx7ulp-lpi2c
-          - fsl,imx8qm-lpi2c
       - items:
-          - const: fsl,imx8qxp-lpi2c
+          - enum:
+              - fsl,imx8qxp-lpi2c
+              - fsl,imx8qm-lpi2c
           - const: fsl,imx7ulp-lpi2c

   reg:
@@ -50,11 +50,11 @@ ksmbd.mountd (user space daemon)
 --------------------------------

 ksmbd.mountd is userspace process to, transfer user account and password that
-are registered using ksmbd.adduser(part of utils for user space). Further it
+are registered using ksmbd.adduser (part of utils for user space). Further it
 allows sharing information parameters that parsed from smb.conf to ksmbd in
 kernel. For the execution part it has a daemon which is continuously running
 and connected to the kernel interface using netlink socket, it waits for the
-requests(dcerpc and share/user info). It handles RPC calls (at a minimum few
+requests (dcerpc and share/user info). It handles RPC calls (at a minimum few
 dozen) that are most important for file server from NetShareEnum and
 NetServerGetInfo. Complete DCE/RPC response is prepared from the user space
 and passed over to the associated kernel thread for the client.
@@ -154,11 +154,11 @@ Each layer
 1. Enable all component prints
 	# sudo ksmbd.control -d "all"

-2. Enable one of components(smb, auth, vfs, oplock, ipc, conn, rdma)
+2. Enable one of components (smb, auth, vfs, oplock, ipc, conn, rdma)
 	# sudo ksmbd.control -d "smb"

-3. Show what prints are enable.
-	# cat/sys/class/ksmbd-control/debug
+3. Show what prints are enabled.
+	# cat /sys/class/ksmbd-control/debug
 	[smb] auth vfs oplock ipc conn [rdma]

 4. Disable prints:
@@ -1,7 +1,7 @@
 .. SPDX-License-Identifier: GPL-2.0

 =================================
-NETWORK FILESYSTEM HELPER LIBRARY
+Network Filesystem Helper Library
 =================================

 .. Contents:
@@ -37,22 +37,22 @@ into a common call framework.

 The following services are provided:

- * Handles transparent huge pages (THPs).
+ * Handle folios that span multiple pages.

- * Insulates the netfs from VM interface changes.
+ * Insulate the netfs from VM interface changes.

- * Allows the netfs to arbitrarily split reads up into pieces, even ones that
-   don't match page sizes or page alignments and that may cross pages.
+ * Allow the netfs to arbitrarily split reads up into pieces, even ones that
+   don't match folio sizes or folio alignments and that may cross folios.

- * Allows the netfs to expand a readahead request in both directions to meet
-   its needs.
+ * Allow the netfs to expand a readahead request in both directions to meet its
+   needs.

- * Allows the netfs to partially fulfil a read, which will then be resubmitted.
+ * Allow the netfs to partially fulfil a read, which will then be resubmitted.

- * Handles local caching, allowing cached data and server-read data to be
+ * Handle local caching, allowing cached data and server-read data to be
   interleaved for a single request.

- * Handles clearing of bufferage that aren't on the server.
+ * Handle clearing of bufferage that aren't on the server.

 * Handle retrying of reads that failed, switching reads from the cache to the
   server as necessary.
@@ -70,22 +70,22 @@ Read Helper Functions

 Three read helpers are provided::

- * void netfs_readahead(struct readahead_control *ractl,
-			const struct netfs_read_request_ops *ops,
-			void *netfs_priv);``
- * int netfs_readpage(struct file *file,
-		      struct page *page,
-		      const struct netfs_read_request_ops *ops,
-		      void *netfs_priv);
- * int netfs_write_begin(struct file *file,
-			 struct address_space *mapping,
-			 loff_t pos,
-			 unsigned int len,
-			 unsigned int flags,
-			 struct page **_page,
-			 void **_fsdata,
-			 const struct netfs_read_request_ops *ops,
-			 void *netfs_priv);
+	void netfs_readahead(struct readahead_control *ractl,
+			     const struct netfs_read_request_ops *ops,
+			     void *netfs_priv);
+	int netfs_readpage(struct file *file,
+			   struct folio *folio,
+			   const struct netfs_read_request_ops *ops,
+			   void *netfs_priv);
+	int netfs_write_begin(struct file *file,
+			      struct address_space *mapping,
+			      loff_t pos,
+			      unsigned int len,
+			      unsigned int flags,
+			      struct folio **_folio,
+			      void **_fsdata,
+			      const struct netfs_read_request_ops *ops,
+			      void *netfs_priv);

 Each corresponds to a VM operation, with the addition of a couple of parameters
 for the use of the read helpers:
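For illustration, a network filesystem might wire its address-space operation up to the folio-based helper roughly as follows (a minimal sketch; myfs_req_ops stands in for the filesystem's netfs_read_request_ops table and is hypothetical)::

    #include <linux/netfs.h>
    #include <linux/fs.h>

    /* Hypothetical operations table supplied by the filesystem. */
    extern const struct netfs_read_request_ops myfs_req_ops;

    /* ->readpage() implementation: hand the folio straight to netfslib. */
    static int myfs_readpage(struct file *file, struct page *page)
    {
            struct folio *folio = page_folio(page);

            /* netfs_priv is NULL here; a filesystem may pass per-call state. */
            return netfs_readpage(file, folio, &myfs_req_ops, NULL);
    }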
@@ -103,8 +103,8 @@ Both of these values will be stored into the read request structure.
 For ->readahead() and ->readpage(), the network filesystem should just jump
 into the corresponding read helper; whereas for ->write_begin(), it may be a
 little more complicated as the network filesystem might want to flush
-conflicting writes or track dirty data and needs to put the acquired page if an
-error occurs after calling the helper.
+conflicting writes or track dirty data and needs to put the acquired folio if
+an error occurs after calling the helper.

 The helpers manage the read request, calling back into the network filesystem
 through the suppplied table of operations.  Waits will be performed as
@@ -253,7 +253,7 @@ through which it can issue requests and negotiate::
	void (*issue_op)(struct netfs_read_subrequest *subreq);
	bool (*is_still_valid)(struct netfs_read_request *rreq);
	int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
-				 struct page *page, void **_fsdata);
+				 struct folio *folio, void **_fsdata);
	void (*done)(struct netfs_read_request *rreq);
	void (*cleanup)(struct address_space *mapping, void *netfs_priv);
 };
@@ -313,13 +313,14 @@ The operations are as follows:

   There is no return value; the netfs_subreq_terminated() function should be
   called to indicate whether or not the operation succeeded and how much data
-  it transferred.  The filesystem also should not deal with setting pages
+  it transferred.  The filesystem also should not deal with setting folios
   uptodate, unlocking them or dropping their refs - the helpers need to deal
   with this as they have to coordinate with copying to the local cache.

-  Note that the helpers have the pages locked, but not pinned.  It is possible
-  to use the ITER_XARRAY iov iterator to refer to the range of the inode that
-  is being operated upon without the need to allocate large bvec tables.
+  Note that the helpers have the folios locked, but not pinned.  It is
+  possible to use the ITER_XARRAY iov iterator to refer to the range of the
+  inode that is being operated upon without the need to allocate large bvec
+  tables.

 * ``is_still_valid()``
@@ -330,15 +331,15 @@ The operations are as follows:
 * ``check_write_begin()``

   [Optional] This is called from the netfs_write_begin() helper once it has
-  allocated/grabbed the page to be modified to allow the filesystem to flush
+  allocated/grabbed the folio to be modified to allow the filesystem to flush
   conflicting state before allowing it to be modified.

-  It should return 0 if everything is now fine, -EAGAIN if the page should be
+  It should return 0 if everything is now fine, -EAGAIN if the folio should be
   regrabbed and any other error code to abort the operation.

 * ``done``

-  [Optional] This is called after the pages in the request have all been
+  [Optional] This is called after the folios in the request have all been
   unlocked (and marked uptodate if applicable).

 * ``cleanup``
@@ -390,7 +391,7 @@ The read helpers work by the following general procedure:
     * If NETFS_SREQ_CLEAR_TAIL was set, a short read will be cleared to the
       end of the slice instead of reissuing.

- * Once the data is read, the pages that have been fully read/cleared:
+ * Once the data is read, the folios that have been fully read/cleared:

   * Will be marked uptodate.
@@ -398,11 +399,11 @@ The read helpers work by the following general procedure:

   * Unlocked

- * Any pages that need writing to the cache will then have DIO writes issued.
+ * Any folios that need writing to the cache will then have DIO writes issued.

 * Synchronous operations will wait for reading to be complete.

- * Writes to the cache will proceed asynchronously and the pages will have the
+ * Writes to the cache will proceed asynchronously and the folios will have the
   PG_fscache mark removed when that completes.

 * The request structures will be cleaned up when everything has completed.
@@ -452,6 +453,9 @@ operation table looks like the following::
			    netfs_io_terminated_t term_func,
			    void *term_func_priv);

+	int (*prepare_write)(struct netfs_cache_resources *cres,
+			     loff_t *_start, size_t *_len, loff_t i_size);
+
	int (*write)(struct netfs_cache_resources *cres,
		     loff_t start_pos,
		     struct iov_iter *iter,
@@ -509,6 +513,14 @@ The methods defined in the table are:
   indicating whether the termination is definitely happening in the caller's
   context.

+* ``prepare_write()``
+
+  [Required] Called to adjust a write to the cache and check that there is
+  sufficient space in the cache.  The start and length values indicate the
+  size of the write that netfslib is proposing, and this can be adjusted by
+  the cache to respect DIO boundaries.  The file size is passed for
+  information.
+
 * ``write()``

   [Required] Called to write to the cache.  The start file offset is given
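A cache backend's implementation of this hook could, for instance, round the proposed region out to its DIO granularity. The sketch below assumes a hypothetical 4096-byte block size and omits the space accounting a real cache would do::

    #include <linux/netfs.h>

    #define DEMO_CACHE_BLOCK 4096   /* hypothetical DIO granularity */

    static int demo_prepare_write(struct netfs_cache_resources *cres,
                                  loff_t *_start, size_t *_len, loff_t i_size)
    {
            loff_t start = round_down(*_start, DEMO_CACHE_BLOCK);
            loff_t end = round_up(*_start + *_len, DEMO_CACHE_BLOCK);

            /* Expand the write so that it is aligned for direct I/O. */
            *_start = start;
            *_len = end - start;

            /* A real cache would also check that space is available here. */
            return 0;
    }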
@@ -525,4 +537,9 @@ not the read request structure as they could be used in other situations where
 there isn't a read request structure as well, such as writing dirty data to the
 cache.

+
+API Function Reference
+======================
+
+.. kernel-doc:: include/linux/netfs.h
+.. kernel-doc:: fs/netfs/read_helper.c
@@ -36,6 +36,8 @@ Key to symbols

 =============== =============================================================
 S               Start condition
+Sr              Repeated start condition, used to switch from write to
+                read mode.
 P               Stop condition
 Rd/Wr (1 bit)   Read/Write bit. Rd equals 1, Wr equals 0.
 A, NA (1 bit)   Acknowledge (ACK) and Not Acknowledge (NACK) bit
@@ -100,7 +102,7 @@ Implemented by i2c_smbus_read_byte_data()
 This reads a single byte from a device, from a designated register.
 The register is specified through the Comm byte::

-  S Addr Wr [A] Comm [A] S Addr Rd [A] [Data] NA P
+  S Addr Wr [A] Comm [A] Sr Addr Rd [A] [Data] NA P

 Functionality flag: I2C_FUNC_SMBUS_READ_BYTE_DATA
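From a kernel driver, this transaction is a single helper call; a minimal sketch (assuming client is a bound struct i2c_client, and 0x10 is a hypothetical register)::

    #include <linux/i2c.h>

    /* Read one byte from hypothetical register 0x10 of the device. */
    static int demo_read_reg(struct i2c_client *client)
    {
            s32 val = i2c_smbus_read_byte_data(client, 0x10);

            if (val < 0)
                    return val;     /* negative errno on failure */

            return val & 0xff;      /* byte read from the device */
    }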
@@ -114,7 +116,7 @@ This operation is very like Read Byte; again, data is read from a
 device, from a designated register that is specified through the Comm
 byte.  But this time, the data is a complete word (16 bits)::

-  S Addr Wr [A] Comm [A] S Addr Rd [A] [DataLow] A [DataHigh] NA P
+  S Addr Wr [A] Comm [A] Sr Addr Rd [A] [DataLow] A [DataHigh] NA P

 Functionality flag: I2C_FUNC_SMBUS_READ_WORD_DATA
@@ -164,7 +166,7 @@ This command selects a device register (through the Comm byte), sends
 16 bits of data to it, and reads 16 bits of data in return::

   S Addr Wr [A] Comm [A] DataLow [A] DataHigh [A]
-  S Addr Rd [A] [DataLow] A [DataHigh] NA P
+  Sr Addr Rd [A] [DataLow] A [DataHigh] NA P

 Functionality flag: I2C_FUNC_SMBUS_PROC_CALL
@@ -181,7 +183,7 @@ of data is specified by the device in the Count byte.
 ::

   S Addr Wr [A] Comm [A]
-  S Addr Rd [A] [Count] A [Data] A [Data] A ... A [Data] NA P
+  Sr Addr Rd [A] [Count] A [Data] A [Data] A ... A [Data] NA P

 Functionality flag: I2C_FUNC_SMBUS_READ_BLOCK_DATA
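The corresponding kernel helper returns the Count byte and fills a caller-supplied buffer; a short sketch (register 0x20 is hypothetical)::

    #include <linux/i2c.h>

    /* Read an SMBus block (up to 32 bytes) from hypothetical register 0x20. */
    static int demo_read_block(struct i2c_client *client, u8 *buf)
    {
            /* buf must have room for I2C_SMBUS_BLOCK_MAX (32) bytes. */
            int len = i2c_smbus_read_block_data(client, 0x20, buf);

            return len;     /* number of bytes read, or negative errno */
    }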
@@ -212,7 +214,7 @@ This command selects a device register (through the Comm byte), sends
 1 to 31 bytes of data to it, and reads 1 to 31 bytes of data in return::

   S Addr Wr [A] Comm [A] Count [A] Data [A] ...
-  S Addr Rd [A] [Count] A [Data] ... A P
+  Sr Addr Rd [A] [Count] A [Data] ... A P

 Functionality flag: I2C_FUNC_SMBUS_BLOCK_PROC_CALL
@@ -300,7 +302,7 @@ This command reads a block of bytes from a device, from a
 designated register that is specified through the Comm byte::

   S Addr Wr [A] Comm [A]
-  S Addr Rd [A] [Data] A [Data] A ... A [Data] NA P
+  Sr Addr Rd [A] [Data] A [Data] A ... A [Data] NA P

 Functionality flag: I2C_FUNC_SMBUS_READ_I2C_BLOCK
@@ -37,8 +37,7 @@ conn_reuse_mode - INTEGER

	0: disable any special handling on port reuse. The new
	connection will be delivered to the same real server that was
-	servicing the previous connection. This will effectively
-	disable expire_nodest_conn.
+	servicing the previous connection.

	bit 1: enable rescheduling of new connections when it is safe.
	That is, whenever expire_nodest_conn and for TCP sockets, when
@@ -486,8 +486,8 @@ of packets.
 Drivers are free to use a more permissive configuration than the requested
 configuration. It is expected that drivers should only implement directly the
 most generic mode that can be supported.  For example if the hardware can
-support HWTSTAMP_FILTER_V2_EVENT, then it should generally always upscale
-HWTSTAMP_FILTER_V2_L2_SYNC_MESSAGE, and so forth, as HWTSTAMP_FILTER_V2_EVENT
+support HWTSTAMP_FILTER_PTP_V2_EVENT, then it should generally always upscale
+HWTSTAMP_FILTER_PTP_V2_L2_SYNC, and so forth, as HWTSTAMP_FILTER_PTP_V2_EVENT
 is more generic (and more useful to applications).

 A driver which supports hardware time stamping shall update the struct
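From userspace, these filters are requested through the SIOCSHWTSTAMP ioctl; a minimal sketch (the interface name "eth0" and the pared-down error handling are illustrative)::

    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <net/if.h>
    #include <linux/net_tstamp.h>
    #include <linux/sockios.h>

    /* Ask the NIC to timestamp all PTPv2 event packets on "eth0". */
    static int enable_rx_timestamps(int sock)
    {
            struct hwtstamp_config cfg;
            struct ifreq ifr;

            memset(&cfg, 0, sizeof(cfg));
            cfg.tx_type = HWTSTAMP_TX_OFF;
            cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;

            memset(&ifr, 0, sizeof(ifr));
            strncpy(ifr.ifr_name, "eth0", sizeof(ifr.ifr_name) - 1);
            ifr.ifr_data = (void *)&cfg;

            /* The driver may upscale the filter; cfg reports what it chose. */
            return ioctl(sock, SIOCSHWTSTAMP, &ifr);
    }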
MAINTAINERS
@@ -2263,6 +2263,15 @@ L:	linux-iio@vger.kernel.org
 S:	Maintained
 F:	drivers/counter/microchip-tcb-capture.c

+ARM/MILBEAUT ARCHITECTURE
+M:	Taichi Sugaya <sugaya.taichi@socionext.com>
+M:	Takao Orito <orito.takao@socionext.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	arch/arm/boot/dts/milbeaut*
+F:	arch/arm/mach-milbeaut/
+N:	milbeaut
+
 ARM/MIOA701 MACHINE SUPPORT
 M:	Robert Jarzmik <robert.jarzmik@free.fr>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -2729,10 +2738,11 @@ S:	Maintained
 F:	drivers/memory/*emif*

 ARM/TEXAS INSTRUMENT KEYSTONE ARCHITECTURE
+M:	Nishanth Menon <nm@ti.com>
 M:	Santosh Shilimkar <ssantosh@kernel.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux-keystone.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ti/linux.git
 F:	arch/arm/boot/dts/keystone-*
 F:	arch/arm/mach-keystone/
@@ -3570,13 +3580,14 @@ L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/ethernet/broadcom/b44.*

-BROADCOM B53 ETHERNET SWITCH DRIVER
+BROADCOM B53/SF2 ETHERNET SWITCH DRIVER
 M:	Florian Fainelli <f.fainelli@gmail.com>
 L:	netdev@vger.kernel.org
 L:	openwrt-devel@lists.openwrt.org (subscribers-only)
 S:	Supported
 F:	Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml
 F:	drivers/net/dsa/b53/*
 F:	drivers/net/dsa/bcm_sf2*
+F:	include/linux/dsa/brcm.h
 F:	include/linux/platform_data/b53.h
@@ -15968,6 +15979,7 @@ F:	arch/mips/generic/board-ranchu.c

 RANDOM NUMBER DRIVER
 M:	"Theodore Ts'o" <tytso@mit.edu>
+M:	Jason A. Donenfeld <Jason@zx2c4.com>
 S:	Maintained
 F:	drivers/char/random.c
@@ -16490,6 +16502,12 @@ T:	git git://linuxtv.org/media_tree.git
 F:	Documentation/devicetree/bindings/media/allwinner,sun8i-a83t-de2-rotate.yaml
 F:	drivers/media/platform/sunxi/sun8i-rotate/

+RPMSG TTY DRIVER
+M:	Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
+L:	linux-remoteproc@vger.kernel.org
+S:	Maintained
+F:	drivers/tty/rpmsg_tty.c
+
 RTL2830 MEDIA DRIVER
 M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
@@ -16612,7 +16630,8 @@ F:	drivers/iommu/s390-iommu.c

 S390 IUCV NETWORK LAYER
-M:	Julian Wiedmann <jwi@linux.ibm.com>
 M:	Karsten Graul <kgraul@linux.ibm.com>
+M:	Alexandra Winter <wintera@linux.ibm.com>
+M:	Wenjia Zhang <wenjia@linux.ibm.com>
 L:	linux-s390@vger.kernel.org
 L:	netdev@vger.kernel.org
 S:	Supported
@@ -16623,7 +16642,8 @@ F:	net/iucv/

 S390 NETWORK DRIVERS
-M:	Julian Wiedmann <jwi@linux.ibm.com>
 M:	Karsten Graul <kgraul@linux.ibm.com>
+M:	Alexandra Winter <wintera@linux.ibm.com>
+M:	Wenjia Zhang <wenjia@linux.ibm.com>
 L:	linux-s390@vger.kernel.org
 L:	netdev@vger.kernel.org
 S:	Supported
@@ -18483,6 +18503,7 @@ F:	include/uapi/linux/pkt_sched.h
 F:	include/uapi/linux/tc_act/
 F:	include/uapi/linux/tc_ematch/
 F:	net/sched/
+F:	tools/testing/selftests/tc-testing

 TC90522 MEDIA DRIVER
 M:	Akihiro Tsukada <tskd08@gmail.com>
@@ -19031,11 +19052,12 @@ F:	drivers/mmc/host/tifm_sd.c
 F:	include/linux/tifm.h

 TI KEYSTONE MULTICORE NAVIGATOR DRIVERS
+M:	Nishanth Menon <nm@ti.com>
 M:	Santosh Shilimkar <ssantosh@kernel.org>
 L:	linux-kernel@vger.kernel.org
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux-keystone.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ti/linux.git
 F:	drivers/soc/ti/*

 TI LM49xxx FAMILY ASoC CODEC DRIVERS
Makefile
@@ -2,8 +2,8 @@
 VERSION = 5
 PATCHLEVEL = 16
 SUBLEVEL = 0
-EXTRAVERSION = -rc2
-NAME = Trick or Treat
+EXTRAVERSION = -rc4
+NAME = Gobble Gobble

 # *DOCUMENTATION*
 # To see a list of typical targets execute "make help"
arch/Kconfig
@@ -991,6 +991,16 @@ config HAVE_ARCH_COMPAT_MMAP_BASES
	  and vice-versa 32-bit applications to call 64-bit mmap().
	  Required for applications doing different bitness syscalls.

+config PAGE_SIZE_LESS_THAN_64KB
+	def_bool y
+	depends on !ARM64_64K_PAGES
+	depends on !IA64_PAGE_SIZE_64KB
+	depends on !PAGE_SIZE_64KB
+	depends on !PARISC_PAGE_SIZE_64KB
+	depends on !PPC_64K_PAGES
+	depends on !PPC_256K_PAGES
+	depends on !PAGE_SIZE_256KB
+
 # This allows to use a set of generic functions to determine mmap base
 # address by giving priority to top-down scheme only if the process
 # is not in legacy mode (compat task, unlimited stack size or
@@ -488,3 +488,4 @@
 556	common	landlock_restrict_self		sys_landlock_restrict_self
 # 557 reserved for memfd_secret
 558	common	process_mrelease		sys_process_mrelease
+559	common	futex_waitv			sys_futex_waitv
@@ -36,7 +36,6 @@ void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr);
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1

 void flush_dcache_page(struct page *page);
-void flush_dcache_folio(struct folio *folio);

 void dma_cache_wback_inv(phys_addr_t start, unsigned long sz);
 void dma_cache_inv(phys_addr_t start, unsigned long sz);
@@ -506,11 +506,17 @@
			#address-cells = <3>;
			#interrupt-cells = <1>;
			#size-cells = <2>;
-			interrupts = <GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>,
+			interrupts = <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "pcie", "msi";
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <0 0 0 1 &gicv2 GIC_SPI 143
+							IRQ_TYPE_LEVEL_HIGH>,
+					<0 0 0 2 &gicv2 GIC_SPI 144
+							IRQ_TYPE_LEVEL_HIGH>,
+					<0 0 0 3 &gicv2 GIC_SPI 145
+							IRQ_TYPE_LEVEL_HIGH>,
+					<0 0 0 4 &gicv2 GIC_SPI 146
+							IRQ_TYPE_LEVEL_HIGH>;
			msi-controller;
			msi-parent = <&pcie0>;
@@ -242,6 +242,8 @@

			gpio-controller;
			#gpio-cells = <2>;
+			interrupt-controller;
+			#interrupt-cells = <2>;
		};

		pcie0: pcie@12000 {
@@ -408,7 +410,7 @@
		i2c0: i2c@18009000 {
			compatible = "brcm,iproc-i2c";
			reg = <0x18009000 0x50>;
-			interrupts = <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>;
			#address-cells = <1>;
			#size-cells = <0>;
			clock-frequency = <100000>;
@@ -290,7 +290,6 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr
  */
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page *);
-void flush_dcache_folio(struct folio *folio);

 #define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1
 static inline void flush_kernel_vmap_range(void *addr, int size)
@@ -33,7 +33,7 @@ extern void __iomem *sdr_ctl_base_addr;
 u32 socfpga_sdram_self_refresh(u32 sdr_base);
 extern unsigned int socfpga_sdram_self_refresh_sz;

-extern char secondary_trampoline, secondary_trampoline_end;
+extern char secondary_trampoline[], secondary_trampoline_end[];

 extern unsigned long socfpga_cpu1start_addr;
@@ -20,14 +20,14 @@

 static int socfpga_boot_secondary(unsigned int cpu, struct task_struct *idle)
 {
-	int trampoline_size = &secondary_trampoline_end - &secondary_trampoline;
+	int trampoline_size = secondary_trampoline_end - secondary_trampoline;

	if (socfpga_cpu1start_addr) {
		/* This will put CPU #1 into reset. */
		writel(RSTMGR_MPUMODRST_CPU1,
		       rst_manager_base_addr + SOCFPGA_RSTMGR_MODMPURST);

-		memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size);
+		memcpy(phys_to_virt(0), secondary_trampoline, trampoline_size);

		writel(__pa_symbol(secondary_startup),
		       sys_manager_base_addr + (socfpga_cpu1start_addr & 0x000000ff));

@@ -45,12 +45,12 @@ static int socfpga_boot_secondary(unsigned int cpu, struct task_struct *idle)

 static int socfpga_a10_boot_secondary(unsigned int cpu, struct task_struct *idle)
 {
-	int trampoline_size = &secondary_trampoline_end - &secondary_trampoline;
+	int trampoline_size = secondary_trampoline_end - secondary_trampoline;

	if (socfpga_cpu1start_addr) {
		writel(RSTMGR_MPUMODRST_CPU1, rst_manager_base_addr +
		       SOCFPGA_A10_RSTMGR_MODMPURST);
-		memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size);
+		memcpy(phys_to_virt(0), secondary_trampoline, trampoline_size);

		writel(__pa_symbol(secondary_startup),
		       sys_manager_base_addr + (socfpga_cpu1start_addr & 0x00000fff));
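The declaration change above matters because `extern char sym;` tells the compiler each symbol is a one-byte object, which (for example) trips fortified memcpy() bounds checking and makes `&end - &start` arithmetic between two unrelated objects; incomplete array declarations make the symbol the address of an array and the subtraction ordinary pointer arithmetic. A tiny standalone illustration of the array form (not kernel code)::

    #include <stdio.h>
    #include <string.h>

    static char trampoline[16];  /* stand-in for code emitted in assembly */
    static char dest[16];

    int main(void)
    {
            /* An array name decays to a pointer to its first element, so
             * no '&' is needed, and bounds checks see the real object size.
             */
            memcpy(dest, trampoline, sizeof(trampoline));

            /* '&trampoline' would instead have type char (*)[16]; with a
             * plain 'extern char sym;' declaration the object would look
             * one byte long to the compiler.
             */
            printf("copied %zu bytes\n", sizeof(trampoline));
            return 0;
    }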
@@ -296,8 +296,7 @@
			pinctrl-0 = <&ufs_rst_n &ufs_refclk_out>;
			phys = <&ufs_0_phy>;
			phy-names = "ufs-phy";
-			samsung,sysreg = <&syscon_fsys2>;
-			samsung,ufs-shareability-reg-offset = <0x710>;
+			samsung,sysreg = <&syscon_fsys2 0x710>;
			status = "disabled";
		};
	};
@@ -12,6 +12,17 @@

 #define HAVE_FUNCTION_GRAPH_FP_TEST

+/*
+ * HAVE_FUNCTION_GRAPH_RET_ADDR_PTR means that the architecture can provide a
+ * "return address pointer" which can be used to uniquely identify a return
+ * address which has been overwritten.
+ *
+ * On arm64 we use the address of the caller's frame record, which remains the
+ * same for the lifetime of the instrumented function, unlike the return
+ * address in the LR.
+ */
+#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
 #define ARCH_SUPPORTS_FTRACE_OPS 1
 #else
@@ -91,7 +91,7 @@
 #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)

 /* TCR_EL2 Registers bits */
-#define TCR_EL2_RES1		((1 << 31) | (1 << 23))
+#define TCR_EL2_RES1		((1U << 31) | (1 << 23))
 #define TCR_EL2_TBI		(1 << 20)
 #define TCR_EL2_PS_SHIFT	16
 #define TCR_EL2_PS_MASK		(7 << TCR_EL2_PS_SHIFT)

@@ -276,7 +276,7 @@
 #define CPTR_EL2_TFP_SHIFT 10

 /* Hyp Coprocessor Trap Register */
-#define CPTR_EL2_TCPAC	(1 << 31)
+#define CPTR_EL2_TCPAC	(1U << 31)
 #define CPTR_EL2_TAM	(1 << 30)
 #define CPTR_EL2_TTA	(1 << 20)
 #define CPTR_EL2_TFP	(1 << CPTR_EL2_TFP_SHIFT)
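The `1U` suffix matters: with a 32-bit int, `1 << 31` shifts into the sign bit, which is undefined behaviour in C, and the (in practice negative) result sign-extends when widened to 64 bits. A small standalone demonstration of the difference::

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            /* Signed shift into the sign bit: formally undefined; in
             * practice the int is negative and widening sign-extends it.
             */
            uint64_t bad = (uint64_t)(1 << 31);   /* 0xffffffff80000000 */

            /* Unsigned shift: well-defined, zero-extends on widening. */
            uint64_t good = (uint64_t)(1U << 31); /* 0x0000000080000000 */

            printf("signed:   %#llx\n", (unsigned long long)bad);
            printf("unsigned: %#llx\n", (unsigned long long)good);
            return 0;
    }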
@@ -76,7 +76,7 @@ static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,
 static inline void
 pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
 {
-	VM_BUG_ON(mm != &init_mm);
+	VM_BUG_ON(mm && mm != &init_mm);
	__pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE | PMD_TABLE_UXN);
 }
@@ -47,9 +47,6 @@ struct stack_info {
  * @prev_type:   The type of stack this frame record was on, or a synthetic
  *               value of STACK_TYPE_UNKNOWN.  This is used to detect a
  *               transition from one stack to another.
- *
- * @graph:       When FUNCTION_GRAPH_TRACER is selected, holds the index of a
- *               replacement lr value in the ftrace graph stack.
  */
 struct stackframe {
	unsigned long fp;

@@ -57,9 +54,6 @@ struct stackframe {
	DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
	unsigned long prev_fp;
	enum stack_type prev_type;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	int graph;
-#endif
 #ifdef CONFIG_KRETPROBES
	struct llist_node *kr_cur;
 #endif
@@ -281,12 +281,22 @@ do {									\
	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
 } while (0)

+/*
+ * We must not call into the scheduler between uaccess_ttbr0_enable() and
+ * uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions,
+ * we must evaluate these outside of the critical section.
+ */
 #define __raw_get_user(x, ptr, err)					\
 do {									\
+	__typeof__(*(ptr)) __user *__rgu_ptr = (ptr);			\
+	__typeof__(x) __rgu_val;					\
	__chk_user_ptr(ptr);						\
+									\
	uaccess_ttbr0_enable();						\
-	__raw_get_mem("ldtr", x, ptr, err);				\
+	__raw_get_mem("ldtr", __rgu_val, __rgu_ptr, err);		\
	uaccess_ttbr0_disable();					\
+									\
+	(x) = __rgu_val;						\
 } while (0)

 #define __get_user_error(x, ptr, err)					\

@@ -310,14 +320,22 @@ do {									\

 #define get_user	__get_user

+/*
+ * We must not call into the scheduler between __uaccess_enable_tco_async() and
+ * __uaccess_disable_tco_async(). As `dst` and `src` may contain blocking
+ * functions, we must evaluate these outside of the critical section.
+ */
 #define __get_kernel_nofault(dst, src, type, err_label)			\
 do {									\
+	__typeof__(dst) __gkn_dst = (dst);				\
+	__typeof__(src) __gkn_src = (src);				\
	int __gkn_err = 0;						\
+									\
	__uaccess_enable_tco_async();					\
-	__raw_get_mem("ldr", *((type *)(dst)),				\
-		      (__force type *)(src), __gkn_err);		\
+	__raw_get_mem("ldr", *((type *)(__gkn_dst)),			\
+		      (__force type *)(__gkn_src), __gkn_err);		\
	__uaccess_disable_tco_async();					\
+									\
	if (unlikely(__gkn_err))					\
		goto err_label;						\
 } while (0)

@@ -351,11 +369,19 @@ do {									\
	}								\
 } while (0)

+/*
+ * We must not call into the scheduler between uaccess_ttbr0_enable() and
+ * uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions,
+ * we must evaluate these outside of the critical section.
+ */
 #define __raw_put_user(x, ptr, err)					\
 do {									\
-	__chk_user_ptr(ptr);						\
+	__typeof__(*(ptr)) __user *__rpu_ptr = (ptr);			\
+	__typeof__(*(ptr)) __rpu_val = (x);				\
+	__chk_user_ptr(__rpu_ptr);					\
+									\
	uaccess_ttbr0_enable();						\
-	__raw_put_mem("sttr", x, ptr, err);				\
+	__raw_put_mem("sttr", __rpu_val, __rpu_ptr, err);		\
	uaccess_ttbr0_disable();					\
 } while (0)

@@ -380,14 +406,22 @@ do {									\

 #define put_user	__put_user

+/*
+ * We must not call into the scheduler between __uaccess_enable_tco_async() and
+ * __uaccess_disable_tco_async(). As `dst` and `src` may contain blocking
+ * functions, we must evaluate these outside of the critical section.
+ */
 #define __put_kernel_nofault(dst, src, type, err_label)			\
 do {									\
+	__typeof__(dst) __pkn_dst = (dst);				\
+	__typeof__(src) __pkn_src = (src);				\
	int __pkn_err = 0;						\
+									\
	__uaccess_enable_tco_async();					\
-	__raw_put_mem("str", *((type *)(src)),				\
-		      (__force type *)(dst), __pkn_err);		\
+	__raw_put_mem("str", *((type *)(__pkn_src)),			\
+		      (__force type *)(__pkn_dst), __pkn_err);		\
	__uaccess_disable_tco_async();					\
+									\
	if (unlikely(__pkn_err))					\
		goto err_label;						\
 } while(0)
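The pattern above is the classic fix for macro arguments with side effects: evaluate each argument exactly once, into a local, before entering the critical section. A reduced standalone illustration of the hazard::

    #include <stdio.h>

    /* BROKEN: 'x' is expanded twice, so its side effects run twice and
     * any function call in 'x' executes inside the "critical section".
     */
    #define STORE_TWICE(slot, x) \
            do { (slot)[0] = (x); (slot)[1] = (x); } while (0)

    /* FIXED: evaluate 'x' once, up front, then use the local copy. */
    #define STORE_ONCE(slot, x)                     \
            do {                                    \
                    __typeof__(x) __val = (x);      \
                    (slot)[0] = __val;              \
                    (slot)[1] = __val;              \
            } while (0)

    static int counter;
    static int next(void) { return ++counter; }

    int main(void)
    {
            int a[2], b[2];

            STORE_TWICE(a, next()); /* a = {1, 2}: next() ran twice */
            STORE_ONCE(b, next());  /* b = {3, 3}: next() ran once */
            printf("a={%d,%d} b={%d,%d}\n", a[0], a[1], b[0], b[1]);
            return 0;
    }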
@@ -77,11 +77,17 @@
	.endm

 SYM_CODE_START(ftrace_regs_caller)
+#ifdef BTI_C
+	BTI_C
+#endif
	ftrace_regs_entry	1
	b	ftrace_common
 SYM_CODE_END(ftrace_regs_caller)

 SYM_CODE_START(ftrace_caller)
+#ifdef BTI_C
+	BTI_C
+#endif
	ftrace_regs_entry	0
	b	ftrace_common
 SYM_CODE_END(ftrace_caller)
@@ -244,8 +244,6 @@ void arch_ftrace_update_code(int command)
  * on the way back to parent. For this purpose, this function is called
  * in _mcount() or ftrace_caller() to replace return address (*parent) on
  * the call stack to return_to_handler.
- *
- * Note that @frame_pointer is used only for sanity check later.
  */
 void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
			   unsigned long frame_pointer)

@@ -263,8 +261,10 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
	 */
	old = *parent;

-	if (!function_graph_enter(old, self_addr, frame_pointer, NULL))
+	if (!function_graph_enter(old, self_addr, frame_pointer,
+	    (void *)frame_pointer)) {
		*parent = return_hooker;
+	}
 }

 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -147,7 +147,7 @@ int machine_kexec_post_load(struct kimage *kimage)
	if (rc)
		return rc;
	kimage->arch.ttbr1 = __pa(trans_pgd);
-	kimage->arch.zero_page = __pa(empty_zero_page);
+	kimage->arch.zero_page = __pa_symbol(empty_zero_page);

	reloc_size = __relocate_new_kernel_end - __relocate_new_kernel_start;
	memcpy(reloc_code, __relocate_new_kernel_start, reloc_size);
@@ -38,9 +38,6 @@ void start_backtrace(struct stackframe *frame, unsigned long fp,
 {
	frame->fp = fp;
	frame->pc = pc;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	frame->graph = 0;
-#endif
 #ifdef CONFIG_KRETPROBES
	frame->kr_cur = NULL;
 #endif

@@ -116,20 +113,23 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
	frame->prev_fp = fp;
	frame->prev_type = info.type;

+	frame->pc = ptrauth_strip_insn_pac(frame->pc);
+
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
	if (tsk->ret_stack &&
-		(ptrauth_strip_insn_pac(frame->pc) == (unsigned long)return_to_handler)) {
-		struct ftrace_ret_stack *ret_stack;
+		(frame->pc == (unsigned long)return_to_handler)) {
+		unsigned long orig_pc;
		/*
		 * This is a case where function graph tracer has
		 * modified a return address (LR) in a stack frame
		 * to hook a function return.
		 * So replace it to an original value.
		 */
-		ret_stack = ftrace_graph_get_ret_stack(tsk, frame->graph++);
-		if (WARN_ON_ONCE(!ret_stack))
+		orig_pc = ftrace_graph_ret_addr(tsk, NULL, frame->pc,
+						(void *)frame->fp);
+		if (WARN_ON_ONCE(frame->pc == orig_pc))
			return -EINVAL;
-		frame->pc = ret_stack->ret;
+		frame->pc = orig_pc;
	}
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 #ifdef CONFIG_KRETPROBES

@@ -137,8 +137,6 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
	frame->pc = kretprobe_find_ret_addr(tsk, (void *)frame->fp, &frame->kr_cur);
 #endif

-	frame->pc = ptrauth_strip_insn_pac(frame->pc);
-
	return 0;
 }
 NOKPROBE_SYMBOL(unwind_frame);
@@ -403,6 +403,8 @@ typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *);

 static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu);

+static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code);
+
 /*
  * Allow the hypervisor to handle the exit with an exit handler if it has one.
  *

@@ -429,6 +431,18 @@ static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
  */
 static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
 {
+	/*
+	 * Save PSTATE early so that we can evaluate the vcpu mode
+	 * early on.
+	 */
+	vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR);
+
+	/*
+	 * Check whether we want to repaint the state one way or
+	 * another.
+	 */
+	early_exit_filter(vcpu, exit_code);
+
	if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
		vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
@@ -70,7 +70,12 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
 static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
 {
	ctxt->regs.pc			= read_sysreg_el2(SYS_ELR);
-	ctxt->regs.pstate		= read_sysreg_el2(SYS_SPSR);
+	/*
+	 * Guest PSTATE gets saved at guest fixup time in all
+	 * cases. We still need to handle the nVHE host side here.
+	 */
+	if (!has_vhe() && ctxt->__hyp_running_vcpu)
+		ctxt->regs.pstate	= read_sysreg_el2(SYS_SPSR);

	if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
		ctxt_sys_reg(ctxt, DISR_EL1)	= read_sysreg_s(SYS_VDISR_EL2);
@@ -233,7 +233,7 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
  * Returns false if the guest ran in AArch32 when it shouldn't have, and
  * thus should exit to the host, or true if a the guest run loop can continue.
  */
-static bool handle_aarch32_guest(struct kvm_vcpu *vcpu, u64 *exit_code)
+static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
 {
	struct kvm *kvm = kern_hyp_va(vcpu->kvm);

@@ -248,10 +248,7 @@ static bool handle_aarch32_guest(struct kvm_vcpu *vcpu, u64 *exit_code)
		vcpu->arch.target = -1;
		*exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT);
		*exit_code |= ARM_EXCEPTION_IL;
-		return false;
	}
-
-	return true;
 }

 /* Switch to the guest for legacy non-VHE systems */

@@ -316,9 +313,6 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
		/* Jump in the fire! */
		exit_code = __guest_enter(vcpu);

-		if (unlikely(!handle_aarch32_guest(vcpu, &exit_code)))
-			break;
-
		/* And we're baaack! */
	} while (fixup_guest_exit(vcpu, &exit_code));
@@ -112,6 +112,10 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
	return hyp_exit_handlers;
 }

+static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+}
+
 /* Switch to the guest for VHE systems running in EL2 */
 static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
 {
@@ -369,3 +369,4 @@
 446	common	landlock_restrict_self		sys_landlock_restrict_self
 # 447 reserved for memfd_secret
 448	common	process_mrelease		sys_process_mrelease
+449	common	futex_waitv			sys_futex_waitv
@@ -250,7 +250,6 @@ static inline void __flush_page_to_ram(void *vaddr)

 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 #define flush_dcache_page(page)		__flush_page_to_ram(page_address(page))
-void flush_dcache_folio(struct folio *folio);
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)	do { } while (0)
 #define flush_icache_page(vma, page)	__flush_page_to_ram(page_address(page))
@@ -448,3 +448,4 @@
 446	common	landlock_restrict_self		sys_landlock_restrict_self
 # 447 reserved for memfd_secret
 448	common	process_mrelease		sys_process_mrelease
+449	common	futex_waitv			sys_futex_waitv
@@ -454,3 +454,4 @@
 446	common	landlock_restrict_self		sys_landlock_restrict_self
 # 447 reserved for memfd_secret
 448	common	process_mrelease		sys_process_mrelease
+449	common	futex_waitv			sys_futex_waitv
@@ -3097,7 +3097,7 @@ config STACKTRACE_SUPPORT
 config PGTABLE_LEVELS
	int
	default 4 if PAGE_SIZE_4KB && MIPS_VA_BITS_48
-	default 3 if 64BIT && !PAGE_SIZE_64KB
+	default 3 if 64BIT && (!PAGE_SIZE_64KB || MIPS_VA_BITS_48)
	default 2

 config MIPS_AUTO_PFN_OFFSET
@@ -52,7 +52,7 @@ endif

 vmlinuzobjs-$(CONFIG_KERNEL_XZ) += $(obj)/ashldi3.o

-vmlinuzobjs-$(CONFIG_KERNEL_ZSTD) += $(obj)/bswapdi.o
+vmlinuzobjs-$(CONFIG_KERNEL_ZSTD) += $(obj)/bswapdi.o $(obj)/ashldi3.o

 targets := $(notdir $(vmlinuzobjs-y))
@@ -61,8 +61,6 @@ static inline void flush_dcache_page(struct page *page)
		SetPageDcacheDirty(page);
 }

-void flush_dcache_folio(struct folio *folio);
-
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)	do { } while (0)
@@ -1734,8 +1734,6 @@ static inline void decode_cpucfg(struct cpuinfo_mips *c)

 static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu)
 {
-	decode_configs(c);
-
	/* All Loongson processors covered here define ExcCode 16 as GSExc. */
	c->options |= MIPS_CPU_GSEXCEX;

@@ -1796,6 +1794,8 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu)
		panic("Unknown Loongson Processor ID!");
		break;
	}
+
+	decode_configs(c);
 }
 #else
 static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) { }
@@ -185,7 +185,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
		seq_puts(m, " tx39_cache");
	if (cpu_has_octeon_cache)
		seq_puts(m, " octeon_cache");
-	if (cpu_has_fpu)
+	if (raw_cpu_has_fpu)
		seq_puts(m, " fpu");
	if (cpu_has_32fpr)
		seq_puts(m, " 32fpr");
@@ -27,7 +27,6 @@ void flush_cache_vunmap(unsigned long start, unsigned long end);

 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 void flush_dcache_page(struct page *page);
-void flush_dcache_folio(struct folio *folio);
 void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
		       unsigned long vaddr, void *dst, void *src, int len);
 void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
@@ -29,7 +29,6 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
	unsigned long pfn);
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 void flush_dcache_page(struct page *page);
-void flush_dcache_folio(struct folio *folio);

 extern void flush_icache_range(unsigned long start, unsigned long end);
 extern void flush_icache_page(struct vm_area_struct *vma, struct page *page);
@@ -15,7 +15,12 @@
 # Mike Shaver, Helge Deller and Martin K. Petersen
 #

+ifdef CONFIG_PARISC_SELF_EXTRACT
 boot := arch/parisc/boot
 KBUILD_IMAGE := $(boot)/bzImage
+else
+KBUILD_IMAGE := vmlinuz
+endif

 NM		= sh $(srctree)/arch/parisc/nm
 CHECKFLAGS	+= -D__hppa__=1
@@ -1,7 +1,9 @@
 CONFIG_LOCALVERSION="-64bit"
 # CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_KERNEL_LZ4=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_AUDIT=y
 CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_BSD_PROCESS_ACCT_V3=y
 CONFIG_TASKSTATS=y

@@ -35,6 +37,7 @@ CONFIG_MODVERSIONS=y
 CONFIG_BLK_DEV_INTEGRITY=y
 CONFIG_BINFMT_MISC=m
 # CONFIG_COMPACTION is not set
+CONFIG_MEMORY_FAILURE=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y

@@ -65,12 +68,15 @@ CONFIG_SCSI_ISCSI_ATTRS=y
 CONFIG_SCSI_SRP_ATTRS=y
 CONFIG_ISCSI_BOOT_SYSFS=y
 CONFIG_SCSI_MPT2SAS=y
-CONFIG_SCSI_LASI700=m
+CONFIG_SCSI_LASI700=y
 CONFIG_SCSI_SYM53C8XX_2=y
 CONFIG_SCSI_ZALON=y
+CONFIG_SCSI_QLA_ISCSI=m
 CONFIG_SCSI_DH=y
 CONFIG_ATA=y
+CONFIG_SATA_SIL=y
 CONFIG_SATA_SIS=y
 CONFIG_SATA_VIA=y
 CONFIG_PATA_NS87415=y
+CONFIG_PATA_SIL680=y
 CONFIG_ATA_GENERIC=y

@@ -79,6 +85,7 @@ CONFIG_MD_LINEAR=m
 CONFIG_BLK_DEV_DM=m
 CONFIG_DM_RAID=m
 CONFIG_DM_UEVENT=y
+CONFIG_DM_AUDIT=y
 CONFIG_FUSION=y
 CONFIG_FUSION_SPI=y
 CONFIG_FUSION_SAS=y

@@ -196,10 +203,15 @@ CONFIG_FB_MATROX_G=y
 CONFIG_FB_MATROX_I2C=y
 CONFIG_FB_MATROX_MAVEN=y
 CONFIG_FB_RADEON=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_CLUT224 is not set
 CONFIG_HIDRAW=y
 CONFIG_HID_PID=y
 CONFIG_USB_HIDDEV=y
 CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
 CONFIG_UIO=y
 CONFIG_UIO_PDRV_GENIRQ=m
 CONFIG_UIO_AEC=m
@@ -147,6 +147,17 @@
	extrd,u \r, 63-(\sa), 64-(\sa), \t
	.endm

+	/* Extract unsigned for 32- and 64-bit
+	 * The extru instruction leaves the most significant 32 bits of the
+	 * target register in an undefined state on PA 2.0 systems. */
+	.macro extru_safe r, p, len, t
+#ifdef CONFIG_64BIT
+	extrd,u	\r, 32+(\p), \len, \t
+#else
+	extru	\r, \p, \len, \t
+#endif
+	.endm
+
	/* load 32-bit 'value' into 'reg' compensating for the ldil
	 * sign-extension when running in wide mode.
	 * WARNING!! neither 'value' nor 'reg' can be expressions
@@ -50,11 +50,10 @@ void invalidate_kernel_vmap_range(void *vaddr, int size);

 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 void flush_dcache_page(struct page *page);
-void flush_dcache_folio(struct folio *folio);

 #define flush_dcache_mmap_lock(mapping)		xa_lock_irq(&mapping->i_pages)
 #define flush_dcache_mmap_unlock(mapping)	xa_unlock_irq(&mapping->i_pages)
@@ -39,6 +39,7 @@ verify "$3"
 if [ -n "${INSTALLKERNEL}" ]; then
   if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
   if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
+  if [ -x /usr/sbin/${INSTALLKERNEL} ]; then exec /usr/sbin/${INSTALLKERNEL} "$@"; fi
 fi

 # Default install
@@ -366,17 +366,9 @@
	 */
	.macro		L2_ptep	pmd,pte,index,va,fault
 #if CONFIG_PGTABLE_LEVELS == 3
-	extru		\va,31-ASM_PMD_SHIFT,ASM_BITS_PER_PMD,\index
+	extru_safe	\va,31-ASM_PMD_SHIFT,ASM_BITS_PER_PMD,\index
 #else
-# if defined(CONFIG_64BIT)
-	extrd,u		\va,63-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index
-# else
-#  if PAGE_SIZE > 4096
-	extru		\va,31-ASM_PGDIR_SHIFT,32-ASM_PGDIR_SHIFT,\index
-#  else
-	extru		\va,31-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index
-#  endif
-# endif
+	extru_safe	\va,31-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index
 #endif
	dep             %r0,31,PAGE_SHIFT,\pmd  /* clear offset */
 #if CONFIG_PGTABLE_LEVELS < 3

@@ -386,7 +378,7 @@
	bb,>=,n		\pmd,_PxD_PRESENT_BIT,\fault
	dep		%r0,31,PxD_FLAG_SHIFT,\pmd /* clear flags */
	SHLREG		\pmd,PxD_VALUE_SHIFT,\pmd
-	extru		\va,31-PAGE_SHIFT,ASM_BITS_PER_PTE,\index
+	extru_safe	\va,31-PAGE_SHIFT,ASM_BITS_PER_PTE,\index
	dep		%r0,31,PAGE_SHIFT,\pmd  /* clear offset */
	shladd		\index,BITS_PER_PTE_ENTRY,\pmd,\pmd /* pmd is now pte */
	.endm
@@ -566,7 +566,7 @@ lws_compare_and_swap:
	ldo	R%lws_lock_start(%r20), %r28

	/* Extract eight bits from r26 and hash lock (Bits 3-11) */
-	extru  %r26, 28, 8, %r20
+	extru_safe  %r26, 28, 8, %r20

	/* Find lock to use, the hash is either one of 0 to
	   15, multiplied by 16 (keep it 16-byte aligned)

@@ -751,7 +751,7 @@ cas2_lock_start:
	ldo	R%lws_lock_start(%r20), %r28

	/* Extract eight bits from r26 and hash lock (Bits 3-11) */
-	extru  %r26, 28, 8, %r20
+	extru_safe  %r26, 28, 8, %r20

	/* Find lock to use, the hash is either one of 0 to
	   15, multiplied by 16 (keep it 16-byte aligned)
@@ -249,30 +249,16 @@ void __init time_init(void)
 static int __init init_cr16_clocksource(void)
 {
	/*
-	 * The cr16 interval timers are not syncronized across CPUs on
-	 * different sockets, so mark them unstable and lower rating on
-	 * multi-socket SMP systems.
+	 * The cr16 interval timers are not syncronized across CPUs, even if
+	 * they share the same socket.
	 */
	if (num_online_cpus() > 1 && !running_on_qemu) {
-		int cpu;
-		unsigned long cpu0_loc;
-		cpu0_loc = per_cpu(cpu_data, 0).cpu_loc;
+		/* mark sched_clock unstable */
+		clear_sched_clock_stable();

-		for_each_online_cpu(cpu) {
-			if (cpu == 0)
-				continue;
-			if ((cpu0_loc != 0) &&
-			    (cpu0_loc == per_cpu(cpu_data, cpu).cpu_loc))
-				continue;
-
-			/* mark sched_clock unstable */
-			clear_sched_clock_stable();
-
-			clocksource_cr16.name = "cr16_unstable";
-			clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE;
-			clocksource_cr16.rating = 0;
-			break;
-		}
+		clocksource_cr16.name = "cr16_unstable";
+		clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE;
+		clocksource_cr16.rating = 0;
	}

	/* register at clocksource framework */
@@ -57,8 +57,6 @@ SECTIONS
 {
	. = KERNEL_BINARY_TEXT_START;

-	_stext = .;	/* start of kernel text, includes init code & data */
-
	__init_begin = .;
	HEAD_TEXT_SECTION
	MLONGCALL_DISCARD(INIT_TEXT_SECTION(8))

@@ -82,6 +80,7 @@ SECTIONS
	/* freed after init ends here */

	_text = .;		/* Text and read-only data */
+	_stext = .;
	MLONGCALL_KEEP(INIT_TEXT_SECTION(8))
	.text ALIGN(PAGE_SIZE) : {
		TEXT_TEXT
@@ -202,11 +202,11 @@ vmap_stack_overflow:
	mfspr	r1, SPRN_SPRG_THREAD
	lwz	r1, TASK_CPU - THREAD(r1)
	slwi	r1, r1, 3
-	addis	r1, r1, emergency_ctx@ha
+	addis	r1, r1, emergency_ctx-PAGE_OFFSET@ha
 #else
-	lis	r1, emergency_ctx@ha
+	lis	r1, emergency_ctx-PAGE_OFFSET@ha
 #endif
-	lwz	r1, emergency_ctx@l(r1)
+	lwz	r1, emergency_ctx-PAGE_OFFSET@l(r1)
	addi	r1, r1, THREAD_SIZE - INT_FRAME_SIZE
	EXCEPTION_PROLOG_2 0 vmap_stack_overflow
	prepare_transfer_to_handler
@@ -528,3 +528,4 @@
 446	common	landlock_restrict_self		sys_landlock_restrict_self
 # 447 reserved for memfd_secret
 448	common	process_mrelease		sys_process_mrelease
+449	common	futex_waitv			sys_futex_waitv
@@ -695,6 +695,7 @@ static void flush_guest_tlb(struct kvm *kvm)
				       "r" (0) : "memory");
		}
		asm volatile("ptesync": : :"memory");
+		// POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
		asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory");
	} else {
		for (set = 0; set < kvm->arch.tlb_sets; ++set) {

@@ -705,7 +706,9 @@ static void flush_guest_tlb(struct kvm *kvm)
			rb += PPC_BIT(51);	/* increment set number */
		}
		asm volatile("ptesync": : :"memory");
-		asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory");
+		// POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
+		if (cpu_has_feature(CPU_FTR_ARCH_300))
+			asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory");
	}
 }
@@ -12,14 +12,12 @@
 #include <linux/types.h>
 #include <linux/kvm.h>
 #include <linux/kvm_types.h>
+#include <asm/csr.h>
 #include <asm/kvm_vcpu_fp.h>
 #include <asm/kvm_vcpu_timer.h>

-#ifdef CONFIG_64BIT
-#define KVM_MAX_VCPUS			(1U << 16)
-#else
-#define KVM_MAX_VCPUS			(1U << 9)
-#endif
+#define KVM_MAX_VCPUS \
+	((HGATP_VMID_MASK >> HGATP_VMID_SHIFT) + 1)

 #define KVM_HALT_POLL_NS_DEFAULT	500000
@@ -453,6 +453,12 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
 void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot)
 {
+	gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
+	phys_addr_t size = slot->npages << PAGE_SHIFT;
+
+	spin_lock(&kvm->mmu_lock);
+	stage2_unmap_range(kvm, gpa, size, false);
+	spin_unlock(&kvm->mmu_lock);
 }

 void kvm_arch_commit_memory_region(struct kvm *kvm,
@@ -403,7 +403,6 @@ CONFIG_DEVTMPFS=y
 CONFIG_CONNECTOR=y
 CONFIG_ZRAM=y
 CONFIG_BLK_DEV_LOOP=m
-CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y

@@ -476,6 +475,7 @@ CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_VXLAN=m
 CONFIG_BAREUDP=m
+CONFIG_AMT=m
 CONFIG_TUN=m
 CONFIG_VETH=m
 CONFIG_VIRTIO_NET=m

@@ -489,6 +489,7 @@ CONFIG_NLMON=m
 # CONFIG_NET_VENDOR_AMD is not set
 # CONFIG_NET_VENDOR_AQUANTIA is not set
 # CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_ASIX is not set
 # CONFIG_NET_VENDOR_ATHEROS is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
 # CONFIG_NET_VENDOR_BROCADE is not set

@@ -571,6 +572,7 @@ CONFIG_WATCHDOG=y
 CONFIG_WATCHDOG_NOWAYOUT=y
 CONFIG_SOFT_WATCHDOG=m
 CONFIG_DIAG288_WATCHDOG=m
+# CONFIG_DRM_DEBUG_MODESET_LOCK is not set
 CONFIG_FB=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y

@@ -775,12 +777,14 @@ CONFIG_CRC4=m
 CONFIG_CRC7=m
 CONFIG_CRC8=m
 CONFIG_RANDOM32_SELFTEST=y
+CONFIG_XZ_DEC_MICROLZMA=y
 CONFIG_DMA_CMA=y
 CONFIG_CMA_SIZE_MBYTES=0
 CONFIG_PRINTK_TIME=y
 CONFIG_DYNAMIC_DEBUG=y
 CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF4=y
 CONFIG_DEBUG_INFO_BTF=y
 CONFIG_GDB_SCRIPTS=y
 CONFIG_HEADERS_INSTALL=y
 CONFIG_DEBUG_SECTION_MISMATCH=y

@@ -807,6 +811,7 @@ CONFIG_DEBUG_MEMORY_INIT=y
 CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
 CONFIG_DEBUG_PER_CPU_MAPS=y
 CONFIG_KFENCE=y
+CONFIG_KFENCE_STATIC_KEYS=y
 CONFIG_DEBUG_SHIRQ=y
 CONFIG_PANIC_ON_OOPS=y
 CONFIG_DETECT_HUNG_TASK=y

@@ -842,6 +847,7 @@ CONFIG_FTRACE_STARTUP_TEST=y
 CONFIG_SAMPLES=y
 CONFIG_SAMPLE_TRACE_PRINTK=m
 CONFIG_SAMPLE_FTRACE_DIRECT=m
+CONFIG_SAMPLE_FTRACE_DIRECT_MULTI=m
 CONFIG_DEBUG_ENTRY=y
 CONFIG_CIO_INJECT=y
 CONFIG_KUNIT=m

@@ -860,7 +866,7 @@ CONFIG_FAIL_FUNCTION=y
 CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y
 CONFIG_LKDTM=m
 CONFIG_TEST_MIN_HEAP=y
-CONFIG_KPROBES_SANITY_TEST=y
+CONFIG_KPROBES_SANITY_TEST=m
 CONFIG_RBTREE_TEST=y
 CONFIG_INTERVAL_TREE_TEST=m
 CONFIG_PERCPU_TEST=m
@@ -394,7 +394,6 @@ CONFIG_DEVTMPFS=y
 CONFIG_CONNECTOR=y
 CONFIG_ZRAM=y
 CONFIG_BLK_DEV_LOOP=m
-CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y

@@ -467,6 +466,7 @@ CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_VXLAN=m
 CONFIG_BAREUDP=m
+CONFIG_AMT=m
 CONFIG_TUN=m
 CONFIG_VETH=m
 CONFIG_VIRTIO_NET=m

@@ -480,6 +480,7 @@ CONFIG_NLMON=m
 # CONFIG_NET_VENDOR_AMD is not set
 # CONFIG_NET_VENDOR_AQUANTIA is not set
 # CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_ASIX is not set
 # CONFIG_NET_VENDOR_ATHEROS is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
 # CONFIG_NET_VENDOR_BROCADE is not set

@@ -762,12 +763,14 @@ CONFIG_PRIME_NUMBERS=m
 CONFIG_CRC4=m
 CONFIG_CRC7=m
 CONFIG_CRC8=m
+CONFIG_XZ_DEC_MICROLZMA=y
 CONFIG_DMA_CMA=y
 CONFIG_CMA_SIZE_MBYTES=0
 CONFIG_PRINTK_TIME=y
 CONFIG_DYNAMIC_DEBUG=y
 CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF4=y
 CONFIG_DEBUG_INFO_BTF=y
 CONFIG_GDB_SCRIPTS=y
 CONFIG_DEBUG_SECTION_MISMATCH=y
 CONFIG_MAGIC_SYSRQ=y

@@ -792,9 +795,11 @@ CONFIG_HIST_TRIGGERS=y
 CONFIG_SAMPLES=y
 CONFIG_SAMPLE_TRACE_PRINTK=m
 CONFIG_SAMPLE_FTRACE_DIRECT=m
+CONFIG_SAMPLE_FTRACE_DIRECT_MULTI=m
 CONFIG_KUNIT=m
 CONFIG_KUNIT_DEBUGFS=y
 CONFIG_LKDTM=m
+CONFIG_KPROBES_SANITY_TEST=m
 CONFIG_PERCPU_TEST=m
 CONFIG_ATOMIC64_SELFTEST=y
 CONFIG_TEST_BPF=m
@@ -65,9 +65,11 @@ CONFIG_ZFCP=y
# CONFIG_NETWORK_FILESYSTEMS is not set
CONFIG_LSM="yama,loadpin,safesetid,integrity"
# CONFIG_ZLIB_DFLTCC is not set
+CONFIG_XZ_DEC_MICROLZMA=y
CONFIG_PRINTK_TIME=y
# CONFIG_SYMBOLIC_ERRNAME is not set
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_INFO_BTF=y
CONFIG_DEBUG_FS=y
CONFIG_DEBUG_KERNEL=y
CONFIG_PANIC_ON_OOPS=y

@@ -14,12 +14,13 @@

/* I/O Map */
#define ZPCI_IOMAP_SHIFT		48
-#define ZPCI_IOMAP_ADDR_BASE		0x8000000000000000UL
+#define ZPCI_IOMAP_ADDR_SHIFT		62
+#define ZPCI_IOMAP_ADDR_BASE		(1UL << ZPCI_IOMAP_ADDR_SHIFT)
#define ZPCI_IOMAP_ADDR_OFF_MASK	((1UL << ZPCI_IOMAP_SHIFT) - 1)
#define ZPCI_IOMAP_MAX_ENTRIES \
-	((ULONG_MAX - ZPCI_IOMAP_ADDR_BASE + 1) / (1UL << ZPCI_IOMAP_SHIFT))
+	(1UL << (ZPCI_IOMAP_ADDR_SHIFT - ZPCI_IOMAP_SHIFT))
#define ZPCI_IOMAP_ADDR_IDX_MASK \
-	(~ZPCI_IOMAP_ADDR_OFF_MASK - ZPCI_IOMAP_ADDR_BASE)
+	((ZPCI_IOMAP_ADDR_BASE - 1) & ~ZPCI_IOMAP_ADDR_OFF_MASK)

struct zpci_iomap_entry {
	u32 fh;

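The arithmetic above is easier to see with concrete numbers: with a 48-bit offset field below a 2^62 base, the index field spans bits 48-61, giving 2^14 iomap entries. A minimal userspace sketch, assuming only the macro values shown in this hunk (the decode helper and sample cookie are mine, not the kernel's):

#include <stdio.h>

/* Values mirror the macros in the hunk above. */
#define ZPCI_IOMAP_SHIFT	48
#define ZPCI_IOMAP_ADDR_SHIFT	62
#define ZPCI_IOMAP_ADDR_BASE	(1UL << ZPCI_IOMAP_ADDR_SHIFT)
#define ZPCI_IOMAP_ADDR_OFF_MASK	((1UL << ZPCI_IOMAP_SHIFT) - 1)
#define ZPCI_IOMAP_MAX_ENTRIES	(1UL << (ZPCI_IOMAP_ADDR_SHIFT - ZPCI_IOMAP_SHIFT))
#define ZPCI_IOMAP_ADDR_IDX_MASK	((ZPCI_IOMAP_ADDR_BASE - 1) & ~ZPCI_IOMAP_ADDR_OFF_MASK)

int main(void)
{
    /* A hypothetical pseudo-MMIO cookie: base | (index << 48) | offset. */
    unsigned long addr = ZPCI_IOMAP_ADDR_BASE | (5UL << ZPCI_IOMAP_SHIFT) | 0x1000;
    unsigned long idx = (addr & ZPCI_IOMAP_ADDR_IDX_MASK) >> ZPCI_IOMAP_SHIFT;
    unsigned long off = addr & ZPCI_IOMAP_ADDR_OFF_MASK;

    /* Prints: entries=16384 idx=5 off=0x1000 */
    printf("entries=%lu idx=%lu off=%#lx\n", ZPCI_IOMAP_MAX_ENTRIES, idx, off);
    return 0;
}

Moving the base from bit 63 down to bit 62 keeps the pseudo-MMIO range out of the most significant bit while still leaving 16384 entries of 2^48 bytes each.
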
@@ -173,10 +173,11 @@ static noinline int unwindme_func4(struct unwindme *u)
	}

	/*
-	 * trigger specification exception
+	 * Trigger operation exception; use insn notation to bypass
+	 * llvm's integrated assembler sanity checks.
	 */
	asm volatile(
-		"	mvcl	%%r1,%%r1\n"
+		"	.insn	e,0x0000\n"	/* illegal opcode */
		"0:	nopr	%%r7\n"
		EX_TABLE(0b, 0b)
		:);

@@ -43,7 +43,6 @@ extern void flush_cache_range(struct vm_area_struct *vma,
			      unsigned long start, unsigned long end);
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
void flush_dcache_page(struct page *page);
-void flush_dcache_folio(struct folio *folio);
extern void flush_icache_range(unsigned long start, unsigned long end);
#define flush_icache_user_range flush_icache_range
extern void flush_icache_page(struct vm_area_struct *vma,

@@ -451,3 +451,4 @@
446	common	landlock_restrict_self	sys_landlock_restrict_self
# 447 reserved for memfd_secret
448	common	process_mrelease	sys_process_mrelease
+449	common	futex_waitv		sys_futex_waitv

@@ -494,3 +494,4 @@
446	common	landlock_restrict_self	sys_landlock_restrict_self
# 447 reserved for memfd_secret
448	common	process_mrelease	sys_process_mrelease
+449	common	futex_waitv		sys_futex_waitv

@@ -574,6 +574,10 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
	ud2
1:
#endif
+#ifdef CONFIG_XEN_PV
+	ALTERNATIVE "", "jmp xenpv_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
+#endif
+
	POP_REGS pop_rdi=0

	/*

@@ -890,6 +894,7 @@ SYM_CODE_START_LOCAL(paranoid_entry)
.Lparanoid_entry_checkgs:
	/* EBX = 1 -> kernel GSBASE active, no restore required */
	movl	$1, %ebx
+
	/*
	 * The kernel-enforced convention is a negative GSBASE indicates
	 * a kernel value. No SWAPGS needed on entry and exit.

@@ -897,21 +902,14 @@ SYM_CODE_START_LOCAL(paranoid_entry)
	movl	$MSR_GS_BASE, %ecx
	rdmsr
	testl	%edx, %edx
-	jns	.Lparanoid_entry_swapgs
-	ret
-
-.Lparanoid_entry_swapgs:
-	swapgs
-
-	/*
-	 * The above SAVE_AND_SWITCH_TO_KERNEL_CR3 macro doesn't do an
-	 * unconditional CR3 write, even in the PTI case. So do an lfence
-	 * to prevent GS speculation, regardless of whether PTI is enabled.
-	 */
-	FENCE_SWAPGS_KERNEL_ENTRY
+	js	.Lparanoid_kernel_gsbase
+
	/* EBX = 0 -> SWAPGS required on exit */
	xorl	%ebx, %ebx
+	swapgs
+.Lparanoid_kernel_gsbase:
+
+	FENCE_SWAPGS_KERNEL_ENTRY
	ret
SYM_CODE_END(paranoid_entry)

@@ -993,11 +991,6 @@ SYM_CODE_START_LOCAL(error_entry)
	pushq	%r12
	ret

-.Lerror_entry_done_lfence:
-	FENCE_SWAPGS_KERNEL_ENTRY
-.Lerror_entry_done:
-	ret
-
	/*
	 * There are two places in the kernel that can potentially fault with
	 * usergs. Handle them here. B stepping K8s sometimes report a

@@ -1020,8 +1013,14 @@ SYM_CODE_START_LOCAL(error_entry)
	 * .Lgs_change's error handler with kernel gsbase.
	 */
	SWAPGS
	FENCE_SWAPGS_USER_ENTRY
	jmp .Lerror_entry_done

+	/*
+	 * Issue an LFENCE to prevent GS speculation, regardless of whether it is a
+	 * kernel or user gsbase.
+	 */
+.Lerror_entry_done_lfence:
+	FENCE_SWAPGS_KERNEL_ENTRY
+	ret
+
.Lbstep_iret:
	/* Fix truncated RIP */

@@ -102,12 +102,6 @@ extern void switch_fpu_return(void);
 */
extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name);

-/*
- * Tasks that are not using SVA have mm->pasid set to zero to note that they
- * will not have the valid bit set in MSR_IA32_PASID while they are running.
- */
-#define PASID_DISABLED	0
-
/* Trap handling */
extern int fpu__exception_code(struct fpu *fpu, int trap_nr);
extern void fpu_sync_fpstate(struct fpu *fpu);

@@ -108,7 +108,7 @@
#define INTEL_FAM6_ALDERLAKE		0x97	/* Golden Cove / Gracemont */
#define INTEL_FAM6_ALDERLAKE_L		0x9A	/* Golden Cove / Gracemont */

-#define INTEL_FAM6_RAPTOR_LAKE		0xB7
+#define INTEL_FAM6_RAPTORLAKE		0xB7

/* "Small Core" Processors (Atom) */

@@ -1036,6 +1036,7 @@ struct kvm_x86_msr_filter {
#define APICV_INHIBIT_REASON_PIT_REINJ	4
#define APICV_INHIBIT_REASON_X2APIC	5
#define APICV_INHIBIT_REASON_BLOCKIRQ	6
+#define APICV_INHIBIT_REASON_ABSENT	7

struct kvm_arch {
	unsigned long n_used_mmu_pages;

@@ -73,4 +73,15 @@

#define GHCB_RESP_CODE(v)		((v) & GHCB_MSR_INFO_MASK)

+/*
+ * Error codes related to GHCB input that can be communicated back to the guest
+ * by setting the lower 32-bits of the GHCB SW_EXITINFO1 field to 2.
+ */
+#define GHCB_ERR_NOT_REGISTERED		1
+#define GHCB_ERR_INVALID_USAGE		2
+#define GHCB_ERR_INVALID_SCRATCH_AREA	3
+#define GHCB_ERR_MISSING_INPUT		4
+#define GHCB_ERR_INVALID_INPUT		5
+#define GHCB_ERR_INVALID_EVENT		6
+
#endif

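Later hunks in this merge report these codes by storing 2 in the low 32 bits of SW_EXITINFO1 and the GHCB_ERR_* value in SW_EXITINFO2. A rough model of that convention, using a hypothetical, stripped-down GHCB struct (the real layout has many more fields):

#include <stdint.h>

/* Hypothetical two-field stand-in for the GHCB save area. */
struct ghcb_model {
    uint64_t sw_exit_info_1;
    uint64_t sw_exit_info_2;
};

/* Flag an error to the guest: low 32 bits of exit_info_1 = 2,
 * exit_info_2 = one of the GHCB_ERR_* codes defined above. */
static void ghcb_report_error(struct ghcb_model *g, uint64_t reason)
{
    g->sw_exit_info_1 = (g->sw_exit_info_1 & ~0xffffffffULL) | 2;
    g->sw_exit_info_2 = reason;	/* e.g. GHCB_ERR_MISSING_INPUT (4) */
}
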
@@ -281,13 +281,13 @@ HYPERVISOR_callback_op(int cmd, void *arg)
	return _hypercall2(int, callback_op, cmd, arg);
}

-static inline int
+static __always_inline int
HYPERVISOR_set_debugreg(int reg, unsigned long value)
{
	return _hypercall2(int, set_debugreg, reg, value);
}

-static inline unsigned long
+static __always_inline unsigned long
HYPERVISOR_get_debugreg(int reg)
{
	return _hypercall1(unsigned long, get_debugreg, reg);

@@ -64,6 +64,7 @@ void xen_arch_unregister_cpu(int num);

#ifdef CONFIG_PVH
void __init xen_pvh_init(struct boot_params *boot_params);
+void __init mem_map_via_hcall(struct boot_params *boot_params_p);
#endif

#endif /* _ASM_X86_XEN_HYPERVISOR_H */

@@ -118,7 +118,7 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame,
				      struct fpstate *fpstate)
{
	struct xregs_state __user *x = buf;
-	struct _fpx_sw_bytes sw_bytes;
+	struct _fpx_sw_bytes sw_bytes = {};
	u32 xfeatures;
	int err;

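The `= {}` initializer matters because the structure is later copied out to the user-space signal frame; without it, padding and any unwritten fields could leak kernel stack contents. A userspace sketch of the same pattern, with a hypothetical struct:

#include <string.h>

struct fx_sw_bytes { unsigned magic; unsigned pad[3]; };

void fill(struct fx_sw_bytes *out)
{
    struct fx_sw_bytes sw = {0};	/* every byte, incl. padding, zeroed */

    sw.magic = 0x46505853;		/* only the used field is written */
    memcpy(out, &sw, sizeof(sw));	/* no uninitialized bytes escape */
}
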
@@ -742,7 +742,7 @@ dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
	return 0;
}

-static char *prepare_command_line(void)
+static char * __init prepare_command_line(void)
{
#ifdef CONFIG_CMDLINE_BOOL
#ifdef CONFIG_CMDLINE_OVERRIDE

@@ -294,11 +294,6 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
				   char *dst, char *buf, size_t size)
{
	unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;
-	char __user *target = (char __user *)dst;
-	u64 d8;
-	u32 d4;
-	u16 d2;
-	u8 d1;

	/*
	 * This function uses __put_user() independent of whether kernel or user

@@ -320,26 +315,42 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
	 * instructions here would cause infinite nesting.
	 */
	switch (size) {
-	case 1:
+	case 1: {
+		u8 d1;
+		u8 __user *target = (u8 __user *)dst;
+
		memcpy(&d1, buf, 1);
		if (__put_user(d1, target))
			goto fault;
		break;
-	case 2:
+	}
+	case 2: {
+		u16 d2;
+		u16 __user *target = (u16 __user *)dst;
+
		memcpy(&d2, buf, 2);
		if (__put_user(d2, target))
			goto fault;
		break;
-	case 4:
+	}
+	case 4: {
+		u32 d4;
+		u32 __user *target = (u32 __user *)dst;
+
		memcpy(&d4, buf, 4);
		if (__put_user(d4, target))
			goto fault;
		break;
-	case 8:
+	}
+	case 8: {
+		u64 d8;
+		u64 __user *target = (u64 __user *)dst;
+
		memcpy(&d8, buf, 8);
		if (__put_user(d8, target))
			goto fault;
		break;
+	}
	default:
		WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
		return ES_UNSUPPORTED;

@@ -362,11 +373,6 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
				  char *src, char *buf, size_t size)
{
	unsigned long error_code = X86_PF_PROT;
-	char __user *s = (char __user *)src;
-	u64 d8;
-	u32 d4;
-	u16 d2;
-	u8 d1;

	/*
	 * This function uses __get_user() independent of whether kernel or user

@@ -388,26 +394,41 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
	 * instructions here would cause infinite nesting.
	 */
	switch (size) {
-	case 1:
+	case 1: {
+		u8 d1;
+		u8 __user *s = (u8 __user *)src;
+
		if (__get_user(d1, s))
			goto fault;
		memcpy(buf, &d1, 1);
		break;
-	case 2:
+	}
+	case 2: {
+		u16 d2;
+		u16 __user *s = (u16 __user *)src;
+
		if (__get_user(d2, s))
			goto fault;
		memcpy(buf, &d2, 2);
		break;
-	case 4:
+	}
+	case 4: {
+		u32 d4;
+		u32 __user *s = (u32 __user *)src;
+
		if (__get_user(d4, s))
			goto fault;
		memcpy(buf, &d4, 4);
		break;
-	case 8:
+	}
+	case 8: {
+		u64 d8;
+		u64 __user *s = (u64 __user *)src;
		if (__get_user(d8, s))
			goto fault;
		memcpy(buf, &d8, 8);
		break;
+	}
	default:
		WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
		return ES_UNSUPPORTED;

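The restructuring scopes each temporary and the cast of the target pointer to its own case, so every access is performed with a correctly sized type. A userspace model of the same shape, with memcpy standing in for the __put_user() primitive (a sketch, not the kernel code):

#include <string.h>
#include <stdint.h>
#include <stdbool.h>

/* Per-size temporaries, one per case, as in the vc_write_mem() hunk. */
static bool write_sized(void *dst, const void *buf, size_t size)
{
    switch (size) {
    case 1: {
        uint8_t d1;
        memcpy(&d1, buf, 1);	/* read exactly one byte ... */
        memcpy(dst, &d1, 1);	/* ... write exactly one byte */
        break;
    }
    case 2: {
        uint16_t d2;
        memcpy(&d2, buf, 2);
        memcpy(dst, &d2, 2);
        break;
    }
    case 4: {
        uint32_t d4;
        memcpy(&d4, buf, 4);
        memcpy(dst, &d4, 4);
        break;
    }
    case 8: {
        uint64_t d8;
        memcpy(&d8, buf, 8);
        memcpy(dst, &d8, 8);
        break;
    }
    default:
        return false;	/* unsupported access size */
    }
    return true;
}
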
@@ -1180,6 +1180,12 @@ void mark_tsc_unstable(char *reason)

EXPORT_SYMBOL_GPL(mark_tsc_unstable);

+static void __init tsc_disable_clocksource_watchdog(void)
+{
+	clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
+	clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
+}
+
static void __init check_system_tsc_reliable(void)
{
#if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC)

@@ -1196,6 +1202,23 @@ static void __init check_system_tsc_reliable(void)
#endif
	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
		tsc_clocksource_reliable = 1;
+
+	/*
+	 * Disable the clocksource watchdog when the system has:
+	 *  - TSC running at constant frequency
+	 *  - TSC which does not stop in C-States
+	 *  - the TSC_ADJUST register which allows to detect even minimal
+	 *    modifications
+	 *  - not more than two sockets. As the number of sockets cannot be
+	 *    evaluated at the early boot stage where this has to be
+	 *    invoked, check the number of online memory nodes as a
+	 *    fallback solution which is a reasonable estimate.
+	 */
+	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
+	    boot_cpu_has(X86_FEATURE_NONSTOP_TSC) &&
+	    boot_cpu_has(X86_FEATURE_TSC_ADJUST) &&
+	    nr_online_nodes <= 2)
+		tsc_disable_clocksource_watchdog();
}

/*

@@ -1387,9 +1410,6 @@ static int __init init_tsc_clocksource(void)
	if (tsc_unstable)
		goto unreg;

-	if (tsc_clocksource_reliable || no_tsc_watchdog)
-		clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
-
	if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
		clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;

@@ -1527,7 +1547,7 @@ void __init tsc_init(void)
	}

-	if (tsc_clocksource_reliable || no_tsc_watchdog)
-		clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
+	tsc_disable_clocksource_watchdog();

	clocksource_register_khz(&clocksource_tsc_early, tsc_khz);
	detect_art();

@@ -30,6 +30,7 @@ struct tsc_adjust {
};

static DEFINE_PER_CPU(struct tsc_adjust, tsc_adjust);
+static struct timer_list tsc_sync_check_timer;

/*
 * TSC's on different sockets may be reset asynchronously.

@@ -77,6 +78,46 @@ void tsc_verify_tsc_adjust(bool resume)
	}
}

+/*
+ * Normally the tsc_sync will be checked every time system enters idle
+ * state, but there is still the caveat that a system won't enter idle,
+ * either because it's too busy or configured purposely to not enter
+ * idle.
+ *
+ * So setup a periodic timer (every 10 minutes) to make sure the check
+ * is always on.
+ */
+
+#define SYNC_CHECK_INTERVAL	(HZ * 600)
+
+static void tsc_sync_check_timer_fn(struct timer_list *unused)
+{
+	int next_cpu;
+
+	tsc_verify_tsc_adjust(false);
+
+	/* Run the check for all onlined CPUs in turn */
+	next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
+	if (next_cpu >= nr_cpu_ids)
+		next_cpu = cpumask_first(cpu_online_mask);
+
+	tsc_sync_check_timer.expires += SYNC_CHECK_INTERVAL;
+	add_timer_on(&tsc_sync_check_timer, next_cpu);
+}
+
+static int __init start_sync_check_timer(void)
+{
+	if (!cpu_feature_enabled(X86_FEATURE_TSC_ADJUST) || tsc_clocksource_reliable)
+		return 0;
+
+	timer_setup(&tsc_sync_check_timer, tsc_sync_check_timer_fn, 0);
+	tsc_sync_check_timer.expires = jiffies + SYNC_CHECK_INTERVAL;
+	add_timer(&tsc_sync_check_timer);
+
+	return 0;
+}
+late_initcall(start_sync_check_timer);
+
static void tsc_sanitize_first_cpu(struct tsc_adjust *cur, s64 bootval,
				   unsigned int cpu, bool bootcpu)
{

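The new timer re-arms itself on the next online CPU each interval, so the TSC_ADJUST check eventually visits every CPU even on systems that never idle. A small model of just the rotation, assuming a plain boolean array in place of cpu_online_mask:

#include <stdbool.h>

/* Pick the next online CPU after 'cur', wrapping to the first online
 * CPU at the end of the array — the same walk cpumask_next() followed
 * by cpumask_first() performs in the timer callback above. */
static int next_check_cpu(const bool *online, int ncpus, int cur)
{
    for (int cpu = cur + 1; cpu < ncpus; cpu++)
        if (online[cpu])
            return cpu;
    for (int cpu = 0; cpu < ncpus; cpu++)
        if (online[cpu])
            return cpu;
    return cur;	/* nothing else online */
}
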
@@ -81,7 +81,6 @@ struct kvm_ioapic {
	unsigned long irq_states[IOAPIC_NUM_PINS];
	struct kvm_io_device dev;
	struct kvm *kvm;
-	void (*ack_notifier)(void *opaque, int irq);
	spinlock_t lock;
	struct rtc_status rtc_status;
	struct delayed_work eoi_inject;

@@ -56,7 +56,6 @@ struct kvm_pic {
	struct kvm_io_device dev_master;
	struct kvm_io_device dev_slave;
	struct kvm_io_device dev_elcr;
-	void (*ack_notifier)(void *opaque, int irq);
	unsigned long irq_states[PIC_NUM_PINS];
};

@@ -707,7 +707,7 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
{
	int highest_irr;
-	if (apic->vcpu->arch.apicv_active)
+	if (kvm_x86_ops.sync_pir_to_irr)
		highest_irr = static_call(kvm_x86_sync_pir_to_irr)(apic->vcpu);
	else
		highest_irr = apic_find_highest_irr(apic);

@@ -1582,7 +1582,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
	flush = kvm_handle_gfn_range(kvm, range, kvm_unmap_rmapp);

	if (is_tdp_mmu_enabled(kvm))
-		flush |= kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush);
+		flush = kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush);

	return flush;
}

@@ -1936,7 +1940,11 @@ static void mmu_audit_disable(void) { }

static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
{
-	return sp->role.invalid ||
+	if (sp->role.invalid)
+		return true;
+
+	/* TDP MMU pages do not use the MMU generation. */
+	return !sp->tdp_mmu_page &&
	       unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
}

@@ -2173,10 +2177,10 @@ static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterato
	iterator->shadow_addr = root;
	iterator->level = vcpu->arch.mmu->shadow_root_level;

-	if (iterator->level == PT64_ROOT_4LEVEL &&
+	if (iterator->level >= PT64_ROOT_4LEVEL &&
	    vcpu->arch.mmu->root_level < PT64_ROOT_4LEVEL &&
	    !vcpu->arch.mmu->direct_map)
-		--iterator->level;
+		iterator->level = PT32E_ROOT_LEVEL;

	if (iterator->level == PT32E_ROOT_LEVEL) {
		/*

@@ -3976,6 +3980,20 @@ out_retry:
	return true;
}

+/*
+ * Returns true if the page fault is stale and needs to be retried, i.e. if the
+ * root was invalidated by a memslot update or a relevant mmu_notifier fired.
+ */
+static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
+				struct kvm_page_fault *fault, int mmu_seq)
+{
+	if (is_obsolete_sp(vcpu->kvm, to_shadow_page(vcpu->arch.mmu->root_hpa)))
+		return true;
+
+	return fault->slot &&
+	       mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva);
+}
+
static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
{
	bool is_tdp_mmu_fault = is_tdp_mmu(vcpu->arch.mmu);

@@ -4013,8 +4031,9 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
	else
		write_lock(&vcpu->kvm->mmu_lock);

-	if (fault->slot && mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva))
+	if (is_page_fault_stale(vcpu, fault, mmu_seq))
		goto out_unlock;
+
	r = make_mmu_pages_available(vcpu);
	if (r)
		goto out_unlock;

@@ -4855,7 +4874,7 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
	struct kvm_mmu *context = &vcpu->arch.guest_mmu;
	struct kvm_mmu_role_regs regs = {
		.cr0 = cr0,
-		.cr4 = cr4,
+		.cr4 = cr4 & ~X86_CR4_PKE,
		.efer = efer,
	};
	union kvm_mmu_role new_role;

@@ -4919,7 +4938,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
	context->direct_map = false;

	update_permission_bitmask(context, true);
-	update_pkru_bitmask(context);
+	context->pkru_mask = 0;
	reset_rsvds_bits_mask_ept(vcpu, context, execonly);
	reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
}

@@ -5025,6 +5044,14 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
	/*
	 * Invalidate all MMU roles to force them to reinitialize as CPUID
	 * information is factored into reserved bit calculations.
+	 *
+	 * Correctly handling multiple vCPU models with respect to paging and
+	 * physical address properties in a single VM would require tracking
+	 * all relevant CPUID information in kvm_mmu_page_role. That is very
+	 * undesirable as it would increase the memory requirements for
+	 * gfn_track (see struct kvm_mmu_page_role comments). For now that
+	 * problem is swept under the rug; KVM's CPUID API is horrific and
+	 * it's all but impossible to solve it without introducing a new API.
	 */
	vcpu->arch.root_mmu.mmu_role.ext.valid = 0;
	vcpu->arch.guest_mmu.mmu_role.ext.valid = 0;

@@ -5032,24 +5059,10 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
	kvm_mmu_reset_context(vcpu);

	/*
-	 * KVM does not correctly handle changing guest CPUID after KVM_RUN, as
-	 * MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc.. aren't
-	 * tracked in kvm_mmu_page_role. As a result, KVM may miss guest page
-	 * faults due to reusing SPs/SPTEs. Alert userspace, but otherwise
-	 * sweep the problem under the rug.
-	 *
-	 * KVM's horrific CPUID ABI makes the problem all but impossible to
-	 * solve, as correctly handling multiple vCPU models (with respect to
-	 * paging and physical address properties) in a single VM would require
-	 * tracking all relevant CPUID information in kvm_mmu_page_role. That
-	 * is very undesirable as it would double the memory requirements for
-	 * gfn_track (see struct kvm_mmu_page_role comments), and in practice
-	 * no sane VMM mucks with the core vCPU model on the fly.
+	 * Changing guest CPUID after KVM_RUN is forbidden, see the comment in
+	 * kvm_arch_vcpu_ioctl().
	 */
-	if (vcpu->arch.last_vmentry_cpu != -1) {
-		pr_warn_ratelimited("KVM: KVM_SET_CPUID{,2} after KVM_RUN may cause guest instability\n");
-		pr_warn_ratelimited("KVM: KVM_SET_CPUID{,2} will fail after KVM_RUN starting with Linux 5.16\n");
-	}
+	KVM_BUG_ON(vcpu->arch.last_vmentry_cpu != -1, vcpu->kvm);
}

@@ -5369,7 +5382,7 @@ void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,

void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
{
-	kvm_mmu_invalidate_gva(vcpu, vcpu->arch.mmu, gva, INVALID_PAGE);
+	kvm_mmu_invalidate_gva(vcpu, vcpu->arch.walk_mmu, gva, INVALID_PAGE);
	++vcpu->stat.invlpg;
}
EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);

@@ -5854,8 +5867,6 @@ restart:
void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
				   const struct kvm_memory_slot *slot)
{
-	bool flush = false;
-
	if (kvm_memslots_have_rmaps(kvm)) {
		write_lock(&kvm->mmu_lock);
		/*

@@ -5863,17 +5874,14 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
		 * logging at a 4k granularity and never creates collapsible
		 * 2m SPTEs during dirty logging.
		 */
-		flush = slot_handle_level_4k(kvm, slot, kvm_mmu_zap_collapsible_spte, true);
-		if (flush)
+		if (slot_handle_level_4k(kvm, slot, kvm_mmu_zap_collapsible_spte, true))
			kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
		write_unlock(&kvm->mmu_lock);
	}

	if (is_tdp_mmu_enabled(kvm)) {
		read_lock(&kvm->mmu_lock);
-		flush = kvm_tdp_mmu_zap_collapsible_sptes(kvm, slot, flush);
-		if (flush)
-			kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
+		kvm_tdp_mmu_zap_collapsible_sptes(kvm, slot);
		read_unlock(&kvm->mmu_lock);
	}
}

@@ -6182,23 +6190,46 @@ void kvm_mmu_module_exit(void)
	mmu_audit_disable();
}

+/*
+ * Calculate the effective recovery period, accounting for '0' meaning "let KVM
+ * select a halving time of 1 hour". Returns true if recovery is enabled.
+ */
+static bool calc_nx_huge_pages_recovery_period(uint *period)
+{
+	/*
+	 * Use READ_ONCE to get the params, this may be called outside of the
+	 * param setters, e.g. by the kthread to compute its next timeout.
+	 */
+	bool enabled = READ_ONCE(nx_huge_pages);
+	uint ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
+
+	if (!enabled || !ratio)
+		return false;
+
+	*period = READ_ONCE(nx_huge_pages_recovery_period_ms);
+	if (!*period) {
+		/* Make sure the period is not less than one second. */
+		ratio = min(ratio, 3600u);
+		*period = 60 * 60 * 1000 / ratio;
+	}
+	return true;
+}
+
static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel_param *kp)
{
	bool was_recovery_enabled, is_recovery_enabled;
	uint old_period, new_period;
	int err;

-	was_recovery_enabled = nx_huge_pages_recovery_ratio;
-	old_period = nx_huge_pages_recovery_period_ms;
+	was_recovery_enabled = calc_nx_huge_pages_recovery_period(&old_period);

	err = param_set_uint(val, kp);
	if (err)
		return err;

-	is_recovery_enabled = nx_huge_pages_recovery_ratio;
-	new_period = nx_huge_pages_recovery_period_ms;
+	is_recovery_enabled = calc_nx_huge_pages_recovery_period(&new_period);

-	if (READ_ONCE(nx_huge_pages) && is_recovery_enabled &&
+	if (is_recovery_enabled &&
	    (!was_recovery_enabled || old_period > new_period)) {
		struct kvm *kvm;

@@ -6262,18 +6293,13 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)

static long get_nx_lpage_recovery_timeout(u64 start_time)
{
-	uint ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
-	uint period = READ_ONCE(nx_huge_pages_recovery_period_ms);
+	bool enabled;
+	uint period;

-	if (!period && ratio) {
-		/* Make sure the period is not less than one second. */
-		ratio = min(ratio, 3600u);
-		period = 60 * 60 * 1000 / ratio;
-	}
+	enabled = calc_nx_huge_pages_recovery_period(&period);

-	return READ_ONCE(nx_huge_pages) && ratio
-		? start_time + msecs_to_jiffies(period) - get_jiffies_64()
-		: MAX_SCHEDULE_TIMEOUT;
+	return enabled ? start_time + msecs_to_jiffies(period) - get_jiffies_64()
+		       : MAX_SCHEDULE_TIMEOUT;
}

static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data)

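The new helper folds the parameter rules into one place: recovery is off when nx_huge_pages or the ratio is 0, and a period of 0 means "one hour divided by the ratio", with the ratio capped so the period never drops below one second. A standalone version of just that math:

#include <stdbool.h>

/* Same arithmetic as calc_nx_huge_pages_recovery_period(), with the
 * module parameters passed in explicitly instead of read via READ_ONCE. */
static bool calc_recovery_period(bool enabled, unsigned ratio,
                                 unsigned period_ms, unsigned *period)
{
    if (!enabled || !ratio)
        return false;	/* recovery disabled */

    *period = period_ms;
    if (!*period) {
        if (ratio > 3600)	/* clamp: period stays >= 1 second */
            ratio = 3600;
        *period = 60 * 60 * 1000 / ratio;
    }
    return true;
}

For example, enabled with ratio=10 and period_ms=0 yields 360000 ms, i.e. a recovery sweep every six minutes.
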
@@ -911,7 +911,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault

	r = RET_PF_RETRY;
	write_lock(&vcpu->kvm->mmu_lock);
-	if (fault->slot && mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva))
+
+	if (is_page_fault_stale(vcpu, fault, mmu_seq))
		goto out_unlock;

	kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);

@@ -317,9 +317,6 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
	struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(pt));
	int level = sp->role.level;
	gfn_t base_gfn = sp->gfn;
-	u64 old_child_spte;
-	u64 *sptep;
-	gfn_t gfn;
	int i;

	trace_kvm_mmu_prepare_zap_page(sp);

@@ -327,8 +324,9 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
	tdp_mmu_unlink_page(kvm, sp, shared);

	for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
-		sptep = rcu_dereference(pt) + i;
-		gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level);
+		u64 *sptep = rcu_dereference(pt) + i;
+		gfn_t gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level);
+		u64 old_child_spte;

		if (shared) {
			/*

@@ -374,7 +372,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
					    shared);
	}

-	kvm_flush_remote_tlbs_with_address(kvm, gfn,
+	kvm_flush_remote_tlbs_with_address(kvm, base_gfn,
					   KVM_PAGES_PER_HPAGE(level + 1));

	call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback);

@@ -1033,9 +1031,9 @@ bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range,
{
	struct kvm_mmu_page *root;

-	for_each_tdp_mmu_root(kvm, root, range->slot->as_id)
-		flush |= zap_gfn_range(kvm, root, range->start, range->end,
-				       range->may_block, flush, false);
+	for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false)
+		flush = zap_gfn_range(kvm, root, range->start, range->end,
+				      range->may_block, flush, false);

	return flush;
}

@@ -1364,10 +1362,9 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
 * Clear leaf entries which could be replaced by large mappings, for
 * GFNs within the slot.
 */
-static bool zap_collapsible_spte_range(struct kvm *kvm,
+static void zap_collapsible_spte_range(struct kvm *kvm,
				       struct kvm_mmu_page *root,
-				       const struct kvm_memory_slot *slot,
-				       bool flush)
+				       const struct kvm_memory_slot *slot)
{
	gfn_t start = slot->base_gfn;
	gfn_t end = start + slot->npages;

@@ -1378,10 +1375,8 @@ static bool zap_collapsible_spte_range(struct kvm *kvm,

	tdp_root_for_each_pte(iter, root, start, end) {
retry:
-		if (tdp_mmu_iter_cond_resched(kvm, &iter, flush, true)) {
-			flush = false;
+		if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
			continue;
-		}

		if (!is_shadow_present_pte(iter.old_spte) ||
		    !is_last_spte(iter.old_spte, iter.level))

@@ -1393,6 +1388,7 @@ retry:
						      pfn, PG_LEVEL_NUM))
			continue;

+		/* Note, a successful atomic zap also does a remote TLB flush. */
		if (!tdp_mmu_zap_spte_atomic(kvm, &iter)) {
			/*
			 * The iter must explicitly re-read the SPTE because

@@ -1401,30 +1397,24 @@ retry:
			iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
			goto retry;
		}
-		flush = true;
	}

	rcu_read_unlock();
-
-	return flush;
}

/*
 * Clear non-leaf entries (and free associated page tables) which could
 * be replaced by large mappings, for GFNs within the slot.
 */
-bool kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
-				       const struct kvm_memory_slot *slot,
-				       bool flush)
+void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
+				       const struct kvm_memory_slot *slot)
{
	struct kvm_mmu_page *root;

	lockdep_assert_held_read(&kvm->mmu_lock);

	for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
-		flush = zap_collapsible_spte_range(kvm, root, slot, flush);
-
-	return flush;
+		zap_collapsible_spte_range(kvm, root, slot);
}

/*

@@ -64,9 +64,8 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
				       struct kvm_memory_slot *slot,
				       gfn_t gfn, unsigned long mask,
				       bool wrprot);
-bool kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
-				       const struct kvm_memory_slot *slot,
-				       bool flush);
+void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
+				       const struct kvm_memory_slot *slot);

bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
				   struct kvm_memory_slot *slot, gfn_t gfn,

@@ -900,6 +900,7 @@ out:
bool svm_check_apicv_inhibit_reasons(ulong bit)
{
	ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
+			  BIT(APICV_INHIBIT_REASON_ABSENT) |
			  BIT(APICV_INHIBIT_REASON_HYPERV) |
			  BIT(APICV_INHIBIT_REASON_NESTED) |
			  BIT(APICV_INHIBIT_REASON_IRQWIN) |

@@ -989,16 +990,18 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
{
	struct vcpu_svm *svm = to_svm(vcpu);
+	int cpu = get_cpu();

+	WARN_ON(cpu != vcpu->cpu);
	svm->avic_is_running = is_run;

-	if (!kvm_vcpu_apicv_active(vcpu))
-		return;
-
-	if (is_run)
-		avic_vcpu_load(vcpu, vcpu->cpu);
-	else
-		avic_vcpu_put(vcpu);
+	if (kvm_vcpu_apicv_active(vcpu)) {
+		if (is_run)
+			avic_vcpu_load(vcpu, cpu);
+		else
+			avic_vcpu_put(vcpu);
+	}
+	put_cpu();
}

void svm_vcpu_blocking(struct kvm_vcpu *vcpu)

@@ -281,7 +281,7 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
	pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS;

	pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1;
-	pmu->reserved_bits = 0xffffffff00200000ull;
+	pmu->reserved_bits = 0xfffffff000280000ull;
	pmu->version = 1;
	/* not applicable to AMD; but clean them to prevent any fall out */
	pmu->counter_bitmask[KVM_PMC_FIXED] = 0;

@@ -1543,28 +1543,50 @@ static bool is_cmd_allowed_from_mirror(u32 cmd_id)
	return false;
}

-static int sev_lock_for_migration(struct kvm *kvm)
+static int sev_lock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
{
-	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+	struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info;
+	struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info;
+	int r = -EBUSY;
+
+	if (dst_kvm == src_kvm)
+		return -EINVAL;

	/*
-	 * Bail if this VM is already involved in a migration to avoid deadlock
-	 * between two VMs trying to migrate to/from each other.
+	 * Bail if these VMs are already involved in a migration to avoid
+	 * deadlock between two VMs trying to migrate to/from each other.
	 */
-	if (atomic_cmpxchg_acquire(&sev->migration_in_progress, 0, 1))
+	if (atomic_cmpxchg_acquire(&dst_sev->migration_in_progress, 0, 1))
		return -EBUSY;

-	mutex_lock(&kvm->lock);
+	if (atomic_cmpxchg_acquire(&src_sev->migration_in_progress, 0, 1))
+		goto release_dst;

+	r = -EINTR;
+	if (mutex_lock_killable(&dst_kvm->lock))
+		goto release_src;
+	if (mutex_lock_killable(&src_kvm->lock))
+		goto unlock_dst;
	return 0;
+
+unlock_dst:
+	mutex_unlock(&dst_kvm->lock);
+release_src:
+	atomic_set_release(&src_sev->migration_in_progress, 0);
+release_dst:
+	atomic_set_release(&dst_sev->migration_in_progress, 0);
+	return r;
}

-static void sev_unlock_after_migration(struct kvm *kvm)
+static void sev_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
{
-	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+	struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info;
+	struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info;

-	mutex_unlock(&kvm->lock);
-	atomic_set_release(&sev->migration_in_progress, 0);
+	mutex_unlock(&dst_kvm->lock);
+	mutex_unlock(&src_kvm->lock);
+	atomic_set_release(&dst_sev->migration_in_progress, 0);
+	atomic_set_release(&src_sev->migration_in_progress, 0);
}

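sev_lock_two_vms() avoids an ABBA deadlock between two VMs migrating to or from each other: both per-VM flags are claimed with a try-style cmpxchg before either mutex is taken, so a racing second migration fails with -EBUSY instead of blocking while holding one lock. A userspace model using C11 atomics and pthreads (names and the simplified struct are mine, not KVM's; mutexes are assumed initialized with PTHREAD_MUTEX_INITIALIZER):

#include <pthread.h>
#include <stdatomic.h>
#include <errno.h>

struct vm {
    pthread_mutex_t lock;
    atomic_int migration_in_progress;
};

/* Claim both flags first (try-acquire), then both mutexes; every
 * failure path releases exactly what was taken, in reverse order. */
static int lock_two_vms(struct vm *dst, struct vm *src)
{
    int expected = 0;

    if (dst == src)
        return -EINVAL;
    if (!atomic_compare_exchange_strong(&dst->migration_in_progress,
                                        &expected, 1))
        return -EBUSY;
    expected = 0;
    if (!atomic_compare_exchange_strong(&src->migration_in_progress,
                                        &expected, 1)) {
        atomic_store(&dst->migration_in_progress, 0);
        return -EBUSY;
    }
    /* Safe: the flags guarantee no other thread is racing on this pair. */
    pthread_mutex_lock(&dst->lock);
    pthread_mutex_lock(&src->lock);
    return 0;
}

static void unlock_two_vms(struct vm *dst, struct vm *src)
{
    pthread_mutex_unlock(&dst->lock);
    pthread_mutex_unlock(&src->lock);
    atomic_store(&src->migration_in_progress, 0);
    atomic_store(&dst->migration_in_progress, 0);
}
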
@@ -1607,14 +1629,15 @@ static void sev_migrate_from(struct kvm_sev_info *dst,
	dst->asid = src->asid;
	dst->handle = src->handle;
	dst->pages_locked = src->pages_locked;
+	dst->enc_context_owner = src->enc_context_owner;

	src->asid = 0;
	src->active = false;
	src->handle = 0;
	src->pages_locked = 0;
+	src->enc_context_owner = NULL;

-	INIT_LIST_HEAD(&dst->regions_list);
-	list_replace_init(&src->regions_list, &dst->regions_list);
+	list_cut_before(&dst->regions_list, &src->regions_list, &src->regions_list);
}

static int sev_es_migrate_from(struct kvm *dst, struct kvm *src)

@@ -1666,15 +1689,6 @@ int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd)
	bool charged = false;
	int ret;

-	ret = sev_lock_for_migration(kvm);
-	if (ret)
-		return ret;
-
-	if (sev_guest(kvm)) {
-		ret = -EINVAL;
-		goto out_unlock;
-	}
-
	source_kvm_file = fget(source_fd);
	if (!file_is_kvm(source_kvm_file)) {
		ret = -EBADF;

@@ -1682,16 +1696,26 @@ int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd)
	}

	source_kvm = source_kvm_file->private_data;
-	ret = sev_lock_for_migration(source_kvm);
+	ret = sev_lock_two_vms(kvm, source_kvm);
	if (ret)
		goto out_fput;

-	if (!sev_guest(source_kvm)) {
+	if (sev_guest(kvm) || !sev_guest(source_kvm)) {
		ret = -EINVAL;
-		goto out_source;
+		goto out_unlock;
	}

	src_sev = &to_kvm_svm(source_kvm)->sev_info;
+
+	/*
+	 * VMs mirroring src's encryption context rely on it to keep the
+	 * ASID allocated, but below we are clearing src_sev->asid.
+	 */
+	if (src_sev->num_mirrored_vms) {
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+
	dst_sev->misc_cg = get_current_misc_cg();
	cg_cleanup_sev = dst_sev;
	if (dst_sev->misc_cg != src_sev->misc_cg) {

@@ -1728,13 +1752,11 @@ out_dst_cgroup:
		sev_misc_cg_uncharge(cg_cleanup_sev);
	put_misc_cg(cg_cleanup_sev->misc_cg);
	cg_cleanup_sev->misc_cg = NULL;
-out_source:
-	sev_unlock_after_migration(source_kvm);
+out_unlock:
+	sev_unlock_two_vms(kvm, source_kvm);
out_fput:
	if (source_kvm_file)
		fput(source_kvm_file);
-out_unlock:
-	sev_unlock_after_migration(kvm);
	return ret;
}

@@ -1953,76 +1975,60 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
{
	struct file *source_kvm_file;
	struct kvm *source_kvm;
-	struct kvm_sev_info source_sev, *mirror_sev;
+	struct kvm_sev_info *source_sev, *mirror_sev;
	int ret;

	source_kvm_file = fget(source_fd);
	if (!file_is_kvm(source_kvm_file)) {
		ret = -EBADF;
-		goto e_source_put;
+		goto e_source_fput;
	}

	source_kvm = source_kvm_file->private_data;
-	mutex_lock(&source_kvm->lock);
+	ret = sev_lock_two_vms(kvm, source_kvm);
+	if (ret)
+		goto e_source_fput;

-	if (!sev_guest(source_kvm)) {
+	/*
+	 * Mirrors of mirrors should work, but let's not get silly. Also
+	 * disallow out-of-band SEV/SEV-ES init if the target is already an
+	 * SEV guest, or if vCPUs have been created. KVM relies on vCPUs being
+	 * created after SEV/SEV-ES initialization, e.g. to init intercepts.
+	 */
+	if (sev_guest(kvm) || !sev_guest(source_kvm) ||
+	    is_mirroring_enc_context(source_kvm) || kvm->created_vcpus) {
		ret = -EINVAL;
-		goto e_source_unlock;
+		goto e_unlock;
	}

-	/* Mirrors of mirrors should work, but let's not get silly */
-	if (is_mirroring_enc_context(source_kvm) || source_kvm == kvm) {
-		ret = -EINVAL;
-		goto e_source_unlock;
-	}
-
-	memcpy(&source_sev, &to_kvm_svm(source_kvm)->sev_info,
-	       sizeof(source_sev));
-
	/*
	 * The mirror kvm holds an enc_context_owner ref so its asid can't
	 * disappear until we're done with it
	 */
+	source_sev = &to_kvm_svm(source_kvm)->sev_info;
	kvm_get_kvm(source_kvm);
-
-	fput(source_kvm_file);
-	mutex_unlock(&source_kvm->lock);
-	mutex_lock(&kvm->lock);
-
-	/*
-	 * Disallow out-of-band SEV/SEV-ES init if the target is already an
-	 * SEV guest, or if vCPUs have been created. KVM relies on vCPUs being
-	 * created after SEV/SEV-ES initialization, e.g. to init intercepts.
-	 */
-	if (sev_guest(kvm) || kvm->created_vcpus) {
-		ret = -EINVAL;
-		goto e_mirror_unlock;
-	}
+	source_sev->num_mirrored_vms++;

	/* Set enc_context_owner and copy its encryption context over */
	mirror_sev = &to_kvm_svm(kvm)->sev_info;
	mirror_sev->enc_context_owner = source_kvm;
	mirror_sev->active = true;
-	mirror_sev->asid = source_sev.asid;
-	mirror_sev->fd = source_sev.fd;
-	mirror_sev->es_active = source_sev.es_active;
-	mirror_sev->handle = source_sev.handle;
+	mirror_sev->asid = source_sev->asid;
+	mirror_sev->fd = source_sev->fd;
+	mirror_sev->es_active = source_sev->es_active;
+	mirror_sev->handle = source_sev->handle;
	INIT_LIST_HEAD(&mirror_sev->regions_list);
+	ret = 0;

	/*
	 * Do not copy ap_jump_table. Since the mirror does not share the same
	 * KVM contexts as the original, and they may have different
	 * memory-views.
	 */

-	mutex_unlock(&kvm->lock);
-	return 0;
-
-e_mirror_unlock:
-	mutex_unlock(&kvm->lock);
-	kvm_put_kvm(source_kvm);
-	return ret;
-e_source_unlock:
-	mutex_unlock(&source_kvm->lock);
-e_source_put:
+e_unlock:
+	sev_unlock_two_vms(kvm, source_kvm);
+e_source_fput:
	if (source_kvm_file)
		fput(source_kvm_file);
	return ret;

@@ -2034,17 +2040,24 @@ void sev_vm_destroy(struct kvm *kvm)
	struct list_head *head = &sev->regions_list;
	struct list_head *pos, *q;

+	WARN_ON(sev->num_mirrored_vms);
+
	if (!sev_guest(kvm))
		return;

	/* If this is a mirror_kvm release the enc_context_owner and skip sev cleanup */
	if (is_mirroring_enc_context(kvm)) {
-		kvm_put_kvm(sev->enc_context_owner);
+		struct kvm *owner_kvm = sev->enc_context_owner;
+		struct kvm_sev_info *owner_sev = &to_kvm_svm(owner_kvm)->sev_info;
+
+		mutex_lock(&owner_kvm->lock);
+		if (!WARN_ON(!owner_sev->num_mirrored_vms))
+			owner_sev->num_mirrored_vms--;
+		mutex_unlock(&owner_kvm->lock);
+		kvm_put_kvm(owner_kvm);
		return;
	}

-	mutex_lock(&kvm->lock);
-
	/*
	 * Ensure that all guest tagged cache entries are flushed before
	 * releasing the pages back to the system for use. CLFLUSH will

@@ -2064,8 +2077,6 @@ void sev_vm_destroy(struct kvm *kvm)
		}
	}

-	mutex_unlock(&kvm->lock);
-
	sev_unbind_asid(kvm, sev->handle);
	sev_asid_free(sev);
}

@@ -2249,7 +2260,7 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
		__free_page(virt_to_page(svm->sev_es.vmsa));

	if (svm->sev_es.ghcb_sa_free)
-		kfree(svm->sev_es.ghcb_sa);
+		kvfree(svm->sev_es.ghcb_sa);
}

static void dump_ghcb(struct vcpu_svm *svm)

|
|||
memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
|
||||
}
|
||||
|
||||
static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
|
||||
static bool sev_es_validate_vmgexit(struct vcpu_svm *svm)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
struct ghcb *ghcb;
|
||||
u64 exit_code = 0;
|
||||
u64 exit_code;
|
||||
u64 reason;
|
||||
|
||||
ghcb = svm->sev_es.ghcb;
|
||||
|
||||
/* Only GHCB Usage code 0 is supported */
|
||||
if (ghcb->ghcb_usage)
|
||||
goto vmgexit_err;
|
||||
|
||||
/*
|
||||
* Retrieve the exit code now even though is may not be marked valid
|
||||
* Retrieve the exit code now even though it may not be marked valid
|
||||
* as it could help with debugging.
|
||||
*/
|
||||
exit_code = ghcb_get_sw_exit_code(ghcb);
|
||||
|
||||
/* Only GHCB Usage code 0 is supported */
|
||||
if (ghcb->ghcb_usage) {
|
||||
reason = GHCB_ERR_INVALID_USAGE;
|
||||
goto vmgexit_err;
|
||||
}
|
||||
|
||||
reason = GHCB_ERR_MISSING_INPUT;
|
||||
|
||||
if (!ghcb_sw_exit_code_is_valid(ghcb) ||
|
||||
!ghcb_sw_exit_info_1_is_valid(ghcb) ||
|
||||
!ghcb_sw_exit_info_2_is_valid(ghcb))
|
||||
|
@ -2437,30 +2453,34 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
|
|||
case SVM_VMGEXIT_UNSUPPORTED_EVENT:
|
||||
break;
|
||||
default:
|
||||
reason = GHCB_ERR_INVALID_EVENT;
|
||||
goto vmgexit_err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return true;
|
||||
|
||||
vmgexit_err:
|
||||
vcpu = &svm->vcpu;
|
||||
|
||||
if (ghcb->ghcb_usage) {
|
||||
if (reason == GHCB_ERR_INVALID_USAGE) {
|
||||
vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n",
|
||||
ghcb->ghcb_usage);
|
||||
} else if (reason == GHCB_ERR_INVALID_EVENT) {
|
||||
vcpu_unimpl(vcpu, "vmgexit: exit code %#llx is not valid\n",
|
||||
exit_code);
|
||||
} else {
|
||||
vcpu_unimpl(vcpu, "vmgexit: exit reason %#llx is not valid\n",
|
||||
vcpu_unimpl(vcpu, "vmgexit: exit code %#llx input is not valid\n",
|
||||
exit_code);
|
||||
dump_ghcb(svm);
|
||||
}
|
||||
|
||||
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
|
||||
vcpu->run->internal.ndata = 2;
|
||||
vcpu->run->internal.data[0] = exit_code;
|
||||
vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
|
||||
/* Clear the valid entries fields */
|
||||
memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
|
||||
|
||||
return -EINVAL;
|
||||
ghcb_set_sw_exit_info_1(ghcb, 2);
|
||||
ghcb_set_sw_exit_info_2(ghcb, reason);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void sev_es_unmap_ghcb(struct vcpu_svm *svm)
|
||||
|
@ -2482,7 +2502,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm)
|
|||
svm->sev_es.ghcb_sa_sync = false;
|
||||
}
|
||||
|
||||
kfree(svm->sev_es.ghcb_sa);
|
||||
kvfree(svm->sev_es.ghcb_sa);
|
||||
svm->sev_es.ghcb_sa = NULL;
|
||||
svm->sev_es.ghcb_sa_free = false;
|
||||
}
|
||||
|
@@ -2530,14 +2550,14 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
	scratch_gpa_beg = ghcb_get_sw_scratch(ghcb);
	if (!scratch_gpa_beg) {
		pr_err("vmgexit: scratch gpa not provided\n");
-		return false;
+		goto e_scratch;
	}

	scratch_gpa_end = scratch_gpa_beg + len;
	if (scratch_gpa_end < scratch_gpa_beg) {
		pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n",
		       len, scratch_gpa_beg);
-		return false;
+		goto e_scratch;
	}

	if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) {

@@ -2555,7 +2575,7 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
		    scratch_gpa_end > ghcb_scratch_end) {
			pr_err("vmgexit: scratch area is outside of GHCB shared buffer area (%#llx - %#llx)\n",
			       scratch_gpa_beg, scratch_gpa_end);
-			return false;
+			goto e_scratch;
		}

		scratch_va = (void *)svm->sev_es.ghcb;

@@ -2568,18 +2588,18 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
		if (len > GHCB_SCRATCH_AREA_LIMIT) {
			pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n",
			       len, GHCB_SCRATCH_AREA_LIMIT);
-			return false;
+			goto e_scratch;
		}
-		scratch_va = kzalloc(len, GFP_KERNEL_ACCOUNT);
+		scratch_va = kvzalloc(len, GFP_KERNEL_ACCOUNT);
		if (!scratch_va)
-			return false;
+			goto e_scratch;

		if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, len)) {
			/* Unable to copy scratch area from guest */
			pr_err("vmgexit: kvm_read_guest for scratch area failed\n");

-			kfree(scratch_va);
-			return false;
+			kvfree(scratch_va);
+			goto e_scratch;
		}

		/*

@@ -2596,6 +2616,12 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
	svm->sev_es.ghcb_sa_len = len;

	return true;
+
+e_scratch:
+	ghcb_set_sw_exit_info_1(ghcb, 2);
+	ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_SCRATCH_AREA);
+
+	return false;
}

static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask,

@@ -2646,7 +2672,7 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)

		ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_CPUID);
		if (!ret) {
-			ret = -EINVAL;
+			/* Error, keep GHCB MSR value as-is */
			break;
		}

@@ -2682,10 +2708,13 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
					      GHCB_MSR_TERM_REASON_POS);
		pr_info("SEV-ES guest requested termination: %#llx:%#llx\n",
			reason_set, reason_code);
-		fallthrough;
+
+		ret = -EINVAL;
+		break;
+	}
	default:
-		ret = -EINVAL;
+		/* Error, keep GHCB MSR value as-is */
		break;
	}

	trace_kvm_vmgexit_msr_protocol_exit(svm->vcpu.vcpu_id,

@@ -2709,14 +2738,18 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)

	if (!ghcb_gpa) {
		vcpu_unimpl(vcpu, "vmgexit: GHCB gpa is not set\n");
-		return -EINVAL;
+
+		/* Without a GHCB, just return right back to the guest */
+		return 1;
	}

	if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->sev_es.ghcb_map)) {
		/* Unable to map GHCB from guest */
		vcpu_unimpl(vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
			    ghcb_gpa);
-		return -EINVAL;
+
+		/* Without a GHCB, just return right back to the guest */
+		return 1;
	}

	svm->sev_es.ghcb = svm->sev_es.ghcb_map.hva;

@@ -2726,15 +2759,14 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)

	exit_code = ghcb_get_sw_exit_code(ghcb);

-	ret = sev_es_validate_vmgexit(svm);
-	if (ret)
-		return ret;
+	if (!sev_es_validate_vmgexit(svm))
+		return 1;

	sev_es_sync_from_ghcb(svm);
	ghcb_set_sw_exit_info_1(ghcb, 0);
	ghcb_set_sw_exit_info_2(ghcb, 0);

-	ret = -EINVAL;
+	ret = 1;
	switch (exit_code) {
	case SVM_VMGEXIT_MMIO_READ:
		if (!setup_vmgexit_scratch(svm, true, control->exit_info_2))

@@ -2775,20 +2807,17 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
		default:
			pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n",
			       control->exit_info_1);
-			ghcb_set_sw_exit_info_1(ghcb, 1);
-			ghcb_set_sw_exit_info_2(ghcb,
-						X86_TRAP_UD |
-						SVM_EVTINJ_TYPE_EXEPT |
-						SVM_EVTINJ_VALID);
+			ghcb_set_sw_exit_info_1(ghcb, 2);
+			ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_INPUT);
		}

-		ret = 1;
		break;
	}
	case SVM_VMGEXIT_UNSUPPORTED_EVENT:
		vcpu_unimpl(vcpu,
			    "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
			    control->exit_info_1, control->exit_info_2);
-		ret = -EINVAL;
		break;
	default:
		ret = svm_invoke_exit_handler(vcpu, exit_code);

@@ -2810,7 +2839,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
		return -EINVAL;

	if (!setup_vmgexit_scratch(svm, in, bytes))
-		return -EINVAL;
+		return 1;

	return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->sev_es.ghcb_sa,
				    count, in);

|
|||
.load_eoi_exitmap = svm_load_eoi_exitmap,
|
||||
.hwapic_irr_update = svm_hwapic_irr_update,
|
||||
.hwapic_isr_update = svm_hwapic_isr_update,
|
||||
.sync_pir_to_irr = kvm_lapic_find_highest_irr,
|
||||
.apicv_post_state_restore = avic_post_state_restore,
|
||||
|
||||
.set_tss_addr = svm_set_tss_addr,
|
||||
|
|
|
@ -79,6 +79,7 @@ struct kvm_sev_info {
|
|||
struct list_head regions_list; /* List of registered regions */
|
||||
u64 ap_jump_table; /* SEV-ES AP Jump Table address */
|
||||
struct kvm *enc_context_owner; /* Owner of copied encryption context */
|
||||
unsigned long num_mirrored_vms; /* Number of VMs sharing this ASID */
|
||||
struct misc_cg *misc_cg; /* For misc cgroup accounting */
|
||||
atomic_t migration_in_progress;
|
||||
};
|
||||
|
|
|
@@ -1162,29 +1162,26 @@ static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
	WARN_ON(!enable_vpid);

	/*
-	 * If VPID is enabled and used by vmc12, but L2 does not have a unique
-	 * TLB tag (ASID), i.e. EPT is disabled and KVM was unable to allocate
-	 * a VPID for L2, flush the current context as the effective ASID is
-	 * common to both L1 and L2.
-	 *
-	 * Defer the flush so that it runs after vmcs02.EPTP has been set by
-	 * KVM_REQ_LOAD_MMU_PGD (if nested EPT is enabled) and to avoid
-	 * redundant flushes further down the nested pipeline.
-	 *
-	 * If a TLB flush isn't required due to any of the above, and vpid12 is
-	 * changing then the new "virtual" VPID (vpid12) will reuse the same
-	 * "real" VPID (vpid02), and so needs to be flushed. There's no direct
-	 * mapping between vpid02 and vpid12, vpid02 is per-vCPU and reused for
-	 * all nested vCPUs. Remember, a flush on VM-Enter does not invalidate
-	 * guest-physical mappings, so there is no need to sync the nEPT MMU.
+	 * VPID is enabled and in use by vmcs12. If vpid12 is changing, then
+	 * emulate a guest TLB flush as KVM does not track vpid12 history nor
+	 * is the VPID incorporated into the MMU context. I.e. KVM must assume
+	 * that the new vpid12 has never been used and thus represents a new
+	 * guest ASID that cannot have entries in the TLB.
	 */
-	if (!nested_has_guest_tlb_tag(vcpu)) {
-		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
-	} else if (is_vmenter &&
-		   vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
+	if (is_vmenter && vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
		vmx->nested.last_vpid = vmcs12->virtual_processor_id;
-		vpid_sync_context(nested_get_vpid02(vcpu));
+		kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
+		return;
	}
+
+	/*
+	 * If VPID is enabled, used by vmc12, and vpid12 is not changing but
+	 * does not have a unique TLB tag (ASID), i.e. EPT is disabled and
+	 * KVM was unable to allocate a VPID for L2, flush the current context
+	 * as the effective ASID is common to both L1 and L2.
+	 */
+	if (!nested_has_guest_tlb_tag(vcpu))
+		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
}

static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)

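The rewritten ordering means a changed vpid12 always requests a guest TLB flush and returns before the shared-ASID fallback is even considered, whereas the old code let the !nested_has_guest_tlb_tag() branch win. The decision order, as a sketch (types and names are illustrative, not KVM's):

enum flush { FLUSH_NONE, FLUSH_GUEST, FLUSH_CURRENT };

/* A changed vpid12 wins and requests a guest TLB flush; only then is
 * the "L2 has no unique TLB tag" case checked. */
static enum flush nested_vpid_flush(int is_vmenter, int vpid12_changed,
                                    int has_guest_tlb_tag)
{
    if (is_vmenter && vpid12_changed)
        return FLUSH_GUEST;
    if (!has_guest_tlb_tag)
        return FLUSH_CURRENT;
    return FLUSH_NONE;
}
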
@@ -2594,8 +2591,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,

	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
	    WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
-				     vmcs12->guest_ia32_perf_global_ctrl)))
+				     vmcs12->guest_ia32_perf_global_ctrl))) {
+		*entry_failure_code = ENTRY_FAIL_DEFAULT;
		return -EINVAL;
+	}

	kvm_rsp_write(vcpu, vmcs12->guest_rsp);
	kvm_rip_write(vcpu, vmcs12->guest_rip);

@@ -3344,8 +3343,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
	};
	u32 failed_index;

-	if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
-		kvm_vcpu_flush_tlb_current(vcpu);
+	kvm_service_local_tlb_flush_requests(vcpu);

	evaluate_pending_interrupts = exec_controls_get(vmx) &
		(CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING);

@@ -4502,9 +4500,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
		(void)nested_get_evmcs_page(vcpu);
	}

-	/* Service the TLB flush request for L2 before switching to L1. */
-	if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
-		kvm_vcpu_flush_tlb_current(vcpu);
+	/* Service pending TLB flush requests for L2 before switching to L1. */
+	kvm_service_local_tlb_flush_requests(vcpu);

	/*
	 * VCPU_EXREG_PDPTR will be clobbered in arch/x86/kvm/vmx/vmx.h between

@@ -4857,6 +4854,7 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
	if (!vmx->nested.cached_vmcs12)
		goto out_cached_vmcs12;

+	vmx->nested.shadow_vmcs12_cache.gpa = INVALID_GPA;
	vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
	if (!vmx->nested.cached_shadow_vmcs12)
		goto out_cached_shadow_vmcs12;

@@ -5289,8 +5287,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
		struct gfn_to_hva_cache *ghc = &vmx->nested.vmcs12_cache;
		struct vmcs_hdr hdr;

-		if (ghc->gpa != vmptr &&
-		    kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, vmptr, VMCS12_SIZE)) {
+		if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, vmptr, VMCS12_SIZE)) {
			/*
			 * Reads from an unbacked page return all 1s,
			 * which means that the 32 bits located at the

@@ -5,6 +5,7 @@
 #include <asm/cpu.h>
 
 #include "lapic.h"
+#include "irq.h"
 #include "posted_intr.h"
 #include "trace.h"
 #include "vmx.h"
@@ -77,13 +78,18 @@ after_clear_sn:
 		pi_set_on(pi_desc);
 }
 
+static bool vmx_can_use_vtd_pi(struct kvm *kvm)
+{
+	return irqchip_in_kernel(kvm) && enable_apicv &&
+	       kvm_arch_has_assigned_device(kvm) &&
+	       irq_remapping_cap(IRQ_POSTING_CAP);
+}
+
 void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
 {
 	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
 
-	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
-	    !irq_remapping_cap(IRQ_POSTING_CAP) ||
-	    !kvm_vcpu_apicv_active(vcpu))
+	if (!vmx_can_use_vtd_pi(vcpu->kvm))
 		return;
 
 	/* Set SN when the vCPU is preempted */
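The same multi-clause eligibility test was previously open-coded at each posted-interrupt call site, and the copies had drifted (one sampled kvm->vcpus[0]); the new vmx_can_use_vtd_pi() gives the condition a single home. A hedged, stand-alone sketch of that consolidation pattern, with toy boolean fields standing in for the real predicates:

    #include <stdbool.h>
    #include <stdio.h>

    /* Toy stand-ins for the real predicates; names invented for illustration. */
    struct kvm {
        bool irqchip_in_kernel, apicv_enabled;
        bool has_assigned_device, posting_cap;
    };

    /* One helper instead of several hand-copied condition chains. */
    static bool can_use_vtd_pi(const struct kvm *kvm)
    {
        return kvm->irqchip_in_kernel && kvm->apicv_enabled &&
               kvm->has_assigned_device && kvm->posting_cap;
    }

    static void vcpu_pi_put(const struct kvm *kvm)
    {
        if (!can_use_vtd_pi(kvm))
            return;
        puts("update posted-interrupt descriptor");
    }

    int main(void)
    {
        struct kvm kvm = { true, true, true, false };
        vcpu_pi_put(&kvm);   /* no posting capability: early return */
        kvm.posting_cap = true;
        vcpu_pi_put(&kvm);   /* now eligible */
        return 0;
    }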
@@ -141,9 +147,7 @@ int pi_pre_block(struct kvm_vcpu *vcpu)
 	struct pi_desc old, new;
 	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
 
-	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
-	    !irq_remapping_cap(IRQ_POSTING_CAP) ||
-	    !kvm_vcpu_apicv_active(vcpu))
+	if (!vmx_can_use_vtd_pi(vcpu->kvm))
 		return 0;
 
 	WARN_ON(irqs_disabled());
@@ -270,9 +274,7 @@ int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq,
 	struct vcpu_data vcpu_info;
 	int idx, ret = 0;
 
-	if (!kvm_arch_has_assigned_device(kvm) ||
-	    !irq_remapping_cap(IRQ_POSTING_CAP) ||
-	    !kvm_vcpu_apicv_active(kvm->vcpus[0]))
+	if (!vmx_can_use_vtd_pi(kvm))
 		return 0;
 
 	idx = srcu_read_lock(&kvm->irq_srcu);
@@ -2918,6 +2918,13 @@ static void vmx_flush_tlb_all(struct kvm_vcpu *vcpu)
 	}
 }
 
+static inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu)
+{
+	if (is_guest_mode(vcpu))
+		return nested_get_vpid02(vcpu);
+	return to_vmx(vcpu)->vpid;
+}
+
 static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *mmu = vcpu->arch.mmu;
@@ -2930,31 +2937,29 @@ static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu)
 	if (enable_ept)
 		ept_sync_context(construct_eptp(vcpu, root_hpa,
 						mmu->shadow_root_level));
-	else if (!is_guest_mode(vcpu))
-		vpid_sync_context(to_vmx(vcpu)->vpid);
 	else
-		vpid_sync_context(nested_get_vpid02(vcpu));
+		vpid_sync_context(vmx_get_current_vpid(vcpu));
 }
 
 static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
 {
 	/*
-	 * vpid_sync_vcpu_addr() is a nop if vmx->vpid==0, see the comment in
+	 * vpid_sync_vcpu_addr() is a nop if vpid==0, see the comment in
 	 * vmx_flush_tlb_guest() for an explanation of why this is ok.
 	 */
-	vpid_sync_vcpu_addr(to_vmx(vcpu)->vpid, addr);
+	vpid_sync_vcpu_addr(vmx_get_current_vpid(vcpu), addr);
 }
 
 static void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu)
 {
 	/*
-	 * vpid_sync_context() is a nop if vmx->vpid==0, e.g. if enable_vpid==0
-	 * or a vpid couldn't be allocated for this vCPU. VM-Enter and VM-Exit
-	 * are required to flush GVA->{G,H}PA mappings from the TLB if vpid is
+	 * vpid_sync_context() is a nop if vpid==0, e.g. if enable_vpid==0 or a
+	 * vpid couldn't be allocated for this vCPU. VM-Enter and VM-Exit are
+	 * required to flush GVA->{G,H}PA mappings from the TLB if vpid is
 	 * disabled (VM-Enter with vpid enabled and vpid==0 is disallowed),
 	 * i.e. no explicit INVVPID is necessary.
 	 */
-	vpid_sync_context(to_vmx(vcpu)->vpid);
+	vpid_sync_context(vmx_get_current_vpid(vcpu));
 }
 
 void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu)
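All three flush variants now ask one helper which TLB tag is currently live instead of open-coding the L1-versus-L2 choice. A minimal sketch of the select-by-mode helper; vpid01/vpid02 are invented field names standing in for the L1 and L2 tags:

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical per-vCPU state; vpid02 is the ID used while running L2. */
    struct vcpu { bool guest_mode; int vpid01, vpid02; };

    /* Pick the TLB tag for the *current* context, as vmx_get_current_vpid() does. */
    static int current_vpid(const struct vcpu *v)
    {
        return v->guest_mode ? v->vpid02 : v->vpid01;
    }

    static void flush_tlb_guest(const struct vcpu *v)
    {
        /* A real INVVPID would go here; vpid==0 would make it a no-op. */
        printf("invvpid single-context, vpid=%d\n", current_vpid(v));
    }

    int main(void)
    {
        struct vcpu v = { .guest_mode = false, .vpid01 = 1, .vpid02 = 2 };
        flush_tlb_guest(&v);  /* flushes L1's tag */
        v.guest_mode = true;
        flush_tlb_guest(&v);  /* flushes L2's tag */
        return 0;
    }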
@@ -6262,9 +6267,9 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	int max_irr;
-	bool max_irr_updated;
+	bool got_posted_interrupt;
 
-	if (KVM_BUG_ON(!vcpu->arch.apicv_active, vcpu->kvm))
+	if (KVM_BUG_ON(!enable_apicv, vcpu->kvm))
 		return -EIO;
 
 	if (pi_test_on(&vmx->pi_desc)) {
@@ -6274,22 +6279,33 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
 		 * But on x86 this is just a compiler barrier anyway.
 		 */
 		smp_mb__after_atomic();
-		max_irr_updated =
+		got_posted_interrupt =
 			kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr);
-
-		/*
-		 * If we are running L2 and L1 has a new pending interrupt
-		 * which can be injected, this may cause a vmexit or it may
-		 * be injected into L2. Either way, this interrupt will be
-		 * processed via KVM_REQ_EVENT, not RVI, because we do not use
-		 * virtual interrupt delivery to inject L1 interrupts into L2.
-		 */
-		if (is_guest_mode(vcpu) && max_irr_updated)
-			kvm_make_request(KVM_REQ_EVENT, vcpu);
 	} else {
 		max_irr = kvm_lapic_find_highest_irr(vcpu);
+		got_posted_interrupt = false;
 	}
-	vmx_hwapic_irr_update(vcpu, max_irr);
+
+	/*
+	 * Newly recognized interrupts are injected via either virtual interrupt
+	 * delivery (RVI) or KVM_REQ_EVENT. Virtual interrupt delivery is
+	 * disabled in two cases:
+	 *
+	 * 1) If L2 is running and the vCPU has a new pending interrupt. If L1
+	 * wants to exit on interrupts, KVM_REQ_EVENT is needed to synthesize a
+	 * VM-Exit to L1. If L1 doesn't want to exit, the interrupt is injected
+	 * into L2, but KVM doesn't use virtual interrupt delivery to inject
+	 * interrupts into L2, and so KVM_REQ_EVENT is again needed.
+	 *
+	 * 2) If APICv is disabled for this vCPU, assigned devices may still
+	 * attempt to post interrupts. The posted interrupt vector will cause
+	 * a VM-Exit and the subsequent entry will call sync_pir_to_irr.
+	 */
+	if (!is_guest_mode(vcpu) && kvm_vcpu_apicv_active(vcpu))
+		vmx_set_rvi(max_irr);
+	else if (got_posted_interrupt)
+		kvm_make_request(KVM_REQ_EVENT, vcpu);
 
 	return max_irr;
 }
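The new block comment encodes a small decision table: program RVI only when the vCPU is in L1 with APICv active, otherwise fall back to an explicit event request if a posted interrupt arrived. Sketched as a stand-alone function with invented names:

    #include <stdbool.h>
    #include <stdio.h>

    /*
     * Toy model of the injection decision in the rewritten
     * vmx_sync_pir_to_irr(); this is an illustration, not KVM code.
     */
    static const char *injection_path(bool guest_mode, bool apicv_active,
                                      bool got_posted_interrupt)
    {
        if (!guest_mode && apicv_active)
            return "program RVI";
        if (got_posted_interrupt)
            return "raise KVM_REQ_EVENT";
        return "nothing to do";
    }

    int main(void)
    {
        printf("%s\n", injection_path(false, true,  true));  /* RVI */
        printf("%s\n", injection_path(true,  true,  true));  /* event request */
        printf("%s\n", injection_path(false, false, false)); /* no-op */
        return 0;
    }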
@@ -7509,6 +7525,7 @@ static void hardware_unsetup(void)
 static bool vmx_check_apicv_inhibit_reasons(ulong bit)
 {
 	ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
+			  BIT(APICV_INHIBIT_REASON_ABSENT) |
 			  BIT(APICV_INHIBIT_REASON_HYPERV) |
 			  BIT(APICV_INHIBIT_REASON_BLOCKIRQ);
 
@@ -7761,10 +7778,10 @@ static __init int hardware_setup(void)
 		ple_window_shrink = 0;
 	}
 
-	if (!cpu_has_vmx_apicv()) {
+	if (!cpu_has_vmx_apicv())
 		enable_apicv = 0;
+	if (!enable_apicv)
 		vmx_x86_ops.sync_pir_to_irr = NULL;
-	}
 
 	if (cpu_has_vmx_tsc_scaling()) {
 		kvm_has_tsc_control = true;
@@ -3258,6 +3258,29 @@ static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
 	static_call(kvm_x86_tlb_flush_guest)(vcpu);
 }
 
+
+static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu)
+{
+	++vcpu->stat.tlb_flush;
+	static_call(kvm_x86_tlb_flush_current)(vcpu);
+}
+
+/*
+ * Service "local" TLB flush requests, which are specific to the current MMU
+ * context.  In addition to the generic event handling in vcpu_enter_guest(),
+ * TLB flushes that are targeted at an MMU context also need to be serviced
+ * prior to nested VM-Enter/VM-Exit.
+ */
+void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu)
+{
+	if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
+		kvm_vcpu_flush_tlb_current(vcpu);
+
+	if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
+		kvm_vcpu_flush_tlb_guest(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_service_local_tlb_flush_requests);
+
 static void record_steal_time(struct kvm_vcpu *vcpu)
 {
 	struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
@@ -4133,6 +4156,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_SGX_ATTRIBUTE:
 #endif
 	case KVM_CAP_VM_COPY_ENC_CONTEXT_FROM:
+	case KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM:
 	case KVM_CAP_SREGS2:
 	case KVM_CAP_EXIT_ON_EMULATION_FAILURE:
 	case KVM_CAP_VCPU_ATTRIBUTES:
@@ -4448,8 +4472,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
 				    struct kvm_lapic_state *s)
 {
-	if (vcpu->arch.apicv_active)
-		static_call(kvm_x86_sync_pir_to_irr)(vcpu);
+	static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
 
 	return kvm_apic_get_state(vcpu, s);
 }
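static_call_cond() turns the call into a no-op when no handler is installed, which is what lets the open-coded apicv_active checks disappear at these call sites. A portable approximation with a NULL-checked function pointer (toy names only; this is not the kernel's static-call implementation, which patches the call site itself):

    #include <stdio.h>

    struct vcpu { int dummy; };

    /* Optional hook: NULL means "skip", mirroring static_call_cond(). */
    static void (*sync_pir_to_irr)(struct vcpu *);

    static void vmx_sync(struct vcpu *v)
    {
        (void)v;
        puts("sync PIR to IRR");
    }

    static void get_lapic_state(struct vcpu *v)
    {
        if (sync_pir_to_irr)      /* NULL target: nothing to do */
            sync_pir_to_irr(v);
        puts("copy APIC state");
    }

    int main(void)
    {
        struct vcpu v = { 0 };
        get_lapic_state(&v);      /* hook unset: skipped */
        sync_pir_to_irr = vmx_sync;
        get_lapic_state(&v);      /* hook set: invoked */
        return 0;
    }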
@@ -5124,6 +5147,17 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		struct kvm_cpuid __user *cpuid_arg = argp;
 		struct kvm_cpuid cpuid;
 
+		/*
+		 * KVM does not correctly handle changing guest CPUID after KVM_RUN, as
+		 * MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc. aren't
+		 * tracked in kvm_mmu_page_role.  As a result, KVM may miss guest page
+		 * faults due to reusing SPs/SPTEs.  In practice no sane VMM mucks with
+		 * the core vCPU model on the fly, so fail.
+		 */
+		r = -EINVAL;
+		if (vcpu->arch.last_vmentry_cpu != -1)
+			goto out;
+
 		r = -EFAULT;
 		if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
 			goto out;
@@ -5134,6 +5168,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		struct kvm_cpuid2 __user *cpuid_arg = argp;
 		struct kvm_cpuid2 cpuid;
 
+		/*
+		 * KVM_SET_CPUID{,2} after KVM_RUN is forbidden, see the comment in
+		 * KVM_SET_CPUID case above.
+		 */
+		r = -EINVAL;
+		if (vcpu->arch.last_vmentry_cpu != -1)
+			goto out;
+
 		r = -EFAULT;
 		if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
 			goto out;
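Both KVM_SET_CPUID paths now refuse to run once the vCPU has entered the guest, keyed off last_vmentry_cpu remaining -1 until the first entry. A minimal sketch of that run-once latch (field and function names invented for illustration):

    #include <stdio.h>

    struct vcpu { int last_entry_cpu; /* -1 until the vCPU has run */ };

    static int set_cpuid(struct vcpu *v)
    {
        if (v->last_entry_cpu != -1)
            return -1;        /* -EINVAL in the real ioctl */
        puts("cpuid updated");
        return 0;
    }

    int main(void)
    {
        struct vcpu v = { .last_entry_cpu = -1 };

        set_cpuid(&v);            /* allowed before first run */
        v.last_entry_cpu = 3;     /* vCPU has entered the guest once */
        if (set_cpuid(&v))
            puts("rejected after KVM_RUN");
        return 0;
    }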
@@ -5698,6 +5740,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 		smp_wmb();
 		kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT;
 		kvm->arch.nr_reserved_ioapic_pins = cap->args[0];
+		kvm_request_apicv_update(kvm, true, APICV_INHIBIT_REASON_ABSENT);
 		r = 0;
 split_irqchip_unlock:
 		mutex_unlock(&kvm->lock);
@@ -6078,6 +6121,7 @@ set_identity_unlock:
 		/* Write kvm->irq_routing before enabling irqchip_in_kernel. */
 		smp_wmb();
 		kvm->arch.irqchip_mode = KVM_IRQCHIP_KERNEL;
+		kvm_request_apicv_update(kvm, true, APICV_INHIBIT_REASON_ABSENT);
 	create_irqchip_unlock:
 		mutex_unlock(&kvm->lock);
 		break;
@@ -8776,10 +8820,9 @@ static void kvm_apicv_init(struct kvm *kvm)
 {
 	init_rwsem(&kvm->arch.apicv_update_lock);
 
-	if (enable_apicv)
-		clear_bit(APICV_INHIBIT_REASON_DISABLE,
-			  &kvm->arch.apicv_inhibit_reasons);
-	else
-		set_bit(APICV_INHIBIT_REASON_ABSENT,
-			  &kvm->arch.apicv_inhibit_reasons);
+	set_bit(APICV_INHIBIT_REASON_ABSENT,
+		&kvm->arch.apicv_inhibit_reasons);
+	if (!enable_apicv)
+		set_bit(APICV_INHIBIT_REASON_DISABLE,
+			&kvm->arch.apicv_inhibit_reasons);
 }
@@ -9528,8 +9571,7 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
 	if (irqchip_split(vcpu->kvm))
 		kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
 	else {
-		if (vcpu->arch.apicv_active)
-			static_call(kvm_x86_sync_pir_to_irr)(vcpu);
+		static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
 		if (ioapic_in_kernel(vcpu->kvm))
 			kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
 	}
@@ -9648,10 +9690,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		/* Flushing all ASIDs flushes the current ASID... */
 		kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
 	}
-	if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
-		kvm_vcpu_flush_tlb_current(vcpu);
-	if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
-		kvm_vcpu_flush_tlb_guest(vcpu);
+	kvm_service_local_tlb_flush_requests(vcpu);
 
 	if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
 		vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
@@ -9802,10 +9841,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 		/*
 		 * This handles the case where a posted interrupt was
-		 * notified with kvm_vcpu_kick.
+		 * notified with kvm_vcpu_kick.  Assigned devices can
+		 * use the POSTED_INTR_VECTOR even if APICv is disabled,
+		 * so do it even if APICv is disabled on this vCPU.
 		 */
-		if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active)
-			static_call(kvm_x86_sync_pir_to_irr)(vcpu);
+		if (kvm_lapic_enabled(vcpu))
+			static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
 
 		if (kvm_vcpu_exit_request(vcpu)) {
 			vcpu->mode = OUTSIDE_GUEST_MODE;
@@ -9849,8 +9890,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
 			break;
 
-		if (vcpu->arch.apicv_active)
-			static_call(kvm_x86_sync_pir_to_irr)(vcpu);
+		if (kvm_lapic_enabled(vcpu))
+			static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
 
 		if (unlikely(kvm_vcpu_exit_request(vcpu))) {
 			exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED;
@@ -103,6 +103,7 @@ static inline unsigned int __shrink_ple_window(unsigned int val,
 
 #define MSR_IA32_CR_PAT_DEFAULT  0x0007040600070406ULL
 
+void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu);
 int kvm_check_nested_events(struct kvm_vcpu *vcpu);
 
 static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
@@ -185,12 +186,6 @@ static inline bool mmu_is_nested(struct kvm_vcpu *vcpu)
 	return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;
 }
 
-static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu)
-{
-	++vcpu->stat.tlb_flush;
-	static_call(kvm_x86_tlb_flush_current)(vcpu);
-}
-
 static inline int is_pae(struct kvm_vcpu *vcpu)
 {
 	return kvm_read_cr4_bits(vcpu, X86_CR4_PAE);
@@ -72,6 +72,7 @@ static void __init setup_real_mode(void)
 #ifdef CONFIG_X86_64
 	u64 *trampoline_pgd;
 	u64 efer;
+	int i;
 #endif
 
 	base = (unsigned char *)real_mode_header;
@@ -128,8 +129,17 @@ static void __init setup_real_mode(void)
 	trampoline_header->flags = 0;
 
 	trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
+
+	/* Map the real mode stub as virtual == physical */
 	trampoline_pgd[0] = trampoline_pgd_entry.pgd;
-	trampoline_pgd[511] = init_top_pgt[511].pgd;
+
+	/*
+	 * Include the entirety of the kernel mapping into the trampoline
+	 * PGD.  This way, all mappings present in the normal kernel page
+	 * tables are usable while running on trampoline_pgd.
+	 */
+	for (i = pgd_index(__PAGE_OFFSET); i < PTRS_PER_PGD; i++)
+		trampoline_pgd[i] = init_top_pgt[i].pgd;
 #endif
 
 	sme_sev_setup_real_mode(trampoline_header);
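Instead of copying a single top-level slot, the loop mirrors every kernel-half PGD entry into the trampoline page table, so all kernel mappings remain usable while running on it. The same copy-the-tail loop in miniature, with toy sizes standing in for x86's 512-entry PGD:

    #include <stdio.h>

    #define PTRS_PER_PGD  8
    #define KERNEL_START  4  /* stand-in for pgd_index(__PAGE_OFFSET) */

    int main(void)
    {
        unsigned long init_pgt[PTRS_PER_PGD] = { 0, 0, 0, 0, 41, 42, 43, 44 };
        unsigned long trampoline_pgd[PTRS_PER_PGD] = { 0 };
        int i;

        /* Copy every kernel-half entry, not just the last slot. */
        for (i = KERNEL_START; i < PTRS_PER_PGD; i++)
            trampoline_pgd[i] = init_pgt[i];

        for (i = 0; i < PTRS_PER_PGD; i++)
            printf("pgd[%d] = %lu\n", i, trampoline_pgd[i]);
        return 0;
    }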
@@ -20,6 +20,7 @@
 
 #include <linux/init.h>
 #include <linux/linkage.h>
+#include <../entry/calling.h>
 
 .pushsection .noinstr.text, "ax"
 /*
@@ -192,6 +193,25 @@ SYM_CODE_START(xen_iret)
 	jmp hypercall_iret
 SYM_CODE_END(xen_iret)
 
+/*
+ * XEN pv doesn't use trampoline stack, PER_CPU_VAR(cpu_tss_rw + TSS_sp0) is
+ * also the kernel stack.  Reusing swapgs_restore_regs_and_return_to_usermode()
+ * in XEN pv would cause %rsp to move up to the top of the kernel stack and
+ * leave the IRET frame below %rsp, which is dangerous to be corrupted if #NMI
+ * interrupts.  And swapgs_restore_regs_and_return_to_usermode() pushing the IRET
+ * frame at the same address is useless.
+ */
+SYM_CODE_START(xenpv_restore_regs_and_return_to_usermode)
+	UNWIND_HINT_REGS
+	POP_REGS
+
+	/* stackleak_erase() can work safely on the kernel stack. */
+	STACKLEAK_ERASE_NOCLOBBER
+
+	addq $8, %rsp	/* skip regs->orig_ax */
+	jmp xen_iret
+SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode)
+
 /*
  * Xen handles syscall callbacks much like ordinary exceptions, which
  * means we have:
@@ -121,7 +121,6 @@ void flush_cache_page(struct vm_area_struct*,
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 void flush_dcache_page(struct page *);
-void flush_dcache_folio(struct folio *);
 
 void local_flush_cache_range(struct vm_area_struct *vma,
 		unsigned long start, unsigned long end);
@@ -138,9 +137,7 @@ void local_flush_cache_page(struct vm_area_struct *vma,
 #define flush_cache_vunmap(start,end)			do { } while (0)
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
-#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
 #define flush_dcache_page(page)				do { } while (0)
-static inline void flush_dcache_folio(struct folio *folio) { }
 
 #define flush_icache_range local_flush_icache_range
 #define flush_cache_page(vma, addr, pfn)		do { } while (0)
@@ -419,3 +419,4 @@
 446	common	landlock_restrict_self		sys_landlock_restrict_self
 # 447 reserved for memfd_secret
 448	common	process_mrelease		sys_process_mrelease
+449	common	futex_waitv			sys_futex_waitv
block/bdev.c
@@ -753,8 +753,7 @@ struct block_device *blkdev_get_no_open(dev_t dev)
 
 	if (!bdev)
 		return NULL;
-	if ((bdev->bd_disk->flags & GENHD_FL_HIDDEN) ||
-	    !try_module_get(bdev->bd_disk->fops->owner)) {
+	if ((bdev->bd_disk->flags & GENHD_FL_HIDDEN)) {
 		put_device(&bdev->bd_device);
 		return NULL;
 	}
@@ -764,7 +763,6 @@ struct block_device *blkdev_get_no_open(dev_t dev)
 
 void blkdev_put_no_open(struct block_device *bdev)
 {
-	module_put(bdev->bd_disk->fops->owner);
 	put_device(&bdev->bd_device);
 }
 
@@ -820,12 +818,14 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
 	ret = -ENXIO;
 	if (!disk_live(disk))
 		goto abort_claiming;
+	if (!try_module_get(disk->fops->owner))
+		goto abort_claiming;
 	if (bdev_is_partition(bdev))
 		ret = blkdev_get_part(bdev, mode);
 	else
 		ret = blkdev_get_whole(bdev, mode);
 	if (ret)
-		goto abort_claiming;
+		goto put_module;
 	if (mode & FMODE_EXCL) {
 		bd_finish_claiming(bdev, holder);
@@ -847,7 +847,8 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
 	if (unblock_events)
 		disk_unblock_events(disk);
 	return bdev;
-
+put_module:
+	module_put(disk->fops->owner);
 abort_claiming:
 	if (mode & FMODE_EXCL)
 		bd_abort_claiming(bdev, holder);
@@ -956,6 +957,7 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
 		blkdev_put_whole(bdev, mode);
 	mutex_unlock(&disk->open_mutex);
 
+	module_put(disk->fops->owner);
 	blkdev_put_no_open(bdev);
 }
 EXPORT_SYMBOL(blkdev_put);
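Across the three bdev.c hunks, the module reference moves out of blkdev_get_no_open()/blkdev_put_no_open() and into the open/put paths proper, with a put_module unwind label for the failure case. A stand-alone sketch of that acquire-in-open, release-in-put ordering (all names illustrative):

    #include <stdio.h>

    struct disk { int module_refs; };

    static int try_module_get(struct disk *d) { d->module_refs++; return 1; }
    static void module_put(struct disk *d) { d->module_refs--; }

    static int blkdev_get(struct disk *d, int fail_mode)
    {
        if (!try_module_get(d))
            return -1;
        if (fail_mode)
            goto put_module;      /* unwind mirrors the real code */
        return 0;
    put_module:
        module_put(d);
        return -1;
    }

    static void blkdev_put(struct disk *d)
    {
        module_put(d);            /* balances the get taken at open time */
    }

    int main(void)
    {
        struct disk d = { 0 };

        blkdev_get(&d, 1);        /* failed open leaves no reference */
        printf("refs after failed open: %d\n", d.module_refs);
        if (!blkdev_get(&d, 0)) {
            printf("refs while open: %d\n", d.module_refs);
            blkdev_put(&d);
        }
        printf("refs after close: %d\n", d.module_refs);
        return 0;
    }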
@@ -1017,6 +1017,7 @@ EXPORT_SYMBOL(submit_bio);
 /**
  * bio_poll - poll for BIO completions
  * @bio: bio to poll for
+ * @iob: batches of IO
  * @flags: BLK_POLL_* flags that control the behavior
  *
  * Poll for completions on queue associated with the bio. Returns number of