2019-05-24 18:04:02 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
2005-04-17 06:20:36 +08:00
|
|
|
/* ----------------------------------------------------------------------- *
|
2008-02-04 23:47:59 +08:00
|
|
|
*
|
|
|
|
* Copyright 2000-2008 H. Peter Anvin - All Rights Reserved
|
2009-09-01 05:16:57 +08:00
|
|
|
* Copyright 2009 Intel Corporation; author: H. Peter Anvin
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
|
|
|
* ----------------------------------------------------------------------- */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* x86 MSR access device
|
|
|
|
*
|
|
|
|
* This device is accessed by lseek() to the appropriate register number
|
|
|
|
* and then read/write in chunks of 8 bytes. A larger size means multiple
|
|
|
|
* reads or writes of the same register.
|
|
|
|
*
|
|
|
|
* This driver uses /dev/cpu/%d/msr where %d is the minor number, and on
|
|
|
|
* an SMP box will direct the access to CPU %d.
|
|
|
|
*/
|
|
|
|
|
2014-10-18 04:01:50 +08:00
|
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/module.h>
|
|
|
|
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/errno.h>
|
|
|
|
#include <linux/fcntl.h>
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/poll.h>
|
|
|
|
#include <linux/smp.h>
|
|
|
|
#include <linux/major.h>
|
|
|
|
#include <linux/fs.h>
|
|
|
|
#include <linux/device.h>
|
|
|
|
#include <linux/cpu.h>
|
|
|
|
#include <linux/notifier.h>
|
2009-01-12 17:15:14 +08:00
|
|
|
#include <linux/uaccess.h>
|
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h
percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files. percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.
percpu.h -> slab.h dependency is about to be removed. Prepare for
this change by updating users of gfp and slab facilities include those
headers directly instead of assuming availability. As this conversion
needs to touch large number of source files, the following script is
used as the basis of conversion.
http://userweb.kernel.org/~tj/misc/slabh-sweep.py
The script does the followings.
* Scan files for gfp and slab usages and update includes such that
only the necessary includes are there. ie. if only gfp is used,
gfp.h, if slab is used, slab.h.
* When the script inserts a new include, it looks at the include
blocks and try to put the new include such that its order conforms
to its surrounding. It's put in the include block which contains
core kernel includes, in the same order that the rest are ordered -
alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
doesn't seem to be any matching order.
* If the script can't find a place to put a new include (mostly
because the file doesn't have fitting include block), it prints out
an error message indicating which .h file needs to be added to the
file.
The conversion was done in the following steps.
1. The initial automatic conversion of all .c files updated slightly
over 4000 files, deleting around 700 includes and adding ~480 gfp.h
and ~3000 slab.h inclusions. The script emitted errors for ~400
files.
2. Each error was manually checked. Some didn't need the inclusion,
some needed manual addition while adding it to implementation .h or
embedding .c file was more appropriate for others. This step added
inclusions to around 150 files.
3. The script was run again and the output was compared to the edits
from #2 to make sure no file was left behind.
4. Several build tests were done and a couple of problems were fixed.
e.g. lib/decompress_*.c used malloc/free() wrappers around slab
APIs requiring slab.h to be added manually.
5. The script was run on all .h files but without automatically
editing them as sprinkling gfp.h and slab.h inclusions around .h
files could easily lead to inclusion dependency hell. Most gfp.h
inclusion directives were ignored as stuff from gfp.h was usually
wildly available and often used in preprocessor macros. Each
slab.h inclusion directive was examined and added manually as
necessary.
6. percpu.h was updated not to include slab.h.
7. Build tests were done on the following configurations and failures
were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my
distributed build env didn't work with gcov compiles) and a few
more options had to be turned off depending on archs to make things
build (like ipr on powerpc/64 which failed due to missing writeq).
* x86 and x86_64 UP and SMP allmodconfig and a custom test config.
* powerpc and powerpc64 SMP allmodconfig
* sparc and sparc64 SMP allmodconfig
* ia64 SMP allmodconfig
* s390 SMP allmodconfig
* alpha SMP allmodconfig
* um on x86_64 SMP allmodconfig
8. percpu.h modifications were reverted so that it could be applied as
a separate patch and serve as bisection point.
Given the fact that I had only a couple of failures from tests on step
6, I'm fairly confident about the coverage of this conversion patch.
If there is a breakage, it's likely to be something in one of the arch
headers which should be easily discoverable on most builds of
the specific arch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 16:04:11 +08:00
|
|
|
#include <linux/gfp.h>
|
2019-08-20 08:17:49 +08:00
|
|
|
#include <linux/security.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2016-01-27 05:12:04 +08:00
|
|
|
#include <asm/cpufeature.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <asm/msr.h>
|
|
|
|
|
2005-03-24 01:56:34 +08:00
|
|
|
/* Device class created in msr_init(); the per-CPU msr devices are created under it. */
static struct class *msr_class;
|
2016-11-18 02:35:24 +08:00
|
|
|
/* Value returned by cpuhp_setup_state() in msr_init(); handed to cpuhp_remove_state() in msr_exit(). */
static enum cpuhp_state cpuhp_msr_state;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
x86/msr: Filter MSR writes
Add functionality to disable writing to MSRs from userspace. Writes can
still be allowed by supplying the allow_writes=on module parameter. The
kernel will be tainted so that it shows in oopses.
Having unfettered access to all MSRs on a system is and has always been
a disaster waiting to happen. Think performance counter MSRs, MSRs with
sticky or locked bits, MSRs making major system changes like loading
microcode, MTRRs, PAT configuration, TSC counter, security mitigations
MSRs, you name it.
This also destroys all the kernel's caching of MSR values for
performance, as the recent case with MSR_AMD64_LS_CFG showed.
Another example is writing MSRs by mistake by simply typing the wrong
MSR address. System freezes have been experienced that way.
In general, poking at MSRs under the kernel's feet is a bad bad idea.
So log writing to MSRs by default. Longer term, such writes will be
disabled by default.
If userspace still wants to do that, then proper interfaces should be
defined which are under the kernel's control and accesses to those MSRs
can be synchronized and sanitized properly.
[ Fix sparse warnings. ]
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Tested-by: Sean Christopherson <sean.j.christopherson@intel.com>
Link: https://lkml.kernel.org/r/20200612105026.GA22660@zn.tnic
2020-06-11 03:37:49 +08:00
|
|
|
/*
 * Policy applied to MSR writes coming from userspace. It is selected
 * through the "allow_writes" module parameter and evaluated by
 * filter_write().
 */
enum allow_write_msrs {
	/* Writes are let through without logging a message. */
	MSR_WRITES_ON,
	/* Writes are refused with -EPERM. */
	MSR_WRITES_OFF,
	/* Writes are let through, but a rate-limited message is logged. */
	MSR_WRITES_DEFAULT,
};

/* Current write policy; starts out as MSR_WRITES_DEFAULT. */
static enum allow_write_msrs allow_writes = MSR_WRITES_DEFAULT;
|
|
|
|
|
2008-02-23 06:11:52 +08:00
|
|
|
/*
 * msr_read - read an MSR on the CPU selected by the device minor
 * @file:  open msr device; its inode minor number picks the CPU
 * @buf:   userspace destination buffer
 * @count: number of bytes requested; must be a multiple of 8
 * @ppos:  file position, used as the MSR register number
 *
 * Every 8-byte chunk of the request triggers one more read of the same
 * register. Returns the number of bytes copied out if any chunk
 * succeeded, otherwise the error of the first failing step (-EINVAL for
 * a bad size, -EFAULT for a failed copy, or whatever
 * rdmsr_safe_on_cpu() reported), or 0 for an empty request.
 */
static ssize_t msr_read(struct file *file, char __user *buf,
			size_t count, loff_t *ppos)
{
	u32 __user *dst = (u32 __user *)buf;
	int cpu = iminor(file_inode(file));
	u32 msr = *ppos;
	ssize_t done = 0;
	int ret = 0;
	u32 val[2];	/* the two 32-bit words filled in by rdmsr_safe_on_cpu() */

	if (count & 7)
		return -EINVAL;	/* Invalid chunk size */

	while (count) {
		ret = rdmsr_safe_on_cpu(cpu, msr, &val[0], &val[1]);
		if (ret)
			break;

		if (copy_to_user(dst, val, sizeof(val))) {
			ret = -EFAULT;
			break;
		}

		dst += 2;
		done += sizeof(val);
		count -= sizeof(val);
	}

	/* A partial transfer is reported as success; the error only if nothing was read. */
	if (done)
		return done;

	return ret;
}
|
|
|
|
|
x86/msr: Filter MSR writes
Add functionality to disable writing to MSRs from userspace. Writes can
still be allowed by supplying the allow_writes=on module parameter. The
kernel will be tainted so that it shows in oopses.
Having unfettered access to all MSRs on a system is and has always been
a disaster waiting to happen. Think performance counter MSRs, MSRs with
sticky or locked bits, MSRs making major system changes like loading
microcode, MTRRs, PAT configuration, TSC counter, security mitigations
MSRs, you name it.
This also destroys all the kernel's caching of MSR values for
performance, as the recent case with MSR_AMD64_LS_CFG showed.
Another example is writing MSRs by mistake by simply typing the wrong
MSR address. System freezes have been experienced that way.
In general, poking at MSRs under the kernel's feet is a bad bad idea.
So log writing to MSRs by default. Longer term, such writes will be
disabled by default.
If userspace still wants to do that, then proper interfaces should be
defined which are under the kernel's control and accesses to those MSRs
can be synchronized and sanitized properly.
[ Fix sparse warnings. ]
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Tested-by: Sean Christopherson <sean.j.christopherson@intel.com>
Link: https://lkml.kernel.org/r/20200612105026.GA22660@zn.tnic
2020-06-11 03:37:49 +08:00
|
|
|
/*
 * filter_write - apply the allow_writes policy to a userspace MSR write
 * @reg: MSR number about to be written
 *
 * Returns 0 when the write may proceed and -EPERM when writes are
 * switched off. Under the default policy the write is allowed, and a
 * rate-limited message naming the MSR and the writing task is logged.
 */
static int filter_write(u32 reg)
{
	/*
	 * MSR writes usually happen all at once, and can easily saturate
	 * kmsg. Only allow one message every 30 seconds.
	 *
	 * It's possible to be smarter here and do it (for example) per-MSR,
	 * but it would certainly be more complex, and this is enough at
	 * least to avoid saturating the ring buffer.
	 */
	static DEFINE_RATELIMIT_STATE(fw_rs, 30 * HZ, 1);
	const enum allow_write_msrs policy = allow_writes;

	if (policy == MSR_WRITES_ON)
		return 0;

	if (policy == MSR_WRITES_OFF)
		return -EPERM;

	/* Default policy: let the write through, but complain (rate-limited). */
	if (__ratelimit(&fw_rs))
		pr_err("Write to unrecognized MSR 0x%x by %s (pid: %d). Please report to x86@kernel.org.\n",
		       reg, current->comm, current->pid);

	return 0;
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
 * msr_write - write an MSR on the CPU selected by the device minor
 * @file:  open msr device; its inode minor number picks the CPU
 * @buf:   userspace source buffer
 * @count: number of bytes supplied; must be a multiple of 8
 * @ppos:  file position, used as the MSR register number
 *
 * The write is first checked against kernel lockdown and against the
 * allow_writes policy (filter_write()). Every 8-byte chunk then results
 * in one more write of the same register, and the kernel is tainted
 * with TAINT_CPU_OUT_OF_SPEC before each write is attempted.
 *
 * Returns the number of bytes consumed if any chunk was written,
 * otherwise the error of the first failing step, or 0 for an empty
 * request.
 */
static ssize_t msr_write(struct file *file, const char __user *buf,
			 size_t count, loff_t *ppos)
{
	const u32 __user *src = (const u32 __user *)buf;
	int cpu = iminor(file_inode(file));
	u32 msr = *ppos;
	ssize_t done = 0;
	u32 val[2];	/* passed to wrmsr_safe_on_cpu() as val[0], val[1] */
	int ret;

	ret = security_locked_down(LOCKDOWN_MSR);
	if (ret)
		return ret;

	ret = filter_write(msr);
	if (ret)
		return ret;

	if (count & 7)
		return -EINVAL;	/* Invalid chunk size */

	while (count) {
		if (copy_from_user(val, src, sizeof(val))) {
			ret = -EFAULT;
			break;
		}

		add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);

		ret = wrmsr_safe_on_cpu(cpu, msr, val[0], val[1]);
		if (ret)
			break;

		src += 2;
		done += sizeof(val);
		count -= sizeof(val);
	}

	/* A partial transfer is reported as success; the error only if nothing was written. */
	if (done)
		return done;

	return ret;
}
|
|
|
|
|
2009-09-01 05:16:57 +08:00
|
|
|
static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg)
|
|
|
|
{
|
|
|
|
u32 __user *uregs = (u32 __user *)arg;
|
|
|
|
u32 regs[8];
|
2013-02-28 05:59:05 +08:00
|
|
|
int cpu = iminor(file_inode(file));
|
2009-09-01 05:16:57 +08:00
|
|
|
int err;
|
|
|
|
|
|
|
|
switch (ioc) {
|
|
|
|
case X86_IOC_RDMSR_REGS:
|
|
|
|
if (!(file->f_mode & FMODE_READ)) {
|
|
|
|
err = -EBADF;
|
|
|
|
break;
|
|
|
|
}
|
2018-10-28 20:58:28 +08:00
|
|
|
if (copy_from_user(®s, uregs, sizeof(regs))) {
|
2009-09-01 05:16:57 +08:00
|
|
|
err = -EFAULT;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
err = rdmsr_safe_regs_on_cpu(cpu, regs);
|
|
|
|
if (err)
|
|
|
|
break;
|
2018-10-28 20:58:28 +08:00
|
|
|
if (copy_to_user(uregs, ®s, sizeof(regs)))
|
2009-09-01 05:16:57 +08:00
|
|
|
err = -EFAULT;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case X86_IOC_WRMSR_REGS:
|
|
|
|
if (!(file->f_mode & FMODE_WRITE)) {
|
|
|
|
err = -EBADF;
|
|
|
|
break;
|
|
|
|
}
|
2018-10-28 20:58:28 +08:00
|
|
|
if (copy_from_user(®s, uregs, sizeof(regs))) {
|
2009-09-01 05:16:57 +08:00
|
|
|
err = -EFAULT;
|
|
|
|
break;
|
|
|
|
}
|
2019-08-20 08:17:49 +08:00
|
|
|
err = security_locked_down(LOCKDOWN_MSR);
|
|
|
|
if (err)
|
|
|
|
break;
|
2009-09-01 05:16:57 +08:00
|
|
|
err = wrmsr_safe_regs_on_cpu(cpu, regs);
|
|
|
|
if (err)
|
|
|
|
break;
|
2018-10-28 20:58:28 +08:00
|
|
|
if (copy_to_user(uregs, ®s, sizeof(regs)))
|
2009-09-01 05:16:57 +08:00
|
|
|
err = -EFAULT;
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
err = -ENOTTY;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
static int msr_open(struct inode *inode, struct file *file)
|
|
|
|
{
|
2013-02-28 05:59:05 +08:00
|
|
|
unsigned int cpu = iminor(file_inode(file));
|
2009-12-15 02:02:18 +08:00
|
|
|
struct cpuinfo_x86 *c;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2012-11-15 21:06:22 +08:00
|
|
|
if (!capable(CAP_SYS_RAWIO))
|
|
|
|
return -EPERM;
|
|
|
|
|
2009-10-08 03:43:22 +08:00
|
|
|
if (cpu >= nr_cpu_ids || !cpu_online(cpu))
|
|
|
|
return -ENXIO; /* No such CPU */
|
|
|
|
|
2008-05-15 23:12:01 +08:00
|
|
|
c = &cpu_data(cpu);
|
|
|
|
if (!cpu_has(c, X86_FEATURE_MSR))
|
2009-10-08 03:43:22 +08:00
|
|
|
return -EIO; /* MSR not supported */
|
|
|
|
|
|
|
|
return 0;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* File operations we support
|
|
|
|
*/
|
2007-02-12 16:55:31 +08:00
|
|
|
static const struct file_operations msr_fops = {
|
2005-04-17 06:20:36 +08:00
|
|
|
.owner = THIS_MODULE,
|
2015-12-06 11:04:48 +08:00
|
|
|
.llseek = no_seek_end_llseek,
|
2005-04-17 06:20:36 +08:00
|
|
|
.read = msr_read,
|
|
|
|
.write = msr_write,
|
|
|
|
.open = msr_open,
|
2009-09-01 05:16:57 +08:00
|
|
|
.unlocked_ioctl = msr_ioctl,
|
|
|
|
.compat_ioctl = msr_ioctl,
|
2005-04-17 06:20:36 +08:00
|
|
|
};
|
|
|
|
|
2016-11-18 02:35:24 +08:00
|
|
|
static int msr_device_create(unsigned int cpu)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2006-08-08 13:19:37 +08:00
|
|
|
struct device *dev;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2008-07-22 11:03:34 +08:00
|
|
|
dev = device_create(msr_class, NULL, MKDEV(MSR_MAJOR, cpu), NULL,
|
|
|
|
"msr%d", cpu);
|
2014-10-18 04:01:38 +08:00
|
|
|
return PTR_ERR_OR_ZERO(dev);
|
2007-10-18 18:05:14 +08:00
|
|
|
}
|
|
|
|
|
2016-11-18 02:35:24 +08:00
|
|
|
static int msr_device_destroy(unsigned int cpu)
|
2007-10-18 18:05:14 +08:00
|
|
|
{
|
|
|
|
device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu));
|
2016-11-18 02:35:24 +08:00
|
|
|
return 0;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
2011-07-24 08:24:48 +08:00
|
|
|
/*
 * msr_devnode - build the device node name "cpu/<minor>/msr"
 * @dev:  device whose minor number goes into the name
 * @mode: not used
 *
 * Returns the string allocated by kasprintf(), which is NULL if the
 * allocation fails.
 */
static char *msr_devnode(struct device *dev, umode_t *mode)
{
	unsigned int minor = MINOR(dev->devt);

	return kasprintf(GFP_KERNEL, "cpu/%u/msr", minor);
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
 * msr_init - module initialisation
 *
 * Registers the character device range, creates the "msr" class, and
 * installs CPU hotplug callbacks that create and destroy the per-CPU
 * devices. Anything already set up is torn down again, in reverse
 * order, if a later step fails.
 *
 * Returns 0 on success, -EBUSY if the chrdev registration fails, or the
 * error from class_create()/cpuhp_setup_state().
 */
static int __init msr_init(void)
{
	int ret;

	ret = __register_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr", &msr_fops);
	if (ret) {
		pr_err("unable to get major %d for msr\n", MSR_MAJOR);
		return -EBUSY;
	}

	msr_class = class_create(THIS_MODULE, "msr");
	if (IS_ERR(msr_class)) {
		ret = PTR_ERR(msr_class);
		goto unregister;
	}
	msr_class->devnode = msr_devnode;

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/msr:online",
				msr_device_create, msr_device_destroy);
	if (ret >= 0) {
		/* A non-negative return is the state to remove again on exit. */
		cpuhp_msr_state = ret;
		return 0;
	}

	class_destroy(msr_class);
unregister:
	__unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
	return ret;
}
module_init(msr_init);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
static void __exit msr_exit(void)
|
|
|
|
{
|
2016-11-18 02:35:24 +08:00
|
|
|
cpuhp_remove_state(cpuhp_msr_state);
|
2005-03-24 01:56:34 +08:00
|
|
|
class_destroy(msr_class);
|
2010-01-27 10:37:22 +08:00
|
|
|
__unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
module_exit(msr_exit)
|
|
|
|
|
x86/msr: Filter MSR writes
Add functionality to disable writing to MSRs from userspace. Writes can
still be allowed by supplying the allow_writes=on module parameter. The
kernel will be tainted so that it shows in oopses.
Having unfettered access to all MSRs on a system is and has always been
a disaster waiting to happen. Think performance counter MSRs, MSRs with
sticky or locked bits, MSRs making major system changes like loading
microcode, MTRRs, PAT configuration, TSC counter, security mitigations
MSRs, you name it.
This also destroys all the kernel's caching of MSR values for
performance, as the recent case with MSR_AMD64_LS_CFG showed.
Another example is writing MSRs by mistake by simply typing the wrong
MSR address. System freezes have been experienced that way.
In general, poking at MSRs under the kernel's feet is a bad bad idea.
So log writing to MSRs by default. Longer term, such writes will be
disabled by default.
If userspace still wants to do that, then proper interfaces should be
defined which are under the kernel's control and accesses to those MSRs
can be synchronized and sanitized properly.
[ Fix sparse warnings. ]
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Tested-by: Sean Christopherson <sean.j.christopherson@intel.com>
Link: https://lkml.kernel.org/r/20200612105026.GA22660@zn.tnic
2020-06-11 03:37:49 +08:00
|
|
|
static int set_allow_writes(const char *val, const struct kernel_param *cp)
|
|
|
|
{
|
|
|
|
/* val is NUL-terminated, see kernfs_fop_write() */
|
|
|
|
char *s = strstrip((char *)val);
|
|
|
|
|
|
|
|
if (!strcmp(s, "on"))
|
|
|
|
allow_writes = MSR_WRITES_ON;
|
|
|
|
else if (!strcmp(s, "off"))
|
|
|
|
allow_writes = MSR_WRITES_OFF;
|
|
|
|
else
|
|
|
|
allow_writes = MSR_WRITES_DEFAULT;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int get_allow_writes(char *buf, const struct kernel_param *kp)
|
|
|
|
{
|
|
|
|
const char *res;
|
|
|
|
|
|
|
|
switch (allow_writes) {
|
|
|
|
case MSR_WRITES_ON: res = "on"; break;
|
|
|
|
case MSR_WRITES_OFF: res = "off"; break;
|
|
|
|
default: res = "default"; break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return sprintf(buf, "%s\n", res);
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct kernel_param_ops allow_writes_ops = {
|
|
|
|
.set = set_allow_writes,
|
|
|
|
.get = get_allow_writes
|
|
|
|
};
|
|
|
|
|
|
|
|
module_param_cb(allow_writes, &allow_writes_ops, NULL, 0600);
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
MODULE_AUTHOR("H. Peter Anvin <hpa@zytor.com>");
|
|
|
|
MODULE_DESCRIPTION("x86 generic MSR driver");
|
|
|
|
MODULE_LICENSE("GPL");
|