Support for "split lock" detection:
- Atomic operations (lock prefixed instructions) which span two cache lines have to acquire the global bus lock. This is at least 1k cycles slower than an atomic operation within a cache line and disrupts performance on other cores. Aside of performance disruption this is a unpriviledged form of DoS. Some newer CPUs have the capability to raise an #AC trap when such an operation is attempted. The detection is by default enabled in warning mode which will warn once when a user space application is caught. A command line option allows to disable the detection or to select fatal mode which will terminate offending applications with SIGBUS. -----BEGIN PGP SIGNATURE----- iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAl6B/uMTHHRnbHhAbGlu dXRyb25peC5kZQAKCRCmGPVMDXSYocsAD/9yqpw+XlPKNPsfbm9sbirBDfTrENcL F44iwn4WnrjoW/gnnZCYmPxJFsTtGVPqxHdUf4eyGemg9r9ZEO0DQftmUHC5Z6KX aa/b5JoeM61wp9HlpVlD4D1jVt4pWyQODQeZnUXE4DEzmRc3cD/5lSU+/VeaIwwz lxwUemqmXK7ucH2KA7smOGsl2nU6ED84q3mdOB1b4Cw+gWYMUnPJnuS/ipriBRx4 BYbMItcxsFvtdO9Hx8PvGd5LUK0wW8JOWrYQICD2kLpZtHtGeaHpBzFzL0+nMU7d 1epyDqJQDmX+PAzvj+EYyn3HTfobZlckn+tbxMQkkS+oDk1ywOZd+BancClvn5/5 jMfPIQJF5bGASVnzGMWhzVdwthTZiMG4d1iKsUWOA/hN0ch0+rm1BqraToabsEFg Sv7/rvl9KtSOtMJTeAmMhlZUMBj9m8BtPFjniDwp6nw/upGgJdST5mrKFNYZvqOj JnXsEMr/nJVW6bnUvT6LF66xbHlzHdxtodkQWqF+IEsyRaOz1zAGpQamP98KxNLc dq/XYoEe1KqIFbg4BkNP+GeDL3FQDxjFNwPQnnjQEzWRbjkHlfmq1uKCsR2r8mBO fYNJ1X8lTyGV0kx/ERpWGazzabpzh+8Lr1yMhnoA3EWvlzUjmpN2PFI4oTpTrtzT c/q16SCxim3NWA== =D9x8 -----END PGP SIGNATURE----- Merge tag 'x86-splitlock-2020-03-30' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull x86 splitlock updates from Thomas Gleixner: "Support for 'split lock' detection: Atomic operations (lock prefixed instructions) which span two cache lines have to acquire the global bus lock. This is at least 1k cycles slower than an atomic operation within a cache line and disrupts performance on other cores. Aside of performance disruption this is a unpriviledged form of DoS. 
Some newer CPUs have the capability to raise an #AC trap when such an operation is attempted. The detection is by default enabled in warning mode which will warn once when a user space application is caught. A command line option allows to disable the detection or to select fatal mode which will terminate offending applications with SIGBUS" * tag 'x86-splitlock-2020-03-30' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/split_lock: Avoid runtime reads of the TEST_CTRL MSR x86/split_lock: Rework the initialization flow of split lock detection x86/split_lock: Enable split lock detection by kernel
This commit is contained in:
commit
2853d5fafb
|
@ -4711,6 +4711,28 @@
|
|||
spia_pedr=
|
||||
spia_peddr=
|
||||
|
||||
split_lock_detect=
|
||||
[X86] Enable split lock detection
|
||||
|
||||
When enabled (and if hardware support is present), atomic
|
||||
instructions that access data across cache line
|
||||
boundaries will result in an alignment check exception.
|
||||
|
||||
off - not enabled
|
||||
|
||||
warn - the kernel will emit rate limited warnings
|
||||
about applications triggering the #AC
|
||||
exception. This mode is the default on CPUs
|
||||
that support split lock detection.
|
||||
|
||||
fatal - the kernel will send SIGBUS to applications
|
||||
that trigger the #AC exception.
|
||||
|
||||
If an #AC exception is hit in the kernel or in
|
||||
firmware (i.e. not while executing in user mode)
|
||||
the kernel will oops in either "warn" or "fatal"
|
||||
mode.
|
||||
|
||||
srcutree.counter_wrap_check [KNL]
|
||||
Specifies how frequently to check for
|
||||
grace-period sequence counter wrap for the
|
||||
|
|
|
@ -40,4 +40,16 @@ int mwait_usable(const struct cpuinfo_x86 *);
|
|||
unsigned int x86_family(unsigned int sig);
|
||||
unsigned int x86_model(unsigned int sig);
|
||||
unsigned int x86_stepping(unsigned int sig);
|
||||
#ifdef CONFIG_CPU_SUP_INTEL
|
||||
extern void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c);
|
||||
extern void switch_to_sld(unsigned long tifn);
|
||||
extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
|
||||
#else
|
||||
static inline void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) {}
|
||||
static inline void switch_to_sld(unsigned long tifn) {}
|
||||
static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
#endif /* _ASM_X86_CPU_H */
|
||||
|
|
|
@ -285,6 +285,7 @@
|
|||
#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */
|
||||
#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
|
||||
#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
|
||||
#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */
|
||||
|
||||
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
|
||||
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
|
||||
|
@ -368,6 +369,7 @@
|
|||
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
|
||||
#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */
|
||||
#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
|
||||
#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */
|
||||
#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */
|
||||
|
||||
/*
|
||||
|
|
|
@ -41,6 +41,10 @@
|
|||
|
||||
/* Intel MSRs. Some also available on other CPUs */
|
||||
|
||||
#define MSR_TEST_CTRL 0x00000033
|
||||
#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT 29
|
||||
#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT BIT(MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT)
|
||||
|
||||
#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
|
||||
#define SPEC_CTRL_IBRS BIT(0) /* Indirect Branch Restricted Speculation */
|
||||
#define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */
|
||||
|
@ -70,6 +74,11 @@
|
|||
*/
|
||||
#define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U)
|
||||
|
||||
/* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */
|
||||
#define MSR_IA32_CORE_CAPS 0x000000cf
|
||||
#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT 5
|
||||
#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT BIT(MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT)
|
||||
|
||||
#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
|
||||
#define NHM_C3_AUTO_DEMOTE (1UL << 25)
|
||||
#define NHM_C1_AUTO_DEMOTE (1UL << 26)
|
||||
|
|
|
@ -92,6 +92,7 @@ struct thread_info {
|
|||
#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */
|
||||
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
|
||||
#define TIF_IA32 17 /* IA32 compatibility process */
|
||||
#define TIF_SLD 18 /* Restore split lock detection on context switch */
|
||||
#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
|
||||
#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */
|
||||
#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
|
||||
|
@ -121,6 +122,7 @@ struct thread_info {
|
|||
#define _TIF_NOCPUID (1 << TIF_NOCPUID)
|
||||
#define _TIF_NOTSC (1 << TIF_NOTSC)
|
||||
#define _TIF_IA32 (1 << TIF_IA32)
|
||||
#define _TIF_SLD (1 << TIF_SLD)
|
||||
#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
|
||||
#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
|
||||
#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
|
||||
|
@ -139,7 +141,7 @@ struct thread_info {
|
|||
/* flags to check in __switch_to() */
|
||||
#define _TIF_WORK_CTXSW_BASE \
|
||||
(_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP | \
|
||||
_TIF_SSBD | _TIF_SPEC_FORCE_UPDATE)
|
||||
_TIF_SSBD | _TIF_SPEC_FORCE_UPDATE | _TIF_SLD)
|
||||
|
||||
/*
|
||||
* Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated.
|
||||
|
|
|
@ -1224,6 +1224,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
|
|||
|
||||
cpu_set_bug_bits(c);
|
||||
|
||||
cpu_set_core_cap_bits(c);
|
||||
|
||||
fpu__init_system(c);
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
|
|
|
@ -19,6 +19,8 @@
|
|||
#include <asm/microcode_intel.h>
|
||||
#include <asm/hwcap2.h>
|
||||
#include <asm/elf.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
#include <asm/cmdline.h>
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#include <linux/topology.h>
|
||||
|
@ -31,6 +33,20 @@
|
|||
#include <asm/apic.h>
|
||||
#endif
|
||||
|
||||
/* Operating mode selected for split lock detection. */
enum split_lock_detect_state {
	sld_off = 0,	/* detection is not enabled */
	sld_warn,	/* rate limited warning for user space offenders */
	sld_fatal,	/* user space offenders are sent SIGBUS */
};
|
||||
|
||||
/*
|
||||
* Default to sld_off because most systems do not support split lock detection.
|
||||
* split_lock_setup() will switch this to sld_warn on systems that support
|
||||
* split lock detect, unless there is a command line override.
|
||||
*/
|
||||
static enum split_lock_detect_state sld_state __ro_after_init = sld_off;
|
||||
/*
 * Value of MSR_TEST_CTRL read once by split_lock_setup(). sld_update_msr()
 * uses it as the base value so the MSR does not have to be read at runtime.
 */
static u64 msr_test_ctrl_cache __ro_after_init;
|
||||
|
||||
/*
|
||||
* Processors which have self-snooping capability can handle conflicting
|
||||
* memory type across CPUs by snooping its own cache. However, there exists
|
||||
|
@ -570,6 +586,8 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c)
|
|||
wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
|
||||
}
|
||||
|
||||
static void split_lock_init(void);
|
||||
|
||||
static void init_intel(struct cpuinfo_x86 *c)
|
||||
{
|
||||
early_init_intel(c);
|
||||
|
@ -684,6 +702,8 @@ static void init_intel(struct cpuinfo_x86 *c)
|
|||
tsx_enable();
|
||||
if (tsx_ctrl_state == TSX_CTRL_DISABLE)
|
||||
tsx_disable();
|
||||
|
||||
split_lock_init();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
|
@ -945,3 +965,166 @@ static const struct cpu_dev intel_cpu_dev = {
|
|||
};
|
||||
|
||||
cpu_dev_register(intel_cpu_dev);
|
||||
|
||||
#undef pr_fmt
|
||||
#define pr_fmt(fmt) "x86/split lock detection: " fmt
|
||||
|
||||
static const struct {
|
||||
const char *option;
|
||||
enum split_lock_detect_state state;
|
||||
} sld_options[] __initconst = {
|
||||
{ "off", sld_off },
|
||||
{ "warn", sld_warn },
|
||||
{ "fatal", sld_fatal },
|
||||
};
|
||||
|
||||
/*
 * Check whether the first @arglen characters of @arg are exactly the
 * NUL-terminated option name @opt.
 *
 * Returns true only when the lengths are equal and the characters match.
 */
static inline bool match_option(const char *arg, int arglen, const char *opt)
{
	int optlen = strlen(opt);

	if (arglen != optlen)
		return false;

	return strncmp(arg, opt, optlen) == 0;
}
|
||||
|
||||
/*
 * Set (@on == true) or clear (@on == false) the split lock detect bit in
 * MSR_TEST_CTRL and confirm that the write took effect.
 *
 * Returns false if the fault-safe read or write of the MSR fails, or if the
 * value read back differs from the value written; true otherwise.
 */
static bool split_lock_verify_msr(bool on)
{
	u64 want, readback;

	if (rdmsrl_safe(MSR_TEST_CTRL, &want))
		return false;

	want = on ? (want | MSR_TEST_CTRL_SPLIT_LOCK_DETECT)
		  : (want & ~MSR_TEST_CTRL_SPLIT_LOCK_DETECT);

	if (wrmsrl_safe(MSR_TEST_CTRL, want))
		return false;

	/* Read the MSR again to see whether the bit really changed. */
	rdmsrl(MSR_TEST_CTRL, readback);

	return readback == want;
}
|
||||
|
||||
/*
 * Boot time setup of split lock detection.
 *
 * Defaults to sld_warn unless "split_lock_detect=" on the kernel command
 * line selects one of the sld_options[] entries. sld_state and the
 * X86_FEATURE_SPLIT_LOCK_DETECT capability are only set once the detect bit
 * has been verified to be both clearable and settable in MSR_TEST_CTRL.
 */
static void __init split_lock_setup(void)
{
	enum split_lock_detect_state requested = sld_warn;
	char value[20];
	int idx, len;

	/* The detect bit must be clearable before anything else is tried. */
	if (!split_lock_verify_msr(false)) {
		pr_info("MSR access failed: Disabled\n");
		return;
	}

	len = cmdline_find_option(boot_command_line, "split_lock_detect",
				  value, sizeof(value));

	/* An unrecognized option value leaves the default in place. */
	for (idx = 0; len >= 0 && idx < ARRAY_SIZE(sld_options); idx++) {
		if (!match_option(value, len, sld_options[idx].option))
			continue;

		requested = sld_options[idx].state;
		break;
	}

	if (requested == sld_off) {
		pr_info("disabled\n");
		return;
	}

	if (requested == sld_warn)
		pr_info("warning about user-space split_locks\n");
	else if (requested == sld_fatal)
		pr_info("sending SIGBUS on user-space split_locks\n");

	/*
	 * Snapshot the MSR after the verification above cleared the detect
	 * bit, i.e. the cached value has detection turned off.
	 */
	rdmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);

	if (!split_lock_verify_msr(true)) {
		pr_info("MSR access failed: Disabled\n");
		return;
	}

	sld_state = requested;
	setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT);
}
|
||||
|
||||
/*
|
||||
* MSR_TEST_CTRL is per core, but we treat it like a per CPU MSR. Locking
|
||||
* is not implemented as one thread could undo the setting of the other
|
||||
* thread immediately after dropping the lock anyway.
|
||||
*/
|
||||
/*
 * Write MSR_TEST_CTRL from the cached value, with the split lock detect bit
 * added when @on is true. When @on is false the cached value is written
 * unmodified; split_lock_setup() fills the cache right after clearing the
 * detect bit.
 */
static void sld_update_msr(bool on)
{
	u64 detect = on ? MSR_TEST_CTRL_SPLIT_LOCK_DETECT : 0;

	wrmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache | detect);
}
|
||||
|
||||
static void split_lock_init(void)
|
||||
{
|
||||
split_lock_verify_msr(sld_state != sld_off);
|
||||
}
|
||||
|
||||
bool handle_user_split_lock(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
|
||||
return false;
|
||||
|
||||
pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
|
||||
current->comm, current->pid, regs->ip);
|
||||
|
||||
/*
|
||||
* Disable the split lock detection for this task so it can make
|
||||
* progress and set TIF_SLD so the detection is re-enabled via
|
||||
* switch_to_sld() when the task is scheduled out.
|
||||
*/
|
||||
sld_update_msr(false);
|
||||
set_tsk_thread_flag(current, TIF_SLD);
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is called only when switching between tasks with
|
||||
* different split-lock detection modes. It sets the MSR for the
|
||||
* mode of the new task. This is right most of the time, but since
|
||||
* the MSR is shared by hyperthreads on a physical core there can
|
||||
* be glitches when the two threads need different modes.
|
||||
*/
|
||||
void switch_to_sld(unsigned long tifn)
|
||||
{
|
||||
sld_update_msr(!(tifn & _TIF_SLD));
|
||||
}
|
||||
|
||||
#define SPLIT_LOCK_CPU(model) {X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY}
|
||||
|
||||
/*
|
||||
* The following processors have the split lock detection feature. But
|
||||
* since they don't have the IA32_CORE_CAPABILITIES MSR, the feature cannot
|
||||
* be enumerated. Enable it by family and model matching on these
|
||||
* processors.
|
||||
*/
|
||||
static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
|
||||
SPLIT_LOCK_CPU(INTEL_FAM6_ICELAKE_X),
|
||||
SPLIT_LOCK_CPU(INTEL_FAM6_ICELAKE_L),
|
||||
{}
|
||||
};
|
||||
|
||||
/*
 * Work out whether an Intel CPU offers split lock detection and, if it does,
 * run split_lock_setup(). Does nothing for other vendors.
 */
void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c)
{
	u64 caps = 0;

	if (c->x86_vendor != X86_VENDOR_INTEL)
		return;

	if (cpu_has(c, X86_FEATURE_CORE_CAPABILITIES)) {
		/* Features are reported in the IA32_CORE_CAPABILITIES MSR. */
		rdmsrl(MSR_IA32_CORE_CAPS, caps);
	} else if (!boot_cpu_has(X86_FEATURE_HYPERVISOR) &&
		   x86_match_cpu(split_lock_cpu_ids)) {
		/* No MSR and not a guest: go by family and model instead. */
		caps |= MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT;
	}

	if (caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT)
		split_lock_setup();
}
|
||||
|
|
|
@ -649,6 +649,9 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
|
|||
/* Enforce MSR update to ensure consistent state */
|
||||
__speculation_ctrl_update(~tifn, tifn);
|
||||
}
|
||||
|
||||
if ((tifp ^ tifn) & _TIF_SLD)
|
||||
switch_to_sld(tifn);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -46,6 +46,7 @@
|
|||
#include <asm/traps.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/fpu/internal.h>
|
||||
#include <asm/cpu.h>
|
||||
#include <asm/cpu_entry_area.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/fixmap.h>
|
||||
|
@ -242,7 +243,6 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
|
|||
{
|
||||
struct task_struct *tsk = current;
|
||||
|
||||
|
||||
if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code))
|
||||
return;
|
||||
|
||||
|
@ -288,9 +288,29 @@ DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, 0, NULL, "coprocessor segment overru
|
|||
DO_ERROR(X86_TRAP_TS, SIGSEGV, 0, NULL, "invalid TSS", invalid_TSS)
|
||||
DO_ERROR(X86_TRAP_NP, SIGBUS, 0, NULL, "segment not present", segment_not_present)
|
||||
DO_ERROR(X86_TRAP_SS, SIGBUS, 0, NULL, "stack segment", stack_segment)
|
||||
DO_ERROR(X86_TRAP_AC, SIGBUS, BUS_ADRALN, NULL, "alignment check", alignment_check)
|
||||
#undef IP
|
||||
|
||||
dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
char *str = "alignment check";
|
||||
|
||||
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
|
||||
|
||||
if (notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_AC, SIGBUS) == NOTIFY_STOP)
|
||||
return;
|
||||
|
||||
if (!user_mode(regs))
|
||||
die("Split lock detected\n", regs, error_code);
|
||||
|
||||
local_irq_enable();
|
||||
|
||||
if (handle_user_split_lock(regs, error_code))
|
||||
return;
|
||||
|
||||
do_trap(X86_TRAP_AC, SIGBUS, "alignment check", regs,
|
||||
error_code, BUS_ADRALN, NULL);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_VMAP_STACK
|
||||
__visible void __noreturn handle_stack_overflow(const char *message,
|
||||
struct pt_regs *regs,
|
||||
|
|
Loading…
Reference in New Issue