Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking updates from Ingo Molnar:
 "The main changes in this cycle were:

   - A comprehensive rewrite of the robust/PI futex code's exit handling
     to fix various exit races. (Thomas Gleixner et al)

   - Rework the generic REFCOUNT_FULL implementation using atomic_fetch_*
     operations so that the performance impact of the cmpxchg() loops is
     mitigated for common refcount operations.

     With these performance improvements the generic implementation of
     refcount_t should be good enough for everybody - and this got
     confirmed by performance testing, so remove ARCH_HAS_REFCOUNT and
     REFCOUNT_FULL entirely, leaving the generic implementation enabled
     unconditionally. (Will Deacon)

   - Other misc changes, fixes, cleanups"

* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (27 commits)
  lkdtm: Remove references to CONFIG_REFCOUNT_FULL
  locking/refcount: Remove unused 'refcount_error_report()' function
  locking/refcount: Consolidate implementations of refcount_t
  locking/refcount: Consolidate REFCOUNT_{MAX,SATURATED} definitions
  locking/refcount: Move saturation warnings out of line
  locking/refcount: Improve performance of generic REFCOUNT_FULL code
  locking/refcount: Move the bulk of the REFCOUNT_FULL implementation into the <linux/refcount.h> header
  locking/refcount: Remove unused refcount_*_checked() variants
  locking/refcount: Ensure integer operands are treated as signed
  locking/refcount: Define constants for saturation and max refcount values
  futex: Prevent exit livelock
  futex: Provide distinct return value when owner is exiting
  futex: Add mutex around futex exit
  futex: Provide state handling for exec() as well
  futex: Sanitize exit state handling
  futex: Mark the begin of futex exit explicitly
  futex: Set task::futex_state to DEAD right after handling futex exit
  futex: Split futex_mm_release() for exit/exec
  exit/exec: Seperate mm_release()
  futex: Replace PF_EXITPIDONE with a state
  ...
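As background for the refcount rework summarized above, here is a minimal, purely illustrative userspace sketch of the "saturate after the fact" idea: the fast path is a single relaxed fetch-add with no cmpxchg() loop, and the counter is only forced to a saturation value when the returned old value shows a boundary was crossed. It uses C11 atomics and invented sketch_* names, not the kernel's atomic_t API, and is not the code added by this merge.

	#include <limits.h>
	#include <stdatomic.h>
	#include <stdio.h>

	#define SKETCH_SATURATED	(INT_MIN / 2)	/* mirrors REFCOUNT_SATURATED */

	struct sketch_refcount {
		atomic_int refs;			/* stand-in for refcount_t */
	};

	/* Increment with a single relaxed fetch-add; no cmpxchg() loop. */
	static void sketch_inc(struct sketch_refcount *r)
	{
		int old = atomic_fetch_add_explicit(&r->refs, 1, memory_order_relaxed);

		/*
		 * old == 0 means the object was already freed (use-after-free),
		 * old < 0 means the counter overflowed or was already saturated;
		 * in either case pin it to the saturation value (the kernel code
		 * additionally WARNs about it).
		 */
		if (old <= 0)
			atomic_store(&r->refs, SKETCH_SATURATED);
	}

	int main(void)
	{
		struct sketch_refcount r;

		atomic_init(&r.refs, 1);
		sketch_inc(&r);
		printf("refs = %d\n", atomic_load(&r.refs));	/* prints "refs = 2" */
		return 0;
	}

The real helpers added below in <linux/refcount.h> distinguish the use-after-free and overflow cases and report them out of line via refcount_warn_saturate().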
commit 168829ad09

 arch/Kconfig | 21

@@ -892,27 +892,6 @@ config STRICT_MODULE_RWX
 config ARCH_HAS_PHYS_TO_DMA
 	bool
 
-config ARCH_HAS_REFCOUNT
-	bool
-	help
-	  An architecture selects this when it has implemented refcount_t
-	  using open coded assembly primitives that provide an optimized
-	  refcount_t implementation, possibly at the expense of some full
-	  refcount state checks of CONFIG_REFCOUNT_FULL=y.
-
-	  The refcount overflow check behavior, however, must be retained.
-	  Catching overflows is the primary security concern for protecting
-	  against bugs in reference counts.
-
-config REFCOUNT_FULL
-	bool "Perform full reference count validation at the expense of speed"
-	help
-	  Enabling this switches the refcounting infrastructure from a fast
-	  unchecked atomic_t implementation to a fully state checked
-	  implementation, which can be (slightly) slower but provides protections
-	  against various use-after-free conditions that can be used in
-	  security flaw exploits.
-
 config HAVE_ARCH_COMPILER_H
 	bool
 	help

@@ -117,7 +117,6 @@ config ARM
 	select OLD_SIGSUSPEND3
 	select PCI_SYSCALL if PCI
 	select PERF_USE_VMALLOC
-	select REFCOUNT_FULL
 	select RTC_LIB
 	select SYS_SUPPORTS_APM_EMULATION
 	# Above selects are sorted alphabetically; please add new ones

@@ -182,7 +182,6 @@ config ARM64
 	select PCI_SYSCALL if PCI
 	select POWER_RESET
 	select POWER_SUPPLY
-	select REFCOUNT_FULL
 	select SPARSE_IRQ
 	select SWIOTLB
 	select SYSCTL_EXCEPTION_TRACE

@@ -62,7 +62,6 @@ CONFIG_OPROFILE=m
 CONFIG_KPROBES=y
 CONFIG_JUMP_LABEL=y
 CONFIG_STATIC_KEYS_SELFTEST=y
-CONFIG_REFCOUNT_FULL=y
 CONFIG_LOCK_EVENT_COUNTS=y
 CONFIG_MODULES=y
 CONFIG_MODULE_FORCE_LOAD=y

@@ -73,7 +73,6 @@ config X86
 	select ARCH_HAS_PMEM_API		if X86_64
 	select ARCH_HAS_PTE_DEVMAP		if X86_64
 	select ARCH_HAS_PTE_SPECIAL
-	select ARCH_HAS_REFCOUNT
 	select ARCH_HAS_UACCESS_FLUSHCACHE	if X86_64
 	select ARCH_HAS_UACCESS_MCSAFE		if X86_64 && X86_MCE
 	select ARCH_HAS_SET_MEMORY

@@ -141,9 +141,6 @@
 # define _ASM_EXTABLE_EX(from, to)				\
 	_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
 
-# define _ASM_EXTABLE_REFCOUNT(from, to)			\
-	_ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount)
-
 # define _ASM_NOKPROBE(entry)					\
 	.pushsection "_kprobe_blacklist","aw" ;			\
 	_ASM_ALIGN ;						\
@@ -172,9 +169,6 @@
 # define _ASM_EXTABLE_EX(from, to)				\
 	_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
 
-# define _ASM_EXTABLE_REFCOUNT(from, to)			\
-	_ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount)
-
 /* For C file, we already have NOKPROBE_SYMBOL macro */
 #endif
 
@ -1,126 +0,0 @@
|
|||
#ifndef __ASM_X86_REFCOUNT_H
|
||||
#define __ASM_X86_REFCOUNT_H
|
||||
/*
|
||||
* x86-specific implementation of refcount_t. Based on PAX_REFCOUNT from
|
||||
* PaX/grsecurity.
|
||||
*/
|
||||
#include <linux/refcount.h>
|
||||
#include <asm/bug.h>
|
||||
|
||||
/*
|
||||
* This is the first portion of the refcount error handling, which lives in
|
||||
* .text.unlikely, and is jumped to from the CPU flag check (in the
|
||||
* following macros). This saves the refcount value location into CX for
|
||||
* the exception handler to use (in mm/extable.c), and then triggers the
|
||||
* central refcount exception. The fixup address for the exception points
|
||||
* back to the regular execution flow in .text.
|
||||
*/
|
||||
#define _REFCOUNT_EXCEPTION \
|
||||
".pushsection .text..refcount\n" \
|
||||
"111:\tlea %[var], %%" _ASM_CX "\n" \
|
||||
"112:\t" ASM_UD2 "\n" \
|
||||
ASM_UNREACHABLE \
|
||||
".popsection\n" \
|
||||
"113:\n" \
|
||||
_ASM_EXTABLE_REFCOUNT(112b, 113b)
|
||||
|
||||
/* Trigger refcount exception if refcount result is negative. */
|
||||
#define REFCOUNT_CHECK_LT_ZERO \
|
||||
"js 111f\n\t" \
|
||||
_REFCOUNT_EXCEPTION
|
||||
|
||||
/* Trigger refcount exception if refcount result is zero or negative. */
|
||||
#define REFCOUNT_CHECK_LE_ZERO \
|
||||
"jz 111f\n\t" \
|
||||
REFCOUNT_CHECK_LT_ZERO
|
||||
|
||||
/* Trigger refcount exception unconditionally. */
|
||||
#define REFCOUNT_ERROR \
|
||||
"jmp 111f\n\t" \
|
||||
_REFCOUNT_EXCEPTION
|
||||
|
||||
static __always_inline void refcount_add(unsigned int i, refcount_t *r)
|
||||
{
|
||||
asm volatile(LOCK_PREFIX "addl %1,%0\n\t"
|
||||
REFCOUNT_CHECK_LT_ZERO
|
||||
: [var] "+m" (r->refs.counter)
|
||||
: "ir" (i)
|
||||
: "cc", "cx");
|
||||
}
|
||||
|
||||
static __always_inline void refcount_inc(refcount_t *r)
|
||||
{
|
||||
asm volatile(LOCK_PREFIX "incl %0\n\t"
|
||||
REFCOUNT_CHECK_LT_ZERO
|
||||
: [var] "+m" (r->refs.counter)
|
||||
: : "cc", "cx");
|
||||
}
|
||||
|
||||
static __always_inline void refcount_dec(refcount_t *r)
|
||||
{
|
||||
asm volatile(LOCK_PREFIX "decl %0\n\t"
|
||||
REFCOUNT_CHECK_LE_ZERO
|
||||
: [var] "+m" (r->refs.counter)
|
||||
: : "cc", "cx");
|
||||
}
|
||||
|
||||
static __always_inline __must_check
|
||||
bool refcount_sub_and_test(unsigned int i, refcount_t *r)
|
||||
{
|
||||
bool ret = GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl",
|
||||
REFCOUNT_CHECK_LT_ZERO,
|
||||
r->refs.counter, e, "er", i, "cx");
|
||||
|
||||
if (ret) {
|
||||
smp_acquire__after_ctrl_dep();
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
|
||||
{
|
||||
bool ret = GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl",
|
||||
REFCOUNT_CHECK_LT_ZERO,
|
||||
r->refs.counter, e, "cx");
|
||||
|
||||
if (ret) {
|
||||
smp_acquire__after_ctrl_dep();
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static __always_inline __must_check
|
||||
bool refcount_add_not_zero(unsigned int i, refcount_t *r)
|
||||
{
|
||||
int c, result;
|
||||
|
||||
c = atomic_read(&(r->refs));
|
||||
do {
|
||||
if (unlikely(c == 0))
|
||||
return false;
|
||||
|
||||
result = c + i;
|
||||
|
||||
/* Did we try to increment from/to an undesirable state? */
|
||||
if (unlikely(c < 0 || c == INT_MAX || result < c)) {
|
||||
asm volatile(REFCOUNT_ERROR
|
||||
: : [var] "m" (r->refs.counter)
|
||||
: "cc", "cx");
|
||||
break;
|
||||
}
|
||||
|
||||
} while (!atomic_try_cmpxchg(&(r->refs), &c, result));
|
||||
|
||||
return c != 0;
|
||||
}
|
||||
|
||||
static __always_inline __must_check bool refcount_inc_not_zero(refcount_t *r)
|
||||
{
|
||||
return refcount_add_not_zero(1, r);
|
||||
}
|
||||
|
||||
#endif
|
|
@ -44,55 +44,6 @@ __visible bool ex_handler_fault(const struct exception_table_entry *fixup,
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(ex_handler_fault);
|
||||
|
||||
/*
|
||||
* Handler for UD0 exception following a failed test against the
|
||||
* result of a refcount inc/dec/add/sub.
|
||||
*/
|
||||
__visible bool ex_handler_refcount(const struct exception_table_entry *fixup,
|
||||
struct pt_regs *regs, int trapnr,
|
||||
unsigned long error_code,
|
||||
unsigned long fault_addr)
|
||||
{
|
||||
/* First unconditionally saturate the refcount. */
|
||||
*(int *)regs->cx = INT_MIN / 2;
|
||||
|
||||
/*
|
||||
* Strictly speaking, this reports the fixup destination, not
|
||||
* the fault location, and not the actually overflowing
|
||||
* instruction, which is the instruction before the "js", but
|
||||
* since that instruction could be a variety of lengths, just
|
||||
* report the location after the overflow, which should be close
|
||||
* enough for finding the overflow, as it's at least back in
|
||||
* the function, having returned from .text.unlikely.
|
||||
*/
|
||||
regs->ip = ex_fixup_addr(fixup);
|
||||
|
||||
/*
|
||||
* This function has been called because either a negative refcount
|
||||
* value was seen by any of the refcount functions, or a zero
|
||||
* refcount value was seen by refcount_dec().
|
||||
*
|
||||
* If we crossed from INT_MAX to INT_MIN, OF (Overflow Flag: result
|
||||
* wrapped around) will be set. Additionally, seeing the refcount
|
||||
* reach 0 will set ZF (Zero Flag: result was zero). In each of
|
||||
* these cases we want a report, since it's a boundary condition.
|
||||
* The SF case is not reported since it indicates post-boundary
|
||||
* manipulations below zero or above INT_MAX. And if none of the
|
||||
* flags are set, something has gone very wrong, so report it.
|
||||
*/
|
||||
if (regs->flags & (X86_EFLAGS_OF | X86_EFLAGS_ZF)) {
|
||||
bool zero = regs->flags & X86_EFLAGS_ZF;
|
||||
|
||||
refcount_error_report(regs, zero ? "hit zero" : "overflow");
|
||||
} else if ((regs->flags & X86_EFLAGS_SF) == 0) {
|
||||
/* Report if none of OF, ZF, nor SF are set. */
|
||||
refcount_error_report(regs, "unexpected saturation");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
EXPORT_SYMBOL(ex_handler_refcount);
|
||||
|
||||
/*
|
||||
* Handler for when we fail to restore a task's FPU state. We should never get
|
||||
* here because the FPU state of a task using the FPU (task->thread.fpu.state)
|
||||
|
|
|
@ -719,7 +719,7 @@ void drm_connector_list_iter_end(struct drm_connector_list_iter *iter)
|
|||
__drm_connector_put_safe(iter->conn);
|
||||
spin_unlock_irqrestore(&config->connector_list_lock, flags);
|
||||
}
|
||||
lock_release(&connector_list_iter_dep_map, 0, _RET_IP_);
|
||||
lock_release(&connector_list_iter_dep_map, _RET_IP_);
|
||||
}
|
||||
EXPORT_SYMBOL(drm_connector_list_iter_end);
|
||||
|
||||
|
|
|
@ -22,7 +22,6 @@ config DRM_I915_DEBUG
|
|||
depends on DRM_I915
|
||||
select DEBUG_FS
|
||||
select PREEMPT_COUNT
|
||||
select REFCOUNT_FULL
|
||||
select I2C_CHARDEV
|
||||
select STACKDEPOT
|
||||
select DRM_DP_AUX_CHARDEV
|
||||
|
|
|
@ -509,14 +509,14 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
|
|||
I915_MM_SHRINKER, 0, _RET_IP_);
|
||||
|
||||
mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_);
|
||||
mutex_release(&mutex->dep_map, 0, _RET_IP_);
|
||||
mutex_release(&mutex->dep_map, _RET_IP_);
|
||||
|
||||
mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
|
||||
mutex_release(&i915->drm.struct_mutex.dep_map, _RET_IP_);
|
||||
|
||||
fs_reclaim_release(GFP_KERNEL);
|
||||
|
||||
if (unlock)
|
||||
mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
|
||||
mutex_release(&i915->drm.struct_mutex.dep_map, _RET_IP_);
|
||||
}
|
||||
|
||||
#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
|
||||
|
|
|
@ -52,7 +52,7 @@ static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
|
|||
static inline void __timeline_mark_unlock(struct intel_context *ce,
|
||||
unsigned long flags)
|
||||
{
|
||||
mutex_release(&ce->timeline->mutex.dep_map, 0, _THIS_IP_);
|
||||
mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
|
|
|
@ -1495,7 +1495,7 @@ long i915_request_wait(struct i915_request *rq,
|
|||
dma_fence_remove_callback(&rq->fence, &wait.cb);
|
||||
|
||||
out:
|
||||
mutex_release(&rq->engine->gt->reset.mutex.dep_map, 0, _THIS_IP_);
|
||||
mutex_release(&rq->engine->gt->reset.mutex.dep_map, _THIS_IP_);
|
||||
trace_i915_request_wait_end(rq);
|
||||
return timeout;
|
||||
}
|
||||
|
|
|
@ -6,14 +6,6 @@
|
|||
#include "lkdtm.h"
|
||||
#include <linux/refcount.h>
|
||||
|
||||
#ifdef CONFIG_REFCOUNT_FULL
|
||||
#define REFCOUNT_MAX (UINT_MAX - 1)
|
||||
#define REFCOUNT_SATURATED UINT_MAX
|
||||
#else
|
||||
#define REFCOUNT_MAX INT_MAX
|
||||
#define REFCOUNT_SATURATED (INT_MIN / 2)
|
||||
#endif
|
||||
|
||||
static void overflow_check(refcount_t *ref)
|
||||
{
|
||||
switch (refcount_read(ref)) {
|
||||
|
@ -127,7 +119,7 @@ void lkdtm_REFCOUNT_DEC_ZERO(void)
|
|||
static void check_negative(refcount_t *ref, int start)
|
||||
{
|
||||
/*
|
||||
* CONFIG_REFCOUNT_FULL refuses to move a refcount at all on an
|
||||
* refcount_t refuses to move a refcount at all on an
|
||||
* over-sub, so we have to track our starting position instead of
|
||||
* looking only at zero-pinning.
|
||||
*/
|
||||
|
@ -210,7 +202,6 @@ static void check_from_zero(refcount_t *ref)
|
|||
|
||||
/*
|
||||
* A refcount_inc() from zero should pin to zero or saturate and may WARN.
|
||||
* Only CONFIG_REFCOUNT_FULL provides this protection currently.
|
||||
*/
|
||||
void lkdtm_REFCOUNT_INC_ZERO(void)
|
||||
{
|
||||
|
|
|
@ -303,7 +303,7 @@ static int __ldsem_down_read_nested(struct ld_semaphore *sem,
|
|||
if (count <= 0) {
|
||||
lock_contended(&sem->dep_map, _RET_IP_);
|
||||
if (!down_read_failed(sem, count, timeout)) {
|
||||
rwsem_release(&sem->dep_map, 1, _RET_IP_);
|
||||
rwsem_release(&sem->dep_map, _RET_IP_);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
@ -322,7 +322,7 @@ static int __ldsem_down_write_nested(struct ld_semaphore *sem,
|
|||
if ((count & LDSEM_ACTIVE_MASK) != LDSEM_ACTIVE_BIAS) {
|
||||
lock_contended(&sem->dep_map, _RET_IP_);
|
||||
if (!down_write_failed(sem, count, timeout)) {
|
||||
rwsem_release(&sem->dep_map, 1, _RET_IP_);
|
||||
rwsem_release(&sem->dep_map, _RET_IP_);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
@ -390,7 +390,7 @@ void ldsem_up_read(struct ld_semaphore *sem)
|
|||
{
|
||||
long count;
|
||||
|
||||
rwsem_release(&sem->dep_map, 1, _RET_IP_);
|
||||
rwsem_release(&sem->dep_map, _RET_IP_);
|
||||
|
||||
count = atomic_long_add_return(-LDSEM_READ_BIAS, &sem->count);
|
||||
if (count < 0 && (count & LDSEM_ACTIVE_MASK) == 0)
|
||||
|
@ -404,7 +404,7 @@ void ldsem_up_write(struct ld_semaphore *sem)
|
|||
{
|
||||
long count;
|
||||
|
||||
rwsem_release(&sem->dep_map, 1, _RET_IP_);
|
||||
rwsem_release(&sem->dep_map, _RET_IP_);
|
||||
|
||||
count = atomic_long_add_return(-LDSEM_WRITE_BIAS, &sem->count);
|
||||
if (count < 0)
|
||||
|
|
|
@ -1319,7 +1319,7 @@ resume:
|
|||
|
||||
if (!list_empty(&dentry->d_subdirs)) {
|
||||
spin_unlock(&this_parent->d_lock);
|
||||
spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
|
||||
spin_release(&dentry->d_lock.dep_map, _RET_IP_);
|
||||
this_parent = dentry;
|
||||
spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
|
||||
goto repeat;
|
||||
|
|
|
@ -1015,7 +1015,7 @@ static int exec_mmap(struct mm_struct *mm)
|
|||
/* Notify parent that we're no longer interested in the old VM */
|
||||
tsk = current;
|
||||
old_mm = current->mm;
|
||||
mm_release(tsk, old_mm);
|
||||
exec_mm_release(tsk, old_mm);
|
||||
|
||||
if (old_mm) {
|
||||
sync_mm_rss(old_mm);
|
||||
|
|
|
@ -713,7 +713,7 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask)
|
|||
if (need_to_start)
|
||||
jbd2_log_start_commit(journal, tid);
|
||||
|
||||
rwsem_release(&journal->j_trans_commit_map, 1, _THIS_IP_);
|
||||
rwsem_release(&journal->j_trans_commit_map, _THIS_IP_);
|
||||
handle->h_buffer_credits = nblocks;
|
||||
/*
|
||||
* Restore the original nofs context because the journal restart
|
||||
|
@ -1848,7 +1848,7 @@ int jbd2_journal_stop(handle_t *handle)
|
|||
wake_up(&journal->j_wait_transaction_locked);
|
||||
}
|
||||
|
||||
rwsem_release(&journal->j_trans_commit_map, 1, _THIS_IP_);
|
||||
rwsem_release(&journal->j_trans_commit_map, _THIS_IP_);
|
||||
|
||||
if (wait_for_commit)
|
||||
err = jbd2_log_wait_commit(journal, tid);
|
||||
|
|
|
@ -438,7 +438,7 @@ void kernfs_put_active(struct kernfs_node *kn)
|
|||
return;
|
||||
|
||||
if (kernfs_lockdep(kn))
|
||||
rwsem_release(&kn->dep_map, 1, _RET_IP_);
|
||||
rwsem_release(&kn->dep_map, _RET_IP_);
|
||||
v = atomic_dec_return(&kn->active);
|
||||
if (likely(v != KN_DEACTIVATED_BIAS))
|
||||
return;
|
||||
|
@ -476,7 +476,7 @@ static void kernfs_drain(struct kernfs_node *kn)
|
|||
|
||||
if (kernfs_lockdep(kn)) {
|
||||
lock_acquired(&kn->dep_map, _RET_IP_);
|
||||
rwsem_release(&kn->dep_map, 1, _RET_IP_);
|
||||
rwsem_release(&kn->dep_map, _RET_IP_);
|
||||
}
|
||||
|
||||
kernfs_drain_open_files(kn);
|
||||
|
|
|
@ -1687,7 +1687,7 @@ static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
|
|||
spin_unlock_irqrestore(&lockres->l_lock, flags);
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
if (lockres->l_lockdep_map.key != NULL)
|
||||
rwsem_release(&lockres->l_lockdep_map, 1, caller_ip);
|
||||
rwsem_release(&lockres->l_lockdep_map, caller_ip);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
@ -410,8 +410,6 @@ struct compat_kexec_segment;
|
|||
struct compat_mq_attr;
|
||||
struct compat_msgbuf;
|
||||
|
||||
extern void compat_exit_robust_list(struct task_struct *curr);
|
||||
|
||||
#define BITS_PER_COMPAT_LONG (8*sizeof(compat_long_t))
|
||||
|
||||
#define BITS_TO_COMPAT_LONGS(bits) DIV_ROUND_UP(bits, BITS_PER_COMPAT_LONG)
|
||||
|
|
|
@ -2,7 +2,9 @@
|
|||
#ifndef _LINUX_FUTEX_H
|
||||
#define _LINUX_FUTEX_H
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/ktime.h>
|
||||
|
||||
#include <uapi/linux/futex.h>
|
||||
|
||||
struct inode;
|
||||
|
@ -48,15 +50,35 @@ union futex_key {
|
|||
#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } }
|
||||
|
||||
#ifdef CONFIG_FUTEX
|
||||
extern void exit_robust_list(struct task_struct *curr);
|
||||
enum {
|
||||
FUTEX_STATE_OK,
|
||||
FUTEX_STATE_EXITING,
|
||||
FUTEX_STATE_DEAD,
|
||||
};
|
||||
|
||||
static inline void futex_init_task(struct task_struct *tsk)
|
||||
{
|
||||
tsk->robust_list = NULL;
|
||||
#ifdef CONFIG_COMPAT
|
||||
tsk->compat_robust_list = NULL;
|
||||
#endif
|
||||
INIT_LIST_HEAD(&tsk->pi_state_list);
|
||||
tsk->pi_state_cache = NULL;
|
||||
tsk->futex_state = FUTEX_STATE_OK;
|
||||
mutex_init(&tsk->futex_exit_mutex);
|
||||
}
|
||||
|
||||
void futex_exit_recursive(struct task_struct *tsk);
|
||||
void futex_exit_release(struct task_struct *tsk);
|
||||
void futex_exec_release(struct task_struct *tsk);
|
||||
|
||||
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
|
||||
u32 __user *uaddr2, u32 val2, u32 val3);
|
||||
#else
|
||||
static inline void exit_robust_list(struct task_struct *curr)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void futex_init_task(struct task_struct *tsk) { }
|
||||
static inline void futex_exit_recursive(struct task_struct *tsk) { }
|
||||
static inline void futex_exit_release(struct task_struct *tsk) { }
|
||||
static inline void futex_exec_release(struct task_struct *tsk) { }
|
||||
static inline long do_futex(u32 __user *uaddr, int op, u32 val,
|
||||
ktime_t *timeout, u32 __user *uaddr2,
|
||||
u32 val2, u32 val3)
|
||||
|
@ -65,12 +87,4 @@ static inline long do_futex(u32 __user *uaddr, int op, u32 val,
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_FUTEX_PI
|
||||
extern void exit_pi_state_list(struct task_struct *curr);
|
||||
#else
|
||||
static inline void exit_pi_state_list(struct task_struct *curr)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1170,7 +1170,7 @@ struct journal_s
|
|||
#define jbd2_might_wait_for_commit(j) \
|
||||
do { \
|
||||
rwsem_acquire(&j->j_trans_commit_map, 0, 0, _THIS_IP_); \
|
||||
rwsem_release(&j->j_trans_commit_map, 1, _THIS_IP_); \
|
||||
rwsem_release(&j->j_trans_commit_map, _THIS_IP_); \
|
||||
} while (0)
|
||||
|
||||
/* journal feature predicate functions */
|
||||
|
|
|
@ -328,13 +328,6 @@ extern int oops_may_print(void);
|
|||
void do_exit(long error_code) __noreturn;
|
||||
void complete_and_exit(struct completion *, long) __noreturn;
|
||||
|
||||
#ifdef CONFIG_ARCH_HAS_REFCOUNT
|
||||
void refcount_error_report(struct pt_regs *regs, const char *err);
|
||||
#else
|
||||
static inline void refcount_error_report(struct pt_regs *regs, const char *err)
|
||||
{ }
|
||||
#endif
|
||||
|
||||
/* Internal, do not use. */
|
||||
int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res);
|
||||
int __must_check _kstrtol(const char *s, unsigned int base, long *res);
|
||||
|
|
|
@ -349,8 +349,7 @@ extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
|
|||
int trylock, int read, int check,
|
||||
struct lockdep_map *nest_lock, unsigned long ip);
|
||||
|
||||
extern void lock_release(struct lockdep_map *lock, int nested,
|
||||
unsigned long ip);
|
||||
extern void lock_release(struct lockdep_map *lock, unsigned long ip);
|
||||
|
||||
/*
|
||||
* Same "read" as for lock_acquire(), except -1 means any.
|
||||
|
@ -428,7 +427,7 @@ static inline void lockdep_set_selftest_task(struct task_struct *task)
|
|||
}
|
||||
|
||||
# define lock_acquire(l, s, t, r, c, n, i) do { } while (0)
|
||||
# define lock_release(l, n, i) do { } while (0)
|
||||
# define lock_release(l, i) do { } while (0)
|
||||
# define lock_downgrade(l, i) do { } while (0)
|
||||
# define lock_set_class(l, n, k, s, i) do { } while (0)
|
||||
# define lock_set_subclass(l, s, i) do { } while (0)
|
||||
|
@ -591,42 +590,42 @@ static inline void print_irqtrace_events(struct task_struct *curr)
|
|||
|
||||
#define spin_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i)
|
||||
#define spin_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i)
|
||||
#define spin_release(l, n, i) lock_release(l, n, i)
|
||||
#define spin_release(l, i) lock_release(l, i)
|
||||
|
||||
#define rwlock_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i)
|
||||
#define rwlock_acquire_read(l, s, t, i) lock_acquire_shared_recursive(l, s, t, NULL, i)
|
||||
#define rwlock_release(l, n, i) lock_release(l, n, i)
|
||||
#define rwlock_release(l, i) lock_release(l, i)
|
||||
|
||||
#define seqcount_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i)
|
||||
#define seqcount_acquire_read(l, s, t, i) lock_acquire_shared_recursive(l, s, t, NULL, i)
|
||||
#define seqcount_release(l, n, i) lock_release(l, n, i)
|
||||
#define seqcount_release(l, i) lock_release(l, i)
|
||||
|
||||
#define mutex_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i)
|
||||
#define mutex_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i)
|
||||
#define mutex_release(l, n, i) lock_release(l, n, i)
|
||||
#define mutex_release(l, i) lock_release(l, i)
|
||||
|
||||
#define rwsem_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i)
|
||||
#define rwsem_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i)
|
||||
#define rwsem_acquire_read(l, s, t, i) lock_acquire_shared(l, s, t, NULL, i)
|
||||
#define rwsem_release(l, n, i) lock_release(l, n, i)
|
||||
#define rwsem_release(l, i) lock_release(l, i)
|
||||
|
||||
#define lock_map_acquire(l) lock_acquire_exclusive(l, 0, 0, NULL, _THIS_IP_)
|
||||
#define lock_map_acquire_read(l) lock_acquire_shared_recursive(l, 0, 0, NULL, _THIS_IP_)
|
||||
#define lock_map_acquire_tryread(l) lock_acquire_shared_recursive(l, 0, 1, NULL, _THIS_IP_)
|
||||
#define lock_map_release(l) lock_release(l, 1, _THIS_IP_)
|
||||
#define lock_map_release(l) lock_release(l, _THIS_IP_)
|
||||
|
||||
#ifdef CONFIG_PROVE_LOCKING
|
||||
# define might_lock(lock) \
|
||||
do { \
|
||||
typecheck(struct lockdep_map *, &(lock)->dep_map); \
|
||||
lock_acquire(&(lock)->dep_map, 0, 0, 0, 1, NULL, _THIS_IP_); \
|
||||
lock_release(&(lock)->dep_map, 0, _THIS_IP_); \
|
||||
lock_release(&(lock)->dep_map, _THIS_IP_); \
|
||||
} while (0)
|
||||
# define might_lock_read(lock) \
|
||||
do { \
|
||||
typecheck(struct lockdep_map *, &(lock)->dep_map); \
|
||||
lock_acquire(&(lock)->dep_map, 0, 0, 1, 1, NULL, _THIS_IP_); \
|
||||
lock_release(&(lock)->dep_map, 0, _THIS_IP_); \
|
||||
lock_release(&(lock)->dep_map, _THIS_IP_); \
|
||||
} while (0)
|
||||
|
||||
#define lockdep_assert_irqs_enabled() do { \
|
||||
|
|
|
@ -93,7 +93,7 @@ static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
|
|||
__percpu_up_read(sem); /* Unconditional memory barrier */
|
||||
preempt_enable();
|
||||
|
||||
rwsem_release(&sem->rw_sem.dep_map, 1, _RET_IP_);
|
||||
rwsem_release(&sem->rw_sem.dep_map, _RET_IP_);
|
||||
}
|
||||
|
||||
extern void percpu_down_write(struct percpu_rw_semaphore *);
|
||||
|
@ -118,7 +118,7 @@ extern void percpu_free_rwsem(struct percpu_rw_semaphore *);
|
|||
static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
|
||||
bool read, unsigned long ip)
|
||||
{
|
||||
lock_release(&sem->rw_sem.dep_map, 1, ip);
|
||||
lock_release(&sem->rw_sem.dep_map, ip);
|
||||
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
|
||||
if (!read)
|
||||
atomic_long_set(&sem->rw_sem.owner, RWSEM_OWNER_UNKNOWN);
|
||||
|
|
|
@ -210,7 +210,7 @@ static inline void rcu_lock_acquire(struct lockdep_map *map)
|
|||
|
||||
static inline void rcu_lock_release(struct lockdep_map *map)
|
||||
{
|
||||
lock_release(map, 1, _THIS_IP_);
|
||||
lock_release(map, _THIS_IP_);
|
||||
}
|
||||
|
||||
extern struct lockdep_map rcu_lock_map;
|
||||
|
|
|
@ -1,9 +1,88 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* Variant of atomic_t specialized for reference counts.
|
||||
*
|
||||
* The interface matches the atomic_t interface (to aid in porting) but only
|
||||
* provides the few functions one should use for reference counting.
|
||||
*
|
||||
* Saturation semantics
|
||||
* ====================
|
||||
*
|
||||
* refcount_t differs from atomic_t in that the counter saturates at
|
||||
* REFCOUNT_SATURATED and will not move once there. This avoids wrapping the
|
||||
* counter and causing 'spurious' use-after-free issues. In order to avoid the
|
||||
* cost associated with introducing cmpxchg() loops into all of the saturating
|
||||
* operations, we temporarily allow the counter to take on an unchecked value
|
||||
* and then explicitly set it to REFCOUNT_SATURATED on detecting that underflow
|
||||
* or overflow has occurred. Although this is racy when multiple threads
|
||||
* access the refcount concurrently, by placing REFCOUNT_SATURATED roughly
|
||||
* equidistant from 0 and INT_MAX we minimise the scope for error:
|
||||
*
|
||||
* INT_MAX REFCOUNT_SATURATED UINT_MAX
|
||||
* 0 (0x7fff_ffff) (0xc000_0000) (0xffff_ffff)
|
||||
* +--------------------------------+----------------+----------------+
|
||||
* <---------- bad value! ---------->
|
||||
*
|
||||
* (in a signed view of the world, the "bad value" range corresponds to
|
||||
* a negative counter value).
|
||||
*
|
||||
* As an example, consider a refcount_inc() operation that causes the counter
|
||||
* to overflow:
|
||||
*
|
||||
* int old = atomic_fetch_add_relaxed(r);
|
||||
* // old is INT_MAX, refcount now INT_MIN (0x8000_0000)
|
||||
* if (old < 0)
|
||||
* atomic_set(r, REFCOUNT_SATURATED);
|
||||
*
|
||||
* If another thread also performs a refcount_inc() operation between the two
|
||||
* atomic operations, then the count will continue to edge closer to 0. If it
|
||||
* reaches a value of 1 before /any/ of the threads reset it to the saturated
|
||||
* value, then a concurrent refcount_dec_and_test() may erroneously free the
|
||||
* underlying object. Given the precise timing details involved with the
|
||||
* round-robin scheduling of each thread manipulating the refcount and the need
|
||||
* to hit the race multiple times in succession, there doesn't appear to be a
|
||||
* practical avenue of attack even if using refcount_add() operations with
|
||||
* larger increments.
|
||||
*
|
||||
* Memory ordering
|
||||
* ===============
|
||||
*
|
||||
* Memory ordering rules are slightly relaxed wrt regular atomic_t functions
|
||||
* and provide only what is strictly required for refcounts.
|
||||
*
|
||||
* The increments are fully relaxed; these will not provide ordering. The
|
||||
* rationale is that whatever is used to obtain the object we're increasing the
|
||||
* reference count on will provide the ordering. For locked data structures,
|
||||
* its the lock acquire, for RCU/lockless data structures its the dependent
|
||||
* load.
|
||||
*
|
||||
* Do note that inc_not_zero() provides a control dependency which will order
|
||||
* future stores against the inc, this ensures we'll never modify the object
|
||||
* if we did not in fact acquire a reference.
|
||||
*
|
||||
* The decrements will provide release order, such that all the prior loads and
|
||||
* stores will be issued before, it also provides a control dependency, which
|
||||
* will order us against the subsequent free().
|
||||
*
|
||||
* The control dependency is against the load of the cmpxchg (ll/sc) that
|
||||
* succeeded. This means the stores aren't fully ordered, but this is fine
|
||||
* because the 1->0 transition indicates no concurrency.
|
||||
*
|
||||
* Note that the allocator is responsible for ordering things between free()
|
||||
* and alloc().
|
||||
*
|
||||
* The decrements dec_and_test() and sub_and_test() also provide acquire
|
||||
* ordering on success.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _LINUX_REFCOUNT_H
|
||||
#define _LINUX_REFCOUNT_H
|
||||
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/bug.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/limits.h>
|
||||
#include <linux/spinlock_types.h>
|
||||
|
||||
struct mutex;
|
||||
|
@ -12,7 +91,7 @@ struct mutex;
|
|||
* struct refcount_t - variant of atomic_t specialized for reference counts
|
||||
* @refs: atomic_t counter field
|
||||
*
|
||||
* The counter saturates at UINT_MAX and will not move once
|
||||
* The counter saturates at REFCOUNT_SATURATED and will not move once
|
||||
* there. This avoids wrapping the counter and causing 'spurious'
|
||||
* use-after-free bugs.
|
||||
*/
|
||||
|
@ -21,13 +100,25 @@ typedef struct refcount_struct {
|
|||
} refcount_t;
|
||||
|
||||
#define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), }
|
||||
#define REFCOUNT_MAX INT_MAX
|
||||
#define REFCOUNT_SATURATED (INT_MIN / 2)
|
||||
|
||||
enum refcount_saturation_type {
|
||||
REFCOUNT_ADD_NOT_ZERO_OVF,
|
||||
REFCOUNT_ADD_OVF,
|
||||
REFCOUNT_ADD_UAF,
|
||||
REFCOUNT_SUB_UAF,
|
||||
REFCOUNT_DEC_LEAK,
|
||||
};
|
||||
|
||||
void refcount_warn_saturate(refcount_t *r, enum refcount_saturation_type t);
|
||||
|
||||
/**
|
||||
* refcount_set - set a refcount's value
|
||||
* @r: the refcount
|
||||
* @n: value to which the refcount will be set
|
||||
*/
|
||||
static inline void refcount_set(refcount_t *r, unsigned int n)
|
||||
static inline void refcount_set(refcount_t *r, int n)
|
||||
{
|
||||
atomic_set(&r->refs, n);
|
||||
}
|
||||
|
@ -43,70 +134,168 @@ static inline unsigned int refcount_read(const refcount_t *r)
|
|||
return atomic_read(&r->refs);
|
||||
}
|
||||
|
||||
extern __must_check bool refcount_add_not_zero_checked(unsigned int i, refcount_t *r);
|
||||
extern void refcount_add_checked(unsigned int i, refcount_t *r);
|
||||
|
||||
extern __must_check bool refcount_inc_not_zero_checked(refcount_t *r);
|
||||
extern void refcount_inc_checked(refcount_t *r);
|
||||
|
||||
extern __must_check bool refcount_sub_and_test_checked(unsigned int i, refcount_t *r);
|
||||
|
||||
extern __must_check bool refcount_dec_and_test_checked(refcount_t *r);
|
||||
extern void refcount_dec_checked(refcount_t *r);
|
||||
|
||||
#ifdef CONFIG_REFCOUNT_FULL
|
||||
|
||||
#define refcount_add_not_zero refcount_add_not_zero_checked
|
||||
#define refcount_add refcount_add_checked
|
||||
|
||||
#define refcount_inc_not_zero refcount_inc_not_zero_checked
|
||||
#define refcount_inc refcount_inc_checked
|
||||
|
||||
#define refcount_sub_and_test refcount_sub_and_test_checked
|
||||
|
||||
#define refcount_dec_and_test refcount_dec_and_test_checked
|
||||
#define refcount_dec refcount_dec_checked
|
||||
|
||||
#else
|
||||
# ifdef CONFIG_ARCH_HAS_REFCOUNT
|
||||
# include <asm/refcount.h>
|
||||
# else
|
||||
static inline __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r)
|
||||
/**
|
||||
* refcount_add_not_zero - add a value to a refcount unless it is 0
|
||||
* @i: the value to add to the refcount
|
||||
* @r: the refcount
|
||||
*
|
||||
* Will saturate at REFCOUNT_SATURATED and WARN.
|
||||
*
|
||||
* Provides no memory ordering, it is assumed the caller has guaranteed the
|
||||
* object memory to be stable (RCU, etc.). It does provide a control dependency
|
||||
* and thereby orders future stores. See the comment on top.
|
||||
*
|
||||
* Use of this function is not recommended for the normal reference counting
|
||||
* use case in which references are taken and released one at a time. In these
|
||||
* cases, refcount_inc(), or one of its variants, should instead be used to
|
||||
* increment a reference count.
|
||||
*
|
||||
* Return: false if the passed refcount is 0, true otherwise
|
||||
*/
|
||||
static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r)
|
||||
{
|
||||
return atomic_add_unless(&r->refs, i, 0);
|
||||
int old = refcount_read(r);
|
||||
|
||||
do {
|
||||
if (!old)
|
||||
break;
|
||||
} while (!atomic_try_cmpxchg_relaxed(&r->refs, &old, old + i));
|
||||
|
||||
if (unlikely(old < 0 || old + i < 0))
|
||||
refcount_warn_saturate(r, REFCOUNT_ADD_NOT_ZERO_OVF);
|
||||
|
||||
return old;
|
||||
}
|
||||
|
||||
static inline void refcount_add(unsigned int i, refcount_t *r)
|
||||
/**
|
||||
* refcount_add - add a value to a refcount
|
||||
* @i: the value to add to the refcount
|
||||
* @r: the refcount
|
||||
*
|
||||
* Similar to atomic_add(), but will saturate at REFCOUNT_SATURATED and WARN.
|
||||
*
|
||||
* Provides no memory ordering, it is assumed the caller has guaranteed the
|
||||
* object memory to be stable (RCU, etc.). It does provide a control dependency
|
||||
* and thereby orders future stores. See the comment on top.
|
||||
*
|
||||
* Use of this function is not recommended for the normal reference counting
|
||||
* use case in which references are taken and released one at a time. In these
|
||||
* cases, refcount_inc(), or one of its variants, should instead be used to
|
||||
* increment a reference count.
|
||||
*/
|
||||
static inline void refcount_add(int i, refcount_t *r)
|
||||
{
|
||||
atomic_add(i, &r->refs);
|
||||
int old = atomic_fetch_add_relaxed(i, &r->refs);
|
||||
|
||||
if (unlikely(!old))
|
||||
refcount_warn_saturate(r, REFCOUNT_ADD_UAF);
|
||||
else if (unlikely(old < 0 || old + i < 0))
|
||||
refcount_warn_saturate(r, REFCOUNT_ADD_OVF);
|
||||
}
|
||||
|
||||
/**
|
||||
* refcount_inc_not_zero - increment a refcount unless it is 0
|
||||
* @r: the refcount to increment
|
||||
*
|
||||
* Similar to atomic_inc_not_zero(), but will saturate at REFCOUNT_SATURATED
|
||||
* and WARN.
|
||||
*
|
||||
* Provides no memory ordering, it is assumed the caller has guaranteed the
|
||||
* object memory to be stable (RCU, etc.). It does provide a control dependency
|
||||
* and thereby orders future stores. See the comment on top.
|
||||
*
|
||||
* Return: true if the increment was successful, false otherwise
|
||||
*/
|
||||
static inline __must_check bool refcount_inc_not_zero(refcount_t *r)
|
||||
{
|
||||
return atomic_add_unless(&r->refs, 1, 0);
|
||||
return refcount_add_not_zero(1, r);
|
||||
}
|
||||
|
||||
/**
|
||||
* refcount_inc - increment a refcount
|
||||
* @r: the refcount to increment
|
||||
*
|
||||
* Similar to atomic_inc(), but will saturate at REFCOUNT_SATURATED and WARN.
|
||||
*
|
||||
* Provides no memory ordering, it is assumed the caller already has a
|
||||
* reference on the object.
|
||||
*
|
||||
* Will WARN if the refcount is 0, as this represents a possible use-after-free
|
||||
* condition.
|
||||
*/
|
||||
static inline void refcount_inc(refcount_t *r)
|
||||
{
|
||||
atomic_inc(&r->refs);
|
||||
refcount_add(1, r);
|
||||
}
|
||||
|
||||
static inline __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r)
|
||||
/**
|
||||
* refcount_sub_and_test - subtract from a refcount and test if it is 0
|
||||
* @i: amount to subtract from the refcount
|
||||
* @r: the refcount
|
||||
*
|
||||
* Similar to atomic_dec_and_test(), but it will WARN, return false and
|
||||
* ultimately leak on underflow and will fail to decrement when saturated
|
||||
* at REFCOUNT_SATURATED.
|
||||
*
|
||||
* Provides release memory ordering, such that prior loads and stores are done
|
||||
* before, and provides an acquire ordering on success such that free()
|
||||
* must come after.
|
||||
*
|
||||
* Use of this function is not recommended for the normal reference counting
|
||||
* use case in which references are taken and released one at a time. In these
|
||||
* cases, refcount_dec(), or one of its variants, should instead be used to
|
||||
* decrement a reference count.
|
||||
*
|
||||
* Return: true if the resulting refcount is 0, false otherwise
|
||||
*/
|
||||
static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r)
|
||||
{
|
||||
return atomic_sub_and_test(i, &r->refs);
|
||||
int old = atomic_fetch_sub_release(i, &r->refs);
|
||||
|
||||
if (old == i) {
|
||||
smp_acquire__after_ctrl_dep();
|
||||
return true;
|
||||
}
|
||||
|
||||
if (unlikely(old < 0 || old - i < 0))
|
||||
refcount_warn_saturate(r, REFCOUNT_SUB_UAF);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* refcount_dec_and_test - decrement a refcount and test if it is 0
|
||||
* @r: the refcount
|
||||
*
|
||||
* Similar to atomic_dec_and_test(), it will WARN on underflow and fail to
|
||||
* decrement when saturated at REFCOUNT_SATURATED.
|
||||
*
|
||||
* Provides release memory ordering, such that prior loads and stores are done
|
||||
* before, and provides an acquire ordering on success such that free()
|
||||
* must come after.
|
||||
*
|
||||
* Return: true if the resulting refcount is 0, false otherwise
|
||||
*/
|
||||
static inline __must_check bool refcount_dec_and_test(refcount_t *r)
|
||||
{
|
||||
return atomic_dec_and_test(&r->refs);
|
||||
return refcount_sub_and_test(1, r);
|
||||
}
|
||||
|
||||
/**
|
||||
* refcount_dec - decrement a refcount
|
||||
* @r: the refcount
|
||||
*
|
||||
* Similar to atomic_dec(), it will WARN on underflow and fail to decrement
|
||||
* when saturated at REFCOUNT_SATURATED.
|
||||
*
|
||||
* Provides release memory ordering, such that prior loads and stores are done
|
||||
* before.
|
||||
*/
|
||||
static inline void refcount_dec(refcount_t *r)
|
||||
{
|
||||
atomic_dec(&r->refs);
|
||||
if (unlikely(atomic_fetch_sub_release(1, &r->refs) <= 1))
|
||||
refcount_warn_saturate(r, REFCOUNT_DEC_LEAK);
|
||||
}
|
||||
# endif /* !CONFIG_ARCH_HAS_REFCOUNT */
|
||||
#endif /* CONFIG_REFCOUNT_FULL */
|
||||
|
||||
extern __must_check bool refcount_dec_if_one(refcount_t *r);
|
||||
extern __must_check bool refcount_dec_not_one(refcount_t *r);
|
||||
|
|
|
@ -215,14 +215,14 @@ static inline void __raw_write_lock(rwlock_t *lock)
|
|||
|
||||
static inline void __raw_write_unlock(rwlock_t *lock)
|
||||
{
|
||||
rwlock_release(&lock->dep_map, 1, _RET_IP_);
|
||||
rwlock_release(&lock->dep_map, _RET_IP_);
|
||||
do_raw_write_unlock(lock);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static inline void __raw_read_unlock(rwlock_t *lock)
|
||||
{
|
||||
rwlock_release(&lock->dep_map, 1, _RET_IP_);
|
||||
rwlock_release(&lock->dep_map, _RET_IP_);
|
||||
do_raw_read_unlock(lock);
|
||||
preempt_enable();
|
||||
}
|
||||
|
@ -230,7 +230,7 @@ static inline void __raw_read_unlock(rwlock_t *lock)
|
|||
static inline void
|
||||
__raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
|
||||
{
|
||||
rwlock_release(&lock->dep_map, 1, _RET_IP_);
|
||||
rwlock_release(&lock->dep_map, _RET_IP_);
|
||||
do_raw_read_unlock(lock);
|
||||
local_irq_restore(flags);
|
||||
preempt_enable();
|
||||
|
@ -238,7 +238,7 @@ __raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
|
|||
|
||||
static inline void __raw_read_unlock_irq(rwlock_t *lock)
|
||||
{
|
||||
rwlock_release(&lock->dep_map, 1, _RET_IP_);
|
||||
rwlock_release(&lock->dep_map, _RET_IP_);
|
||||
do_raw_read_unlock(lock);
|
||||
local_irq_enable();
|
||||
preempt_enable();
|
||||
|
@ -246,7 +246,7 @@ static inline void __raw_read_unlock_irq(rwlock_t *lock)
|
|||
|
||||
static inline void __raw_read_unlock_bh(rwlock_t *lock)
|
||||
{
|
||||
rwlock_release(&lock->dep_map, 1, _RET_IP_);
|
||||
rwlock_release(&lock->dep_map, _RET_IP_);
|
||||
do_raw_read_unlock(lock);
|
||||
__local_bh_enable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET);
|
||||
}
|
||||
|
@ -254,7 +254,7 @@ static inline void __raw_read_unlock_bh(rwlock_t *lock)
|
|||
static inline void __raw_write_unlock_irqrestore(rwlock_t *lock,
|
||||
unsigned long flags)
|
||||
{
|
||||
rwlock_release(&lock->dep_map, 1, _RET_IP_);
|
||||
rwlock_release(&lock->dep_map, _RET_IP_);
|
||||
do_raw_write_unlock(lock);
|
||||
local_irq_restore(flags);
|
||||
preempt_enable();
|
||||
|
@ -262,7 +262,7 @@ static inline void __raw_write_unlock_irqrestore(rwlock_t *lock,
|
|||
|
||||
static inline void __raw_write_unlock_irq(rwlock_t *lock)
|
||||
{
|
||||
rwlock_release(&lock->dep_map, 1, _RET_IP_);
|
||||
rwlock_release(&lock->dep_map, _RET_IP_);
|
||||
do_raw_write_unlock(lock);
|
||||
local_irq_enable();
|
||||
preempt_enable();
|
||||
|
@ -270,7 +270,7 @@ static inline void __raw_write_unlock_irq(rwlock_t *lock)
|
|||
|
||||
static inline void __raw_write_unlock_bh(rwlock_t *lock)
|
||||
{
|
||||
rwlock_release(&lock->dep_map, 1, _RET_IP_);
|
||||
rwlock_release(&lock->dep_map, _RET_IP_);
|
||||
do_raw_write_unlock(lock);
|
||||
__local_bh_enable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET);
|
||||
}
|
||||
|
|
|
@ -1059,6 +1059,8 @@ struct task_struct {
|
|||
#endif
|
||||
struct list_head pi_state_list;
|
||||
struct futex_pi_state *pi_state_cache;
|
||||
struct mutex futex_exit_mutex;
|
||||
unsigned int futex_state;
|
||||
#endif
|
||||
#ifdef CONFIG_PERF_EVENTS
|
||||
struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
|
||||
|
@ -1447,7 +1449,6 @@ extern struct pid *cad_pid;
|
|||
*/
|
||||
#define PF_IDLE 0x00000002 /* I am an IDLE thread */
|
||||
#define PF_EXITING 0x00000004 /* Getting shut down */
|
||||
#define PF_EXITPIDONE 0x00000008 /* PI exit done on shut down */
|
||||
#define PF_VCPU 0x00000010 /* I'm a virtual CPU */
|
||||
#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */
|
||||
#define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */
|
||||
|
|
|
@ -117,8 +117,10 @@ extern struct mm_struct *get_task_mm(struct task_struct *task);
|
|||
* succeeds.
|
||||
*/
|
||||
extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode);
|
||||
/* Remove the current tasks stale references to the old mm_struct */
|
||||
extern void mm_release(struct task_struct *, struct mm_struct *);
|
||||
/* Remove the current tasks stale references to the old mm_struct on exit() */
|
||||
extern void exit_mm_release(struct task_struct *, struct mm_struct *);
|
||||
/* Remove the current tasks stale references to the old mm_struct on exec() */
|
||||
extern void exec_mm_release(struct task_struct *, struct mm_struct *);
|
||||
|
||||
#ifdef CONFIG_MEMCG
|
||||
extern void mm_update_next_owner(struct mm_struct *mm);
|
||||
|
|
|
@ -79,7 +79,7 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
|
|||
|
||||
local_irq_save(flags);
|
||||
seqcount_acquire_read(&l->dep_map, 0, 0, _RET_IP_);
|
||||
seqcount_release(&l->dep_map, 1, _RET_IP_);
|
||||
seqcount_release(&l->dep_map, _RET_IP_);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
|
@ -384,7 +384,7 @@ static inline void write_seqcount_begin(seqcount_t *s)
|
|||
|
||||
static inline void write_seqcount_end(seqcount_t *s)
|
||||
{
|
||||
seqcount_release(&s->dep_map, 1, _RET_IP_);
|
||||
seqcount_release(&s->dep_map, _RET_IP_);
|
||||
raw_write_seqcount_end(s);
|
||||
}
|
||||
|
||||
|
|
|
@ -147,7 +147,7 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock)
|
|||
|
||||
static inline void __raw_spin_unlock(raw_spinlock_t *lock)
|
||||
{
|
||||
spin_release(&lock->dep_map, 1, _RET_IP_);
|
||||
spin_release(&lock->dep_map, _RET_IP_);
|
||||
do_raw_spin_unlock(lock);
|
||||
preempt_enable();
|
||||
}
|
||||
|
@ -155,7 +155,7 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock)
|
|||
static inline void __raw_spin_unlock_irqrestore(raw_spinlock_t *lock,
|
||||
unsigned long flags)
|
||||
{
|
||||
spin_release(&lock->dep_map, 1, _RET_IP_);
|
||||
spin_release(&lock->dep_map, _RET_IP_);
|
||||
do_raw_spin_unlock(lock);
|
||||
local_irq_restore(flags);
|
||||
preempt_enable();
|
||||
|
@ -163,7 +163,7 @@ static inline void __raw_spin_unlock_irqrestore(raw_spinlock_t *lock,
|
|||
|
||||
static inline void __raw_spin_unlock_irq(raw_spinlock_t *lock)
|
||||
{
|
||||
spin_release(&lock->dep_map, 1, _RET_IP_);
|
||||
spin_release(&lock->dep_map, _RET_IP_);
|
||||
do_raw_spin_unlock(lock);
|
||||
local_irq_enable();
|
||||
preempt_enable();
|
||||
|
@ -171,7 +171,7 @@ static inline void __raw_spin_unlock_irq(raw_spinlock_t *lock)
|
|||
|
||||
static inline void __raw_spin_unlock_bh(raw_spinlock_t *lock)
|
||||
{
|
||||
spin_release(&lock->dep_map, 1, _RET_IP_);
|
||||
spin_release(&lock->dep_map, _RET_IP_);
|
||||
do_raw_spin_unlock(lock);
|
||||
__local_bh_enable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET);
|
||||
}
|
||||
|
|
|
@ -182,7 +182,7 @@ static inline void ww_acquire_done(struct ww_acquire_ctx *ctx)
|
|||
static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx)
|
||||
{
|
||||
#ifdef CONFIG_DEBUG_MUTEXES
|
||||
mutex_release(&ctx->dep_map, 0, _THIS_IP_);
|
||||
mutex_release(&ctx->dep_map, _THIS_IP_);
|
||||
|
||||
DEBUG_LOCKS_WARN_ON(ctx->acquired);
|
||||
if (!IS_ENABLED(CONFIG_PROVE_LOCKING))
|
||||
|
|
|
@ -1488,7 +1488,7 @@ static inline void sock_release_ownership(struct sock *sk)
|
|||
sk->sk_lock.owned = 0;
|
||||
|
||||
/* The sk_lock has mutex_unlock() semantics: */
|
||||
mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
|
||||
mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -339,7 +339,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
|
|||
* up_read_non_owner(). The rwsem_release() is called
|
||||
* here to release the lock from lockdep's perspective.
|
||||
*/
|
||||
rwsem_release(¤t->mm->mmap_sem.dep_map, 1, _RET_IP_);
|
||||
rwsem_release(¤t->mm->mmap_sem.dep_map, _RET_IP_);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -336,7 +336,7 @@ static void lockdep_acquire_cpus_lock(void)
|
|||
|
||||
static void lockdep_release_cpus_lock(void)
|
||||
{
|
||||
rwsem_release(&cpu_hotplug_lock.rw_sem.dep_map, 1, _THIS_IP_);
|
||||
rwsem_release(&cpu_hotplug_lock.rw_sem.dep_map, _THIS_IP_);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -437,7 +437,7 @@ static void exit_mm(void)
|
|||
struct mm_struct *mm = current->mm;
|
||||
struct core_state *core_state;
|
||||
|
||||
mm_release(current, mm);
|
||||
exit_mm_release(current, mm);
|
||||
if (!mm)
|
||||
return;
|
||||
sync_mm_rss(mm);
|
||||
|
@ -746,32 +746,12 @@ void __noreturn do_exit(long code)
|
|||
*/
|
||||
if (unlikely(tsk->flags & PF_EXITING)) {
|
||||
pr_alert("Fixing recursive fault but reboot is needed!\n");
|
||||
/*
|
||||
* We can do this unlocked here. The futex code uses
|
||||
* this flag just to verify whether the pi state
|
||||
* cleanup has been done or not. In the worst case it
|
||||
* loops once more. We pretend that the cleanup was
|
||||
* done as there is no way to return. Either the
|
||||
* OWNER_DIED bit is set by now or we push the blocked
|
||||
* task into the wait for ever nirwana as well.
|
||||
*/
|
||||
tsk->flags |= PF_EXITPIDONE;
|
||||
futex_exit_recursive(tsk);
|
||||
set_current_state(TASK_UNINTERRUPTIBLE);
|
||||
schedule();
|
||||
}
|
||||
|
||||
exit_signals(tsk); /* sets PF_EXITING */
|
||||
/*
|
||||
* Ensure that all new tsk->pi_lock acquisitions must observe
|
||||
* PF_EXITING. Serializes against futex.c:attach_to_pi_owner().
|
||||
*/
|
||||
smp_mb();
|
||||
/*
|
||||
* Ensure that we must observe the pi_state in exit_mm() ->
|
||||
* mm_release() -> exit_pi_state_list().
|
||||
*/
|
||||
raw_spin_lock_irq(&tsk->pi_lock);
|
||||
raw_spin_unlock_irq(&tsk->pi_lock);
|
||||
|
||||
if (unlikely(in_atomic())) {
|
||||
pr_info("note: %s[%d] exited with preempt_count %d\n",
|
||||
|
@ -846,12 +826,6 @@ void __noreturn do_exit(long code)
|
|||
* Make sure we are holding no locks:
|
||||
*/
|
||||
debug_check_no_locks_held();
|
||||
/*
|
||||
* We can do this unlocked here. The futex code uses this flag
|
||||
* just to verify whether the pi state cleanup has been done
|
||||
* or not. In the worst case it loops once more.
|
||||
*/
|
||||
tsk->flags |= PF_EXITPIDONE;
|
||||
|
||||
if (tsk->io_context)
|
||||
exit_io_context(tsk);
|
||||
|
|
|
@ -1283,24 +1283,8 @@ static int wait_for_vfork_done(struct task_struct *child,
|
|||
* restoring the old one. . .
|
||||
* Eric Biederman 10 January 1998
|
||||
*/
|
||||
void mm_release(struct task_struct *tsk, struct mm_struct *mm)
|
||||
static void mm_release(struct task_struct *tsk, struct mm_struct *mm)
|
||||
{
|
||||
/* Get rid of any futexes when releasing the mm */
|
||||
#ifdef CONFIG_FUTEX
|
||||
if (unlikely(tsk->robust_list)) {
|
||||
exit_robust_list(tsk);
|
||||
tsk->robust_list = NULL;
|
||||
}
|
||||
#ifdef CONFIG_COMPAT
|
||||
if (unlikely(tsk->compat_robust_list)) {
|
||||
compat_exit_robust_list(tsk);
|
||||
tsk->compat_robust_list = NULL;
|
||||
}
|
||||
#endif
|
||||
if (unlikely(!list_empty(&tsk->pi_state_list)))
|
||||
exit_pi_state_list(tsk);
|
||||
#endif
|
||||
|
||||
uprobe_free_utask(tsk);
|
||||
|
||||
/* Get rid of any cached register state */
|
||||
|
@ -1333,6 +1317,18 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
|
|||
complete_vfork_done(tsk);
|
||||
}
|
||||
|
||||
void exit_mm_release(struct task_struct *tsk, struct mm_struct *mm)
|
||||
{
|
||||
futex_exit_release(tsk);
|
||||
mm_release(tsk, mm);
|
||||
}
|
||||
|
||||
void exec_mm_release(struct task_struct *tsk, struct mm_struct *mm)
|
||||
{
|
||||
futex_exec_release(tsk);
|
||||
mm_release(tsk, mm);
|
||||
}
|
||||
|
||||
/**
|
||||
* dup_mm() - duplicates an existing mm structure
|
||||
* @tsk: the task_struct with which the new mm will be associated.
|
||||
|
@ -2124,14 +2120,8 @@ static __latent_entropy struct task_struct *copy_process(
|
|||
#ifdef CONFIG_BLOCK
|
||||
p->plug = NULL;
|
||||
#endif
|
||||
#ifdef CONFIG_FUTEX
|
||||
p->robust_list = NULL;
|
||||
#ifdef CONFIG_COMPAT
|
||||
p->compat_robust_list = NULL;
|
||||
#endif
|
||||
INIT_LIST_HEAD(&p->pi_state_list);
|
||||
p->pi_state_cache = NULL;
|
||||
#endif
|
||||
futex_init_task(p);
|
||||
|
||||
/*
|
||||
* sigaltstack should be cleared when sharing the same VM
|
||||
*/
|
||||
|
|
 kernel/futex.c | 326
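Before the kernel/futex.c changes, a simplified and purely illustrative sketch of the exit-state handshake they introduce, written as userspace pthreads code with invented sketch_* names (the real kernel code also serializes the state change against pi_lock and distinguishes exit from exec): the exiting task publishes FUTEX_STATE_EXITING and then FUTEX_STATE_DEAD while holding futex_exit_mutex, and a waiter that observed an owner in the exiting window blocks on that mutex instead of retrying -EAGAIN forever.

	#include <pthread.h>
	#include <stdio.h>

	enum futex_state { FUTEX_STATE_OK, FUTEX_STATE_EXITING, FUTEX_STATE_DEAD };

	struct sketch_task {				/* stand-in for task_struct */
		enum futex_state futex_state;
		pthread_mutex_t futex_exit_mutex;
	};

	/* Exit side: walk the state machine while holding futex_exit_mutex. */
	static void sketch_futex_exit(struct sketch_task *tsk)
	{
		pthread_mutex_lock(&tsk->futex_exit_mutex);
		tsk->futex_state = FUTEX_STATE_EXITING;
		/* ... robust list / PI state cleanup would run here ... */
		tsk->futex_state = FUTEX_STATE_DEAD;
		pthread_mutex_unlock(&tsk->futex_exit_mutex);
	}

	/*
	 * Waiter side: an owner seen between EXITING and DEAD made the lock
	 * attempt return -EBUSY; instead of retrying in a loop, block on the
	 * owner's futex_exit_mutex until the exit path has finished.
	 */
	static void sketch_wait_for_owner_exiting(struct sketch_task *exiting)
	{
		pthread_mutex_lock(&exiting->futex_exit_mutex);
		pthread_mutex_unlock(&exiting->futex_exit_mutex);
	}

	int main(void)
	{
		struct sketch_task tsk = {
			.futex_state = FUTEX_STATE_OK,
			.futex_exit_mutex = PTHREAD_MUTEX_INITIALIZER,
		};

		sketch_futex_exit(&tsk);
		sketch_wait_for_owner_exiting(&tsk);
		printf("state = %d\n", tsk.futex_state);	/* 2 == FUTEX_STATE_DEAD */
		return 0;
	}

This mirrors the livelock fix below: wait_for_owner_exiting() sleeps on the owner's futex_exit_mutex until the owner has reached FUTEX_STATE_DEAD, rather than bouncing on -EAGAIN.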
|
@ -325,6 +325,12 @@ static inline bool should_fail_futex(bool fshared)
|
|||
}
|
||||
#endif /* CONFIG_FAIL_FUTEX */
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
static void compat_exit_robust_list(struct task_struct *curr);
|
||||
#else
|
||||
static inline void compat_exit_robust_list(struct task_struct *curr) { }
|
||||
#endif
|
||||
|
||||
static inline void futex_get_mm(union futex_key *key)
|
||||
{
|
||||
mmgrab(key->private.mm);
|
||||
|
@ -890,7 +896,7 @@ static void put_pi_state(struct futex_pi_state *pi_state)
|
|||
* Kernel cleans up PI-state, but userspace is likely hosed.
|
||||
* (Robust-futex cleanup is separate and might save the day for userspace.)
|
||||
*/
|
||||
void exit_pi_state_list(struct task_struct *curr)
|
||||
static void exit_pi_state_list(struct task_struct *curr)
|
||||
{
|
||||
struct list_head *next, *head = &curr->pi_state_list;
|
||||
struct futex_pi_state *pi_state;
|
||||
|
@ -960,7 +966,8 @@ void exit_pi_state_list(struct task_struct *curr)
|
|||
}
|
||||
raw_spin_unlock_irq(&curr->pi_lock);
|
||||
}
|
||||
|
||||
#else
|
||||
static inline void exit_pi_state_list(struct task_struct *curr) { }
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
@ -1169,16 +1176,47 @@ out_error:
|
|||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* wait_for_owner_exiting - Block until the owner has exited
|
||||
* @exiting: Pointer to the exiting task
|
||||
*
|
||||
* Caller must hold a refcount on @exiting.
|
||||
*/
|
||||
static void wait_for_owner_exiting(int ret, struct task_struct *exiting)
|
||||
{
|
||||
if (ret != -EBUSY) {
|
||||
WARN_ON_ONCE(exiting);
|
||||
return;
|
||||
}
|
||||
|
||||
if (WARN_ON_ONCE(ret == -EBUSY && !exiting))
|
||||
return;
|
||||
|
||||
mutex_lock(&exiting->futex_exit_mutex);
|
||||
/*
|
||||
* No point in doing state checking here. If the waiter got here
|
||||
* while the task was in exec()->exec_futex_release() then it can
|
||||
* have any FUTEX_STATE_* value when the waiter has acquired the
|
||||
* mutex. OK, if running, EXITING or DEAD if it reached exit()
|
||||
* already. Highly unlikely and not a problem. Just one more round
|
||||
* through the futex maze.
|
||||
*/
|
||||
mutex_unlock(&exiting->futex_exit_mutex);
|
||||
|
||||
put_task_struct(exiting);
|
||||
}
|
||||
|
||||
static int handle_exit_race(u32 __user *uaddr, u32 uval,
|
||||
struct task_struct *tsk)
|
||||
{
|
||||
u32 uval2;
|
||||
|
||||
/*
|
||||
* If PF_EXITPIDONE is not yet set, then try again.
|
||||
* If the futex exit state is not yet FUTEX_STATE_DEAD, tell the
|
||||
* caller that the alleged owner is busy.
|
||||
*/
|
||||
if (tsk && !(tsk->flags & PF_EXITPIDONE))
|
||||
return -EAGAIN;
|
||||
if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
|
||||
return -EBUSY;
|
||||
|
||||
/*
|
||||
* Reread the user space value to handle the following situation:
|
||||
|
@@ -1196,8 +1234,9 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval,
  *	*uaddr = 0xC0000000;	      tsk = get_task(PID);
  *	}			      if (!tsk->flags & PF_EXITING) {
  *	...				attach();
- *	tsk->flags |= PF_EXITPIDONE;  } else {
- *				        if (!(tsk->flags & PF_EXITPIDONE))
+ *	tsk->futex_state =	      } else {
+ *	  FUTEX_STATE_DEAD;	        if (tsk->futex_state !=
+ *					     FUTEX_STATE_DEAD)
  *				          return -EAGAIN;
  *				        return -ESRCH; <--- FAIL
  *	}
@ -1228,7 +1267,8 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval,
|
|||
* it after doing proper sanity checks.
|
||||
*/
|
||||
static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
|
||||
struct futex_pi_state **ps)
|
||||
struct futex_pi_state **ps,
|
||||
struct task_struct **exiting)
|
||||
{
|
||||
pid_t pid = uval & FUTEX_TID_MASK;
|
||||
struct futex_pi_state *pi_state;
|
||||
|
@@ -1253,22 +1293,33 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
	}

	/*
-	 * We need to look at the task state flags to figure out,
-	 * whether the task is exiting. To protect against the do_exit
-	 * change of the task flags, we do this protected by
-	 * p->pi_lock:
+	 * We need to look at the task state to figure out, whether the
+	 * task is exiting. To protect against the change of the task state
+	 * in futex_exit_release(), we do this protected by p->pi_lock:
	 */
	raw_spin_lock_irq(&p->pi_lock);
-	if (unlikely(p->flags & PF_EXITING)) {
+	if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
		/*
-		 * The task is on the way out. When PF_EXITPIDONE is
-		 * set, we know that the task has finished the
-		 * cleanup:
+		 * The task is on the way out. When the futex state is
+		 * FUTEX_STATE_DEAD, we know that the task has finished
+		 * the cleanup:
		 */
		int ret = handle_exit_race(uaddr, uval, p);

		raw_spin_unlock_irq(&p->pi_lock);
-		put_task_struct(p);
+		/*
+		 * If the owner task is between FUTEX_STATE_EXITING and
+		 * FUTEX_STATE_DEAD then store the task pointer and keep
+		 * the reference on the task struct. The calling code will
+		 * drop all locks, wait for the task to reach
+		 * FUTEX_STATE_DEAD and then drop the refcount. This is
+		 * required to prevent a live lock when the current task
+		 * preempted the exiting task between the two states.
+		 */
+		if (ret == -EBUSY)
+			*exiting = p;
+		else
+			put_task_struct(p);
		return ret;
	}

@@ -1307,7 +1358,8 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,

static int lookup_pi_state(u32 __user *uaddr, u32 uval,
			   struct futex_hash_bucket *hb,
-			   union futex_key *key, struct futex_pi_state **ps)
+			   union futex_key *key, struct futex_pi_state **ps,
+			   struct task_struct **exiting)
{
	struct futex_q *top_waiter = futex_top_waiter(hb, key);

@@ -1322,7 +1374,7 @@ static int lookup_pi_state(u32 __user *uaddr, u32 uval,
	 * We are the first waiter - try to look up the owner based on
	 * @uval and attach to it.
	 */
-	return attach_to_pi_owner(uaddr, uval, key, ps);
+	return attach_to_pi_owner(uaddr, uval, key, ps, exiting);
}

static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)

@@ -1350,6 +1402,8 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
 *			lookup
 * @task:		the task to perform the atomic lock work for.  This will
 *			be "current" except in the case of requeue pi.
+ * @exiting:		Pointer to store the task pointer of the owner task
+ *			which is in the middle of exiting
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Return:

@@ -1358,11 +1412,17 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
 *  - <0 - error
 *
 * The hb->lock and futex_key refs shall be held by the caller.
+ *
+ * @exiting is only set when the return value is -EBUSY. If so, this holds
+ * a refcount on the exiting task on return and the caller needs to drop it
+ * after waiting for the exit to complete.
 */
static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
				union futex_key *key,
				struct futex_pi_state **ps,
-				struct task_struct *task, int set_waiters)
+				struct task_struct *task,
+				struct task_struct **exiting,
+				int set_waiters)
{
	u32 uval, newval, vpid = task_pid_vnr(task);
	struct futex_q *top_waiter;

@@ -1432,7 +1492,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
	 * attach to the owner. If that fails, no harm done, we only
	 * set the FUTEX_WAITERS bit in the user space variable.
	 */
-	return attach_to_pi_owner(uaddr, newval, key, ps);
+	return attach_to_pi_owner(uaddr, newval, key, ps, exiting);
}

/**
@@ -1480,7 +1540,7 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)

	/*
	 * Queue the task for later wakeup for after we've released
-	 * the hb->lock. wake_q_add() grabs reference to p.
+	 * the hb->lock.
	 */
	wake_q_add_safe(wake_q, p);
}

@@ -1850,6 +1910,8 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
 * @key1:		the from futex key
 * @key2:		the to futex key
 * @ps:			address to store the pi_state pointer
+ * @exiting:		Pointer to store the task pointer of the owner task
+ *			which is in the middle of exiting
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Try and get the lock on behalf of the top waiter if we can do it atomically.

@@ -1857,16 +1919,20 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
 * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
 * hb1 and hb2 must be held by the caller.
 *
+ * @exiting is only set when the return value is -EBUSY. If so, this holds
+ * a refcount on the exiting task on return and the caller needs to drop it
+ * after waiting for the exit to complete.
+ *
 * Return:
 *  - 0 - failed to acquire the lock atomically;
 *  - >0 - acquired the lock, return value is vpid of the top_waiter
 *  - <0 - error
 */
-static int futex_proxy_trylock_atomic(u32 __user *pifutex,
-				      struct futex_hash_bucket *hb1,
-				      struct futex_hash_bucket *hb2,
-				      union futex_key *key1, union futex_key *key2,
-				      struct futex_pi_state **ps, int set_waiters)
+static int
+futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
+			   struct futex_hash_bucket *hb2, union futex_key *key1,
+			   union futex_key *key2, struct futex_pi_state **ps,
+			   struct task_struct **exiting, int set_waiters)
{
	struct futex_q *top_waiter = NULL;
	u32 curval;

@@ -1903,7 +1969,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
	 */
	vpid = task_pid_vnr(top_waiter->task);
	ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
-				   set_waiters);
+				   exiting, set_waiters);
	if (ret == 1) {
		requeue_pi_wake_futex(top_waiter, key2, hb2);
		return vpid;

@@ -2032,6 +2098,8 @@ retry_private:
	}

	if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
+		struct task_struct *exiting = NULL;
+
		/*
		 * Attempt to acquire uaddr2 and wake the top waiter. If we
		 * intend to requeue waiters, force setting the FUTEX_WAITERS

@@ -2039,7 +2107,8 @@ retry_private:
		 * faults rather in the requeue loop below.
		 */
		ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
-						 &key2, &pi_state, nr_requeue);
+						 &key2, &pi_state,
+						 &exiting, nr_requeue);

		/*
		 * At this point the top_waiter has either taken uaddr2 or is

@@ -2066,7 +2135,8 @@ retry_private:
			 * If that call succeeds then we have pi_state and an
			 * initial refcount on it.
			 */
-			ret = lookup_pi_state(uaddr2, ret, hb2, &key2, &pi_state);
+			ret = lookup_pi_state(uaddr2, ret, hb2, &key2,
+					      &pi_state, &exiting);
		}

		switch (ret) {

@@ -2084,17 +2154,24 @@ retry_private:
			if (!ret)
				goto retry;
			goto out;
+		case -EBUSY:
		case -EAGAIN:
			/*
			 * Two reasons for this:
-			 * - Owner is exiting and we just wait for the
+			 * - EBUSY: Owner is exiting and we just wait for the
			 *   exit to complete.
-			 * - The user space value changed.
+			 * - EAGAIN: The user space value changed.
			 */
			double_unlock_hb(hb1, hb2);
			hb_waiters_dec(hb2);
			put_futex_key(&key2);
			put_futex_key(&key1);
+			/*
+			 * Handle the case where the owner is in the middle of
+			 * exiting. Wait for the exit to complete otherwise
+			 * this task might loop forever, aka. live lock.
+			 */
+			wait_for_owner_exiting(ret, exiting);
			cond_resched();
			goto retry;
		default:

@@ -2801,6 +2878,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
{
	struct hrtimer_sleeper timeout, *to;
	struct futex_pi_state *pi_state = NULL;
+	struct task_struct *exiting = NULL;
	struct rt_mutex_waiter rt_waiter;
	struct futex_hash_bucket *hb;
	struct futex_q q = futex_q_init;

@@ -2822,7 +2900,8 @@ retry:
retry_private:
	hb = queue_lock(&q);

-	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
+	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
+				   &exiting, 0);
	if (unlikely(ret)) {
		/*
		 * Atomic work succeeded and we got the lock,

@@ -2835,15 +2914,22 @@ retry_private:
			goto out_unlock_put_key;
		case -EFAULT:
			goto uaddr_faulted;
+		case -EBUSY:
		case -EAGAIN:
			/*
			 * Two reasons for this:
-			 * - Task is exiting and we just wait for the
+			 * - EBUSY: Task is exiting and we just wait for the
			 *   exit to complete.
-			 * - The user space value changed.
+			 * - EAGAIN: The user space value changed.
			 */
			queue_unlock(hb);
			put_futex_key(&q.key);
+			/*
+			 * Handle the case where the owner is in the middle of
+			 * exiting. Wait for the exit to complete otherwise
+			 * this task might loop forever, aka. live lock.
+			 */
+			wait_for_owner_exiting(ret, exiting);
			cond_resched();
			goto retry;
		default:

@@ -3452,11 +3538,16 @@ err_unlock:
	return ret;
}

+/* Constants for the pending_op argument of handle_futex_death */
+#define HANDLE_DEATH_PENDING	true
+#define HANDLE_DEATH_LIST	false
+
/*
 * Process a futex-list entry, check whether it's owned by the
 * dying task, and do notification if so:
 */
-static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
+static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr,
+			      bool pi, bool pending_op)
{
	u32 uval, uninitialized_var(nval), mval;
	int err;

@@ -3469,6 +3560,42 @@ retry:
	if (get_user(uval, uaddr))
		return -1;

+	/*
+	 * Special case for regular (non PI) futexes. The unlock path in
+	 * user space has two race scenarios:
+	 *
+	 * 1. The unlock path releases the user space futex value and
+	 *    before it can execute the futex() syscall to wake up
+	 *    waiters it is killed.
+	 *
+	 * 2. A woken up waiter is killed before it can acquire the
+	 *    futex in user space.
+	 *
+	 * In both cases the TID validation below prevents a wakeup of
+	 * potential waiters which can cause these waiters to block
+	 * forever.
+	 *
+	 * In both cases the following conditions are met:
+	 *
+	 *	1) task->robust_list->list_op_pending != NULL
+	 *	   @pending_op == true
+	 *	2) User space futex value == 0
+	 *	3) Regular futex: @pi == false
+	 *
+	 * If these conditions are met, it is safe to attempt waking up a
+	 * potential waiter without touching the user space futex value and
+	 * trying to set the OWNER_DIED bit. The user space futex value is
+	 * uncontended and the rest of the user space mutex state is
+	 * consistent, so a woken waiter will just take over the
+	 * uncontended futex. Setting the OWNER_DIED bit would create
+	 * inconsistent state and malfunction of the user space owner died
+	 * handling.
+	 */
+	if (pending_op && !pi && !uval) {
+		futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
+		return 0;
+	}
+
	if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
		return 0;

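The special case documented above reduces to one small predicate. The following stand-alone restatement is illustrative only: the callback parameter replaces futex_wake(), and the function name is made up for the sketch.

#include <stdbool.h>
#include <stdint.h>

/*
 * Sketch of the new special case: wake a potential waiter of an
 * uncontended, non-PI futex that was named in list_op_pending when
 * the owner died, without touching the futex word at all.
 */
bool robust_pending_fastpath(uint32_t uval, bool pi, bool pending_op,
			     void (*wake_one)(void))
{
	if (pending_op && !pi && !uval) {
		wake_one();	/* corresponds to futex_wake(uaddr, 1, 1, ...) */
		return true;	/* done, do not set OWNER_DIED */
	}
	return false;		/* fall through to the TID check */
}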
@@ -3547,7 +3674,7 @@ static inline int fetch_robust_entry(struct robust_list __user **entry,
 *
 * We silently return on any sign of list-walking problem.
 */
-void exit_robust_list(struct task_struct *curr)
+static void exit_robust_list(struct task_struct *curr)
{
	struct robust_list_head __user *head = curr->robust_list;
	struct robust_list __user *entry, *next_entry, *pending;

@@ -3588,10 +3715,11 @@ void exit_robust_list(struct task_struct *curr)
		 * A pending lock might already be on the list, so
		 * don't process it twice:
		 */
-		if (entry != pending)
+		if (entry != pending) {
			if (handle_futex_death((void __user *)entry + futex_offset,
-					       curr, pi))
+					       curr, pi, HANDLE_DEATH_LIST))
				return;
+		}
		if (rc)
			return;
		entry = next_entry;

@@ -3605,9 +3733,118 @@ void exit_robust_list(struct task_struct *curr)
		cond_resched();
	}

-	if (pending)
+	if (pending) {
		handle_futex_death((void __user *)pending + futex_offset,
-				   curr, pip);
+				   curr, pip, HANDLE_DEATH_PENDING);
+	}
}

static void futex_cleanup(struct task_struct *tsk)
{
	if (unlikely(tsk->robust_list)) {
		exit_robust_list(tsk);
		tsk->robust_list = NULL;
	}

#ifdef CONFIG_COMPAT
	if (unlikely(tsk->compat_robust_list)) {
		compat_exit_robust_list(tsk);
		tsk->compat_robust_list = NULL;
	}
#endif

	if (unlikely(!list_empty(&tsk->pi_state_list)))
		exit_pi_state_list(tsk);
}

/**
 * futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD
 * @tsk:	task to set the state on
 *
 * Set the futex exit state of the task lockless. The futex waiter code
 * observes that state when a task is exiting and loops until the task has
 * actually finished the futex cleanup. The worst case for this is that the
 * waiter runs through the wait loop until the state becomes visible.
 *
 * This is called from the recursive fault handling path in do_exit().
 *
 * This is best effort. Either the futex exit code has run already or
 * not. If the OWNER_DIED bit has been set on the futex then the waiter can
 * take it over. If not, the problem is pushed back to user space. If the
 * futex exit code did not run yet, then an already queued waiter might
 * block forever, but there is nothing which can be done about that.
 */
void futex_exit_recursive(struct task_struct *tsk)
{
	/* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */
	if (tsk->futex_state == FUTEX_STATE_EXITING)
		mutex_unlock(&tsk->futex_exit_mutex);
	tsk->futex_state = FUTEX_STATE_DEAD;
}

static void futex_cleanup_begin(struct task_struct *tsk)
{
	/*
	 * Prevent various race issues against a concurrent incoming waiter
	 * including live locks by forcing the waiter to block on
	 * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in
	 * attach_to_pi_owner().
	 */
	mutex_lock(&tsk->futex_exit_mutex);

	/*
	 * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock.
	 *
	 * This ensures that all subsequent checks of tsk->futex_state in
	 * attach_to_pi_owner() must observe FUTEX_STATE_EXITING with
	 * tsk->pi_lock held.
	 *
	 * It guarantees also that a pi_state which was queued right before
	 * the state change under tsk->pi_lock by a concurrent waiter must
	 * be observed in exit_pi_state_list().
	 */
	raw_spin_lock_irq(&tsk->pi_lock);
	tsk->futex_state = FUTEX_STATE_EXITING;
	raw_spin_unlock_irq(&tsk->pi_lock);
}

static void futex_cleanup_end(struct task_struct *tsk, int state)
{
	/*
	 * Lockless store. The only side effect is that an observer might
	 * take another loop until it becomes visible.
	 */
	tsk->futex_state = state;
	/*
	 * Drop the exit protection. This unblocks waiters which observed
	 * FUTEX_STATE_EXITING to reevaluate the state.
	 */
	mutex_unlock(&tsk->futex_exit_mutex);
}

void futex_exec_release(struct task_struct *tsk)
{
	/*
	 * The state handling is done for consistency, but in the case of
	 * exec() there is no way to prevent futher damage as the PID stays
	 * the same. But for the unlikely and arguably buggy case that a
	 * futex is held on exec(), this provides at least as much state
	 * consistency protection which is possible.
	 */
	futex_cleanup_begin(tsk);
	futex_cleanup(tsk);
	/*
	 * Reset the state to FUTEX_STATE_OK. The task is alive and about
	 * exec a new binary.
	 */
	futex_cleanup_end(tsk, FUTEX_STATE_OK);
}

void futex_exit_release(struct task_struct *tsk)
{
	futex_cleanup_begin(tsk);
	futex_cleanup(tsk);
	futex_cleanup_end(tsk, FUTEX_STATE_DEAD);
}

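A user-space sketch of the begin/end bracket introduced above, assuming a pthread mutex in place of futex_exit_mutex and plain C11 atomics for the state word; the struct layout and all names are invented for the example, and initialization of the mutex is assumed to happen elsewhere.

/*
 * Model of the OK -> EXITING -> DEAD (or back to OK for exec) protocol:
 * the state is flipped to EXITING under a lock that a waiter can block
 * on, and the final state is published after the cleanup has run.
 */
#include <pthread.h>
#include <stdatomic.h>

enum { STATE_OK, STATE_EXITING, STATE_DEAD };

struct task_sketch {
	pthread_mutex_t exit_mutex;	/* assumed initialized elsewhere */
	_Atomic int	state;
};

void cleanup_begin(struct task_sketch *t)
{
	pthread_mutex_lock(&t->exit_mutex);	/* waiters now block here */
	atomic_store(&t->state, STATE_EXITING);
}

void cleanup_end(struct task_sketch *t, int final_state)
{
	atomic_store(&t->state, final_state);	/* lockless publish */
	pthread_mutex_unlock(&t->exit_mutex);	/* unblock waiters */
}

void exit_release(struct task_sketch *t)
{
	cleanup_begin(t);
	/* ... walk robust list / PI state here ... */
	cleanup_end(t, STATE_DEAD);
}

void exec_release(struct task_sketch *t)
{
	cleanup_begin(t);
	/* ... same cleanup ... */
	cleanup_end(t, STATE_OK);	/* task lives on with a new image */
}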
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,

@@ -3737,7 +3974,7 @@ static void __user *futex_uaddr(struct robust_list __user *entry,
 *
 * We silently return on any sign of list-walking problem.
 */
-void compat_exit_robust_list(struct task_struct *curr)
+static void compat_exit_robust_list(struct task_struct *curr)
{
	struct compat_robust_list_head __user *head = curr->compat_robust_list;
	struct robust_list __user *entry, *next_entry, *pending;

@@ -3784,7 +4021,8 @@ void compat_exit_robust_list(struct task_struct *curr)
		if (entry != pending) {
			void __user *uaddr = futex_uaddr(entry, futex_offset);

-			if (handle_futex_death(uaddr, curr, pi))
+			if (handle_futex_death(uaddr, curr, pi,
+					       HANDLE_DEATH_LIST))
				return;
		}
		if (rc)

@@ -3803,7 +4041,7 @@ void compat_exit_robust_list(struct task_struct *curr)
	if (pending) {
		void __user *uaddr = futex_uaddr(pending, futex_offset);

-		handle_futex_death(uaddr, curr, pip);
+		handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING);
	}
}

@@ -4208,11 +4208,9 @@ static int __lock_downgrade(struct lockdep_map *lock, unsigned long ip)
}

/*
- * Remove the lock to the list of currently held locks - this gets
+ * Remove the lock from the list of currently held locks - this gets
 * called on mutex_unlock()/spin_unlock*() (or on a failed
 * mutex_lock_interruptible()).
- *
- * @nested is an hysterical artifact, needs a tree wide cleanup.
 */
static int
__lock_release(struct lockdep_map *lock, unsigned long ip)

@@ -4491,8 +4489,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
}
EXPORT_SYMBOL_GPL(lock_acquire);

-void lock_release(struct lockdep_map *lock, int nested,
-		  unsigned long ip)
+void lock_release(struct lockdep_map *lock, unsigned long ip)
{
	unsigned long flags;

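For out-of-tree annotation sites, the API change in this hunk is mechanical: the unused nested argument is dropped from lock_release() (and from the mutex_release()/spin_release()/rwsem_release() wrappers seen in the following hunks). A hypothetical caller, not taken from this diff, would change as sketched here.

#include <linux/kernel.h>
#include <linux/lockdep.h>

/* Hypothetical driver-side annotation, shown only to illustrate the
 * signature change; "my_dep_map" is not part of this series. */
static struct lockdep_map my_dep_map;

static void my_unlock_annotation(void)
{
	/* Before this series: lock_release(&my_dep_map, 1, _RET_IP_); */
	lock_release(&my_dep_map, _RET_IP_);
}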
@@ -733,6 +733,9 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip);
 */
void __sched mutex_unlock(struct mutex *lock)
{
+#ifdef CONFIG_DEBUG_MUTEXES
+	WARN_ON(in_interrupt());
+#endif
#ifndef CONFIG_DEBUG_LOCK_ALLOC
	if (__mutex_unlock_fast(lock))
		return;

@@ -1091,7 +1094,7 @@ err:
err_early_kill:
	spin_unlock(&lock->wait_lock);
	debug_mutex_free_waiter(&waiter);
-	mutex_release(&lock->dep_map, 1, ip);
+	mutex_release(&lock->dep_map, ip);
	preempt_enable();
	return ret;
}

@@ -1225,7 +1228,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip)
	DEFINE_WAKE_Q(wake_q);
	unsigned long owner;

-	mutex_release(&lock->dep_map, 1, ip);
+	mutex_release(&lock->dep_map, ip);

	/*
	 * Release the lock before (potentially) taking the spinlock such that

@@ -1413,6 +1416,7 @@ int __sched mutex_trylock(struct mutex *lock)

#ifdef CONFIG_DEBUG_MUTEXES
	DEBUG_LOCKS_WARN_ON(lock->magic != lock);
+	WARN_ON(in_interrupt());
#endif

	locked = __mutex_trylock(lock);

@@ -1517,7 +1517,7 @@ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
	mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
	ret = rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
	if (ret)
-		mutex_release(&lock->dep_map, 1, _RET_IP_);
+		mutex_release(&lock->dep_map, _RET_IP_);

	return ret;
}

@@ -1561,7 +1561,7 @@ rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout)
				       RT_MUTEX_MIN_CHAINWALK,
				       rt_mutex_slowlock);
	if (ret)
-		mutex_release(&lock->dep_map, 1, _RET_IP_);
+		mutex_release(&lock->dep_map, _RET_IP_);

	return ret;
}

@@ -1600,7 +1600,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_trylock);
 */
void __sched rt_mutex_unlock(struct rt_mutex *lock)
{
-	mutex_release(&lock->dep_map, 1, _RET_IP_);
+	mutex_release(&lock->dep_map, _RET_IP_);
	rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_unlock);

@@ -1504,7 +1504,7 @@ int __sched down_read_killable(struct rw_semaphore *sem)
	rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);

	if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) {
-		rwsem_release(&sem->dep_map, 1, _RET_IP_);
+		rwsem_release(&sem->dep_map, _RET_IP_);
		return -EINTR;
	}

@@ -1546,7 +1546,7 @@ int __sched down_write_killable(struct rw_semaphore *sem)

	if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
				  __down_write_killable)) {
-		rwsem_release(&sem->dep_map, 1, _RET_IP_);
+		rwsem_release(&sem->dep_map, _RET_IP_);
		return -EINTR;
	}

@@ -1573,7 +1573,7 @@ EXPORT_SYMBOL(down_write_trylock);
 */
void up_read(struct rw_semaphore *sem)
{
-	rwsem_release(&sem->dep_map, 1, _RET_IP_);
+	rwsem_release(&sem->dep_map, _RET_IP_);
	__up_read(sem);
}
EXPORT_SYMBOL(up_read);

@@ -1583,7 +1583,7 @@ EXPORT_SYMBOL(up_read);
 */
void up_write(struct rw_semaphore *sem)
{
-	rwsem_release(&sem->dep_map, 1, _RET_IP_);
+	rwsem_release(&sem->dep_map, _RET_IP_);
	__up_write(sem);
}
EXPORT_SYMBOL(up_write);

@@ -1639,7 +1639,7 @@ int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass)

	if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
				  __down_write_killable)) {
-		rwsem_release(&sem->dep_map, 1, _RET_IP_);
+		rwsem_release(&sem->dep_map, _RET_IP_);
		return -EINTR;
	}

@@ -671,17 +671,6 @@ EXPORT_SYMBOL(__stack_chk_fail);

#endif

-#ifdef CONFIG_ARCH_HAS_REFCOUNT
-void refcount_error_report(struct pt_regs *regs, const char *err)
-{
-	WARN_RATELIMIT(1, "refcount_t %s at %pB in %s[%d], uid/euid: %u/%u\n",
-		err, (void *)instruction_pointer(regs),
-		current->comm, task_pid_nr(current),
-		from_kuid_munged(&init_user_ns, current_uid()),
-		from_kuid_munged(&init_user_ns, current_euid()));
-}
-#endif
-
core_param(panic, panic_timeout, int, 0644);
core_param(panic_print, panic_print, ulong, 0644);
core_param(pause_on_oops, pause_on_oops, int, 0644);

@@ -248,7 +248,7 @@ static void __up_console_sem(unsigned long ip)
{
	unsigned long flags;

-	mutex_release(&console_lock_dep_map, 1, ip);
+	mutex_release(&console_lock_dep_map, ip);

	printk_safe_enter_irqsave(flags);
	up(&console_sem);

@@ -1679,20 +1679,20 @@ static int console_lock_spinning_disable_and_check(void)
	raw_spin_unlock(&console_owner_lock);

	if (!waiter) {
-		spin_release(&console_owner_dep_map, 1, _THIS_IP_);
+		spin_release(&console_owner_dep_map, _THIS_IP_);
		return 0;
	}

	/* The waiter is now free to continue */
	WRITE_ONCE(console_waiter, false);

-	spin_release(&console_owner_dep_map, 1, _THIS_IP_);
+	spin_release(&console_owner_dep_map, _THIS_IP_);

	/*
	 * Hand off console_lock to waiter. The waiter will perform
	 * the up(). After this, the waiter is the console_lock owner.
	 */
-	mutex_release(&console_lock_dep_map, 1, _THIS_IP_);
+	mutex_release(&console_lock_dep_map, _THIS_IP_);
	return 1;
}

@@ -1746,7 +1746,7 @@ static int console_trylock_spinning(void)
	/* Owner will clear console_waiter on hand off */
	while (READ_ONCE(console_waiter))
		cpu_relax();
-	spin_release(&console_owner_dep_map, 1, _THIS_IP_);
+	spin_release(&console_owner_dep_map, _THIS_IP_);

	printk_safe_exit_irqrestore(flags);
	/*

@@ -3106,7 +3106,7 @@ prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf)
	 * do an early lockdep release here:
	 */
	rq_unpin_lock(rq, rf);
-	spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
+	spin_release(&rq->lock.dep_map, _THIS_IP_);
#ifdef CONFIG_DEBUG_SPINLOCK
	/* this is a valid case when another task releases the spinlock */
	rq->lock.owner = next;

@ -1475,7 +1475,7 @@ static void ww_test_edeadlk_normal(void)
|
|||
|
||||
mutex_lock(&o2.base);
|
||||
o2.ctx = &t2;
|
||||
mutex_release(&o2.base.dep_map, 1, _THIS_IP_);
|
||||
mutex_release(&o2.base.dep_map, _THIS_IP_);
|
||||
|
||||
WWAI(&t);
|
||||
t2 = t;
|
||||
|
@ -1500,7 +1500,7 @@ static void ww_test_edeadlk_normal_slow(void)
|
|||
int ret;
|
||||
|
||||
mutex_lock(&o2.base);
|
||||
mutex_release(&o2.base.dep_map, 1, _THIS_IP_);
|
||||
mutex_release(&o2.base.dep_map, _THIS_IP_);
|
||||
o2.ctx = &t2;
|
||||
|
||||
WWAI(&t);
|
||||
|
@ -1527,7 +1527,7 @@ static void ww_test_edeadlk_no_unlock(void)
|
|||
|
||||
mutex_lock(&o2.base);
|
||||
o2.ctx = &t2;
|
||||
mutex_release(&o2.base.dep_map, 1, _THIS_IP_);
|
||||
mutex_release(&o2.base.dep_map, _THIS_IP_);
|
||||
|
||||
WWAI(&t);
|
||||
t2 = t;
|
||||
|
@ -1551,7 +1551,7 @@ static void ww_test_edeadlk_no_unlock_slow(void)
|
|||
int ret;
|
||||
|
||||
mutex_lock(&o2.base);
|
||||
mutex_release(&o2.base.dep_map, 1, _THIS_IP_);
|
||||
mutex_release(&o2.base.dep_map, _THIS_IP_);
|
||||
o2.ctx = &t2;
|
||||
|
||||
WWAI(&t);
|
||||
|
@ -1576,7 +1576,7 @@ static void ww_test_edeadlk_acquire_more(void)
|
|||
int ret;
|
||||
|
||||
mutex_lock(&o2.base);
|
||||
mutex_release(&o2.base.dep_map, 1, _THIS_IP_);
|
||||
mutex_release(&o2.base.dep_map, _THIS_IP_);
|
||||
o2.ctx = &t2;
|
||||
|
||||
WWAI(&t);
|
||||
|
@ -1597,7 +1597,7 @@ static void ww_test_edeadlk_acquire_more_slow(void)
|
|||
int ret;
|
||||
|
||||
mutex_lock(&o2.base);
|
||||
mutex_release(&o2.base.dep_map, 1, _THIS_IP_);
|
||||
mutex_release(&o2.base.dep_map, _THIS_IP_);
|
||||
o2.ctx = &t2;
|
||||
|
||||
WWAI(&t);
|
||||
|
@ -1618,11 +1618,11 @@ static void ww_test_edeadlk_acquire_more_edeadlk(void)
|
|||
int ret;
|
||||
|
||||
mutex_lock(&o2.base);
|
||||
mutex_release(&o2.base.dep_map, 1, _THIS_IP_);
|
||||
mutex_release(&o2.base.dep_map, _THIS_IP_);
|
||||
o2.ctx = &t2;
|
||||
|
||||
mutex_lock(&o3.base);
|
||||
mutex_release(&o3.base.dep_map, 1, _THIS_IP_);
|
||||
mutex_release(&o3.base.dep_map, _THIS_IP_);
|
||||
o3.ctx = &t2;
|
||||
|
||||
WWAI(&t);
|
||||
|
@ -1644,11 +1644,11 @@ static void ww_test_edeadlk_acquire_more_edeadlk_slow(void)
|
|||
int ret;
|
||||
|
||||
mutex_lock(&o2.base);
|
||||
mutex_release(&o2.base.dep_map, 1, _THIS_IP_);
|
||||
mutex_release(&o2.base.dep_map, _THIS_IP_);
|
||||
o2.ctx = &t2;
|
||||
|
||||
mutex_lock(&o3.base);
|
||||
mutex_release(&o3.base.dep_map, 1, _THIS_IP_);
|
||||
mutex_release(&o3.base.dep_map, _THIS_IP_);
|
||||
o3.ctx = &t2;
|
||||
|
||||
WWAI(&t);
|
||||
|
@ -1669,7 +1669,7 @@ static void ww_test_edeadlk_acquire_wrong(void)
|
|||
int ret;
|
||||
|
||||
mutex_lock(&o2.base);
|
||||
mutex_release(&o2.base.dep_map, 1, _THIS_IP_);
|
||||
mutex_release(&o2.base.dep_map, _THIS_IP_);
|
||||
o2.ctx = &t2;
|
||||
|
||||
WWAI(&t);
|
||||
|
@ -1694,7 +1694,7 @@ static void ww_test_edeadlk_acquire_wrong_slow(void)
|
|||
int ret;
|
||||
|
||||
mutex_lock(&o2.base);
|
||||
mutex_release(&o2.base.dep_map, 1, _THIS_IP_);
|
||||
mutex_release(&o2.base.dep_map, _THIS_IP_);
|
||||
o2.ctx = &t2;
|
||||
|
||||
WWAI(&t);
|
||||
|
|
255
lib/refcount.c
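For context on the saturation semantics referred to throughout the refcount_t changes below, here is a simplified user-space model using C11 atomics. It is a sketch of the idea only, not the kernel's new atomic_fetch_*-based implementation; RC_SATURATED and the function names are invented for the example.

/*
 * Once the counter is driven into the saturation value it sticks there,
 * so a wrapped or underflowed object is leaked instead of being freed
 * and reused. Simplified cmpxchg-loop model.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define RC_SATURATED 0xC0000000u	/* stand-in for REFCOUNT_SATURATED */

struct ref {
	_Atomic unsigned int refs;
};

bool ref_inc_not_zero(struct ref *r)
{
	unsigned int old = atomic_load_explicit(&r->refs, memory_order_relaxed);

	do {
		if (!old)
			return false;		/* object already dead */
		if (old == RC_SATURATED)
			return true;		/* stuck: leak, never free */
	} while (!atomic_compare_exchange_weak_explicit(&r->refs, &old, old + 1,
							memory_order_relaxed,
							memory_order_relaxed));
	return true;
}

bool ref_dec_and_test(struct ref *r)
{
	unsigned int old = atomic_load_explicit(&r->refs, memory_order_relaxed);

	do {
		if (old == RC_SATURATED)
			return false;		/* saturated: never reaches 0 */
		if (!old) {
			fprintf(stderr, "underflow; use-after-free\n");
			return false;
		}
	} while (!atomic_compare_exchange_weak_explicit(&r->refs, &old, old - 1,
							memory_order_release,
							memory_order_relaxed));
	return old == 1;			/* true when we dropped the last reference */
}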
@ -1,41 +1,6 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Variant of atomic_t specialized for reference counts.
|
||||
*
|
||||
* The interface matches the atomic_t interface (to aid in porting) but only
|
||||
* provides the few functions one should use for reference counting.
|
||||
*
|
||||
* It differs in that the counter saturates at UINT_MAX and will not move once
|
||||
* there. This avoids wrapping the counter and causing 'spurious'
|
||||
* use-after-free issues.
|
||||
*
|
||||
* Memory ordering rules are slightly relaxed wrt regular atomic_t functions
|
||||
* and provide only what is strictly required for refcounts.
|
||||
*
|
||||
* The increments are fully relaxed; these will not provide ordering. The
|
||||
* rationale is that whatever is used to obtain the object we're increasing the
|
||||
* reference count on will provide the ordering. For locked data structures,
|
||||
* its the lock acquire, for RCU/lockless data structures its the dependent
|
||||
* load.
|
||||
*
|
||||
* Do note that inc_not_zero() provides a control dependency which will order
|
||||
* future stores against the inc, this ensures we'll never modify the object
|
||||
* if we did not in fact acquire a reference.
|
||||
*
|
||||
* The decrements will provide release order, such that all the prior loads and
|
||||
* stores will be issued before, it also provides a control dependency, which
|
||||
* will order us against the subsequent free().
|
||||
*
|
||||
* The control dependency is against the load of the cmpxchg (ll/sc) that
|
||||
* succeeded. This means the stores aren't fully ordered, but this is fine
|
||||
* because the 1->0 transition indicates no concurrency.
|
||||
*
|
||||
* Note that the allocator is responsible for ordering things between free()
|
||||
* and alloc().
|
||||
*
|
||||
* The decrements dec_and_test() and sub_and_test() also provide acquire
|
||||
* ordering on success.
|
||||
*
|
||||
* Out-of-line refcount functions.
|
||||
*/
|
||||
|
||||
#include <linux/mutex.h>
|
||||
|
@ -43,199 +8,33 @@
|
|||
#include <linux/spinlock.h>
|
||||
#include <linux/bug.h>
|
||||
|
||||
/**
|
||||
* refcount_add_not_zero_checked - add a value to a refcount unless it is 0
|
||||
* @i: the value to add to the refcount
|
||||
* @r: the refcount
|
||||
*
|
||||
* Will saturate at UINT_MAX and WARN.
|
||||
*
|
||||
* Provides no memory ordering, it is assumed the caller has guaranteed the
|
||||
* object memory to be stable (RCU, etc.). It does provide a control dependency
|
||||
* and thereby orders future stores. See the comment on top.
|
||||
*
|
||||
* Use of this function is not recommended for the normal reference counting
|
||||
* use case in which references are taken and released one at a time. In these
|
||||
* cases, refcount_inc(), or one of its variants, should instead be used to
|
||||
* increment a reference count.
|
||||
*
|
||||
* Return: false if the passed refcount is 0, true otherwise
|
||||
*/
|
||||
bool refcount_add_not_zero_checked(unsigned int i, refcount_t *r)
|
||||
#define REFCOUNT_WARN(str) WARN_ONCE(1, "refcount_t: " str ".\n")
|
||||
|
||||
void refcount_warn_saturate(refcount_t *r, enum refcount_saturation_type t)
|
||||
{
|
||||
unsigned int new, val = atomic_read(&r->refs);
|
||||
refcount_set(r, REFCOUNT_SATURATED);
|
||||
|
||||
do {
|
||||
if (!val)
|
||||
return false;
|
||||
|
||||
if (unlikely(val == UINT_MAX))
|
||||
return true;
|
||||
|
||||
new = val + i;
|
||||
if (new < val)
|
||||
new = UINT_MAX;
|
||||
|
||||
} while (!atomic_try_cmpxchg_relaxed(&r->refs, &val, new));
|
||||
|
||||
WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
|
||||
|
||||
return true;
|
||||
}
|
||||
EXPORT_SYMBOL(refcount_add_not_zero_checked);
|
||||
|
||||
/**
|
||||
* refcount_add_checked - add a value to a refcount
|
||||
* @i: the value to add to the refcount
|
||||
* @r: the refcount
|
||||
*
|
||||
* Similar to atomic_add(), but will saturate at UINT_MAX and WARN.
|
||||
*
|
||||
* Provides no memory ordering, it is assumed the caller has guaranteed the
|
||||
* object memory to be stable (RCU, etc.). It does provide a control dependency
|
||||
* and thereby orders future stores. See the comment on top.
|
||||
*
|
||||
* Use of this function is not recommended for the normal reference counting
|
||||
* use case in which references are taken and released one at a time. In these
|
||||
* cases, refcount_inc(), or one of its variants, should instead be used to
|
||||
* increment a reference count.
|
||||
*/
|
||||
void refcount_add_checked(unsigned int i, refcount_t *r)
|
||||
{
|
||||
WARN_ONCE(!refcount_add_not_zero_checked(i, r), "refcount_t: addition on 0; use-after-free.\n");
|
||||
}
|
||||
EXPORT_SYMBOL(refcount_add_checked);
|
||||
|
||||
/**
|
||||
* refcount_inc_not_zero_checked - increment a refcount unless it is 0
|
||||
* @r: the refcount to increment
|
||||
*
|
||||
* Similar to atomic_inc_not_zero(), but will saturate at UINT_MAX and WARN.
|
||||
*
|
||||
* Provides no memory ordering, it is assumed the caller has guaranteed the
|
||||
* object memory to be stable (RCU, etc.). It does provide a control dependency
|
||||
* and thereby orders future stores. See the comment on top.
|
||||
*
|
||||
* Return: true if the increment was successful, false otherwise
|
||||
*/
|
||||
bool refcount_inc_not_zero_checked(refcount_t *r)
|
||||
{
|
||||
unsigned int new, val = atomic_read(&r->refs);
|
||||
|
||||
do {
|
||||
new = val + 1;
|
||||
|
||||
if (!val)
|
||||
return false;
|
||||
|
||||
if (unlikely(!new))
|
||||
return true;
|
||||
|
||||
} while (!atomic_try_cmpxchg_relaxed(&r->refs, &val, new));
|
||||
|
||||
WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
|
||||
|
||||
return true;
|
||||
}
|
||||
EXPORT_SYMBOL(refcount_inc_not_zero_checked);
|
||||
|
||||
/**
|
||||
* refcount_inc_checked - increment a refcount
|
||||
* @r: the refcount to increment
|
||||
*
|
||||
* Similar to atomic_inc(), but will saturate at UINT_MAX and WARN.
|
||||
*
|
||||
* Provides no memory ordering, it is assumed the caller already has a
|
||||
* reference on the object.
|
||||
*
|
||||
* Will WARN if the refcount is 0, as this represents a possible use-after-free
|
||||
* condition.
|
||||
*/
|
||||
void refcount_inc_checked(refcount_t *r)
|
||||
{
|
||||
WARN_ONCE(!refcount_inc_not_zero_checked(r), "refcount_t: increment on 0; use-after-free.\n");
|
||||
}
|
||||
EXPORT_SYMBOL(refcount_inc_checked);
|
||||
|
||||
/**
|
||||
* refcount_sub_and_test_checked - subtract from a refcount and test if it is 0
|
||||
* @i: amount to subtract from the refcount
|
||||
* @r: the refcount
|
||||
*
|
||||
* Similar to atomic_dec_and_test(), but it will WARN, return false and
|
||||
* ultimately leak on underflow and will fail to decrement when saturated
|
||||
* at UINT_MAX.
|
||||
*
|
||||
* Provides release memory ordering, such that prior loads and stores are done
|
||||
* before, and provides an acquire ordering on success such that free()
|
||||
* must come after.
|
||||
*
|
||||
* Use of this function is not recommended for the normal reference counting
|
||||
* use case in which references are taken and released one at a time. In these
|
||||
* cases, refcount_dec(), or one of its variants, should instead be used to
|
||||
* decrement a reference count.
|
||||
*
|
||||
* Return: true if the resulting refcount is 0, false otherwise
|
||||
*/
|
||||
bool refcount_sub_and_test_checked(unsigned int i, refcount_t *r)
|
||||
{
|
||||
unsigned int new, val = atomic_read(&r->refs);
|
||||
|
||||
do {
|
||||
if (unlikely(val == UINT_MAX))
|
||||
return false;
|
||||
|
||||
new = val - i;
|
||||
if (new > val) {
|
||||
WARN_ONCE(new > val, "refcount_t: underflow; use-after-free.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
} while (!atomic_try_cmpxchg_release(&r->refs, &val, new));
|
||||
|
||||
if (!new) {
|
||||
smp_acquire__after_ctrl_dep();
|
||||
return true;
|
||||
switch (t) {
|
||||
case REFCOUNT_ADD_NOT_ZERO_OVF:
|
||||
REFCOUNT_WARN("saturated; leaking memory");
|
||||
break;
|
||||
case REFCOUNT_ADD_OVF:
|
||||
REFCOUNT_WARN("saturated; leaking memory");
|
||||
break;
|
||||
case REFCOUNT_ADD_UAF:
|
||||
REFCOUNT_WARN("addition on 0; use-after-free");
|
||||
break;
|
||||
case REFCOUNT_SUB_UAF:
|
||||
REFCOUNT_WARN("underflow; use-after-free");
|
||||
break;
|
||||
case REFCOUNT_DEC_LEAK:
|
||||
REFCOUNT_WARN("decrement hit 0; leaking memory");
|
||||
break;
|
||||
default:
|
||||
REFCOUNT_WARN("unknown saturation event!?");
|
||||
}
|
||||
return false;
|
||||
|
||||
}
|
||||
EXPORT_SYMBOL(refcount_sub_and_test_checked);
|
||||
|
||||
/**
|
||||
* refcount_dec_and_test_checked - decrement a refcount and test if it is 0
|
||||
* @r: the refcount
|
||||
*
|
||||
* Similar to atomic_dec_and_test(), it will WARN on underflow and fail to
|
||||
* decrement when saturated at UINT_MAX.
|
||||
*
|
||||
* Provides release memory ordering, such that prior loads and stores are done
|
||||
* before, and provides an acquire ordering on success such that free()
|
||||
* must come after.
|
||||
*
|
||||
* Return: true if the resulting refcount is 0, false otherwise
|
||||
*/
|
||||
bool refcount_dec_and_test_checked(refcount_t *r)
|
||||
{
|
||||
return refcount_sub_and_test_checked(1, r);
|
||||
}
|
||||
EXPORT_SYMBOL(refcount_dec_and_test_checked);
|
||||
|
||||
/**
|
||||
* refcount_dec_checked - decrement a refcount
|
||||
* @r: the refcount
|
||||
*
|
||||
* Similar to atomic_dec(), it will WARN on underflow and fail to decrement
|
||||
* when saturated at UINT_MAX.
|
||||
*
|
||||
* Provides release memory ordering, such that prior loads and stores are done
|
||||
* before.
|
||||
*/
|
||||
void refcount_dec_checked(refcount_t *r)
|
||||
{
|
||||
WARN_ONCE(refcount_dec_and_test_checked(r), "refcount_t: decrement hit 0; leaking memory.\n");
|
||||
}
|
||||
EXPORT_SYMBOL(refcount_dec_checked);
|
||||
EXPORT_SYMBOL(refcount_warn_saturate);
|
||||
|
||||
/**
|
||||
* refcount_dec_if_one - decrement a refcount if it is 1
|
||||
|
@@ -277,7 +76,7 @@ bool refcount_dec_not_one(refcount_t *r)
	unsigned int new, val = atomic_read(&r->refs);

	do {
-		if (unlikely(val == UINT_MAX))
+		if (unlikely(val == REFCOUNT_SATURATED))
			return true;

		if (val == 1)

@@ -302,7 +101,7 @@ EXPORT_SYMBOL(refcount_dec_not_one);
 * @lock: the mutex to be locked
 *
 * Similar to atomic_dec_and_mutex_lock(), it will WARN on underflow and fail
- * to decrement when saturated at UINT_MAX.
+ * to decrement when saturated at REFCOUNT_SATURATED.
 *
 * Provides release memory ordering, such that prior loads and stores are done
 * before, and provides a control dependency such that free() must come after.

@@ -333,7 +132,7 @@ EXPORT_SYMBOL(refcount_dec_and_mutex_lock);
 * @lock: the spinlock to be locked
 *
 * Similar to atomic_dec_and_lock(), it will WARN on underflow and fail to
- * decrement when saturated at UINT_MAX.
+ * decrement when saturated at REFCOUNT_SATURATED.
 *
 * Provides release memory ordering, such that prior loads and stores are done
 * before, and provides a control dependency such that free() must come after.

@@ -23,7 +23,7 @@ unsigned int check_preemption_disabled(const char *what1, const char *what2)
	 * Kernel threads bound to a single CPU can safely use
	 * smp_processor_id():
	 */
-	if (cpumask_equal(current->cpus_ptr, cpumask_of(this_cpu)))
+	if (current->nr_cpus_allowed == 1)
		goto out;

	/*

@@ -1800,7 +1800,7 @@ static void mem_cgroup_oom_unlock(struct mem_cgroup *memcg)
	struct mem_cgroup *iter;

	spin_lock(&memcg_oom_lock);
-	mutex_release(&memcg_oom_lock_dep_map, 1, _RET_IP_);
+	mutex_release(&memcg_oom_lock_dep_map, _RET_IP_);
	for_each_mem_cgroup_tree(iter, memcg)
		iter->oom_lock = false;
	spin_unlock(&memcg_oom_lock);

@@ -518,7 +518,7 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,

		rc = sk_backlog_rcv(sk, skb);

-		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
+		mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
	} else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) {
		bh_unlock_sock(sk);
		atomic_inc(&sk->sk_drops);

@@ -42,8 +42,7 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
		  int trylock, int read, int check,
		  struct lockdep_map *nest_lock, unsigned long ip);
-void lock_release(struct lockdep_map *lock, int nested,
-		  unsigned long ip);
+void lock_release(struct lockdep_map *lock, unsigned long ip);
void lockdep_reset_lock(struct lockdep_map *lock);
void lockdep_register_key(struct lock_class_key *key);
void lockdep_unregister_key(struct lock_class_key *key);

@@ -42,7 +42,7 @@ static inline int liblockdep_pthread_mutex_lock(liblockdep_pthread_mutex_t *lock)

static inline int liblockdep_pthread_mutex_unlock(liblockdep_pthread_mutex_t *lock)
{
	lock_release(&lock->dep_map, 0, (unsigned long)_RET_IP_);
	lock_release(&lock->dep_map, (unsigned long)_RET_IP_);
	return pthread_mutex_unlock(&lock->mutex);
}

@@ -44,7 +44,7 @@ static inline int liblockdep_pthread_rwlock_rdlock(liblockdep_pthread_rwlock_t *lock)

static inline int liblockdep_pthread_rwlock_unlock(liblockdep_pthread_rwlock_t *lock)
{
	lock_release(&lock->dep_map, 0, (unsigned long)_RET_IP_);
	lock_release(&lock->dep_map, (unsigned long)_RET_IP_);
	return pthread_rwlock_unlock(&lock->rwlock);
}

@ -270,7 +270,7 @@ int pthread_mutex_lock(pthread_mutex_t *mutex)
|
|||
*/
|
||||
r = ll_pthread_mutex_lock(mutex);
|
||||
if (r)
|
||||
lock_release(&__get_lock(mutex)->dep_map, 0, (unsigned long)_RET_IP_);
|
||||
lock_release(&__get_lock(mutex)->dep_map, (unsigned long)_RET_IP_);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
@ -284,7 +284,7 @@ int pthread_mutex_trylock(pthread_mutex_t *mutex)
|
|||
lock_acquire(&__get_lock(mutex)->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_);
|
||||
r = ll_pthread_mutex_trylock(mutex);
|
||||
if (r)
|
||||
lock_release(&__get_lock(mutex)->dep_map, 0, (unsigned long)_RET_IP_);
|
||||
lock_release(&__get_lock(mutex)->dep_map, (unsigned long)_RET_IP_);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
@ -295,7 +295,7 @@ int pthread_mutex_unlock(pthread_mutex_t *mutex)
|
|||
|
||||
try_init_preload();
|
||||
|
||||
lock_release(&__get_lock(mutex)->dep_map, 0, (unsigned long)_RET_IP_);
|
||||
lock_release(&__get_lock(mutex)->dep_map, (unsigned long)_RET_IP_);
|
||||
/*
|
||||
* Just like taking a lock, only in reverse!
|
||||
*
|
||||
|
@ -355,7 +355,7 @@ int pthread_rwlock_rdlock(pthread_rwlock_t *rwlock)
|
|||
lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 2, 1, NULL, (unsigned long)_RET_IP_);
|
||||
r = ll_pthread_rwlock_rdlock(rwlock);
|
||||
if (r)
|
||||
lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_);
|
||||
lock_release(&__get_lock(rwlock)->dep_map, (unsigned long)_RET_IP_);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
@ -369,7 +369,7 @@ int pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock)
|
|||
lock_acquire(&__get_lock(rwlock)->dep_map, 0, 1, 2, 1, NULL, (unsigned long)_RET_IP_);
|
||||
r = ll_pthread_rwlock_tryrdlock(rwlock);
|
||||
if (r)
|
||||
lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_);
|
||||
lock_release(&__get_lock(rwlock)->dep_map, (unsigned long)_RET_IP_);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
@ -383,7 +383,7 @@ int pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock)
|
|||
lock_acquire(&__get_lock(rwlock)->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_);
|
||||
r = ll_pthread_rwlock_trywrlock(rwlock);
|
||||
if (r)
|
||||
lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_);
|
||||
lock_release(&__get_lock(rwlock)->dep_map, (unsigned long)_RET_IP_);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
@ -397,7 +397,7 @@ int pthread_rwlock_wrlock(pthread_rwlock_t *rwlock)
|
|||
lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_);
|
||||
r = ll_pthread_rwlock_wrlock(rwlock);
|
||||
if (r)
|
||||
lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_);
|
||||
lock_release(&__get_lock(rwlock)->dep_map, (unsigned long)_RET_IP_);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
@ -408,7 +408,7 @@ int pthread_rwlock_unlock(pthread_rwlock_t *rwlock)
|
|||
|
||||
init_preload();
|
||||
|
||||
lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_);
|
||||
lock_release(&__get_lock(rwlock)->dep_map, (unsigned long)_RET_IP_);
|
||||
r = ll_pthread_rwlock_unlock(rwlock);
|
||||
if (r)
|
||||
lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_);
|
||||
|
|