ARC: cmpxchg/xchg: rewrite as macros to make type safe
Existing code forces/assumes args to be of type "long", which won't work
in an LP64 regime, so prepare the code for that.

Interestingly this should be a non-functional change, but I do see some
codegen changes:

| bloat-o-meter vmlinux-cmpxchg-A vmlinux-cmpxchg-B
| add/remove: 0/0 grow/shrink: 17/12 up/down: 218/-150 (68)
|
| Function                        old     new   delta
| rwsem_optimistic_spin           518     550     +32
| rwsem_down_write_slowpath      1244    1274     +30
| __do_sys_perf_event_open       2576    2600     +24
| down_read                       192     200      +8
| __down_read                     192     200      +8
...
| task_work_run                   168     148     -20
| dma_fence_chain_walk.part       760     736     -24
| __genradix_ptr_alloc            674     646     -28
| Total: Before=6187409, After=6187477, chg +0.00%

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Vineet Gupta <vgupta@kernel.org>
parent ecf51c9fa0
commit e188f3330a
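For context, a minimal userspace sketch (not from this commit) of the type-safety
idea: the operand width is taken from the pointer type, and a mis-sized operand
fails the build instead of being funnelled through "unsigned long". The
my_cmpxchg() name is hypothetical, atomicity is intentionally left out, and
_Static_assert stands in for the kernel's BUILD_BUG(); it needs gcc/clang for
statement expressions and __typeof__.

#include <stdint.h>
#include <stdio.h>

/*
 * Hypothetical, non-atomic stand-in for arch_cmpxchg(): the size is derived
 * from the pointed-to type; anything other than 4 bytes breaks the build.
 */
#define my_cmpxchg(ptr, old, new)					\
({									\
	__typeof__(ptr) _p_ = (ptr);					\
	__typeof__(*(ptr)) _o_ = (old);					\
	__typeof__(*(ptr)) _n_ = (new);					\
	__typeof__(*(ptr)) _prev_;					\
									\
	_Static_assert(sizeof(*(ptr)) == 4, "only 32-bit objects");	\
	_prev_ = *_p_;							\
	if (_prev_ == _o_)						\
		*_p_ = _n_;						\
	_prev_;								\
})

int main(void)
{
	uint32_t v = 1;
	uint32_t prev = my_cmpxchg(&v, 1u, 2u);	/* ok: 4-byte object */

	/* uint64_t w = 0; my_cmpxchg(&w, 0, 1);  -> compile-time error */
	printf("prev=%u now=%u\n", (unsigned)prev, (unsigned)v);
	return 0;
}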
@@ -6,6 +6,7 @@
 #ifndef __ASM_ARC_CMPXCHG_H
 #define __ASM_ARC_CMPXCHG_H
 
+#include <linux/build_bug.h>
 #include <linux/types.h>
 
 #include <asm/barrier.h>
@@ -13,62 +14,77 @@
 
 #ifdef CONFIG_ARC_HAS_LLSC
 
-static inline unsigned long
-__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
-{
-	unsigned long prev;
-
-	/*
-	 * Explicit full memory barrier needed before/after as
-	 * LLOCK/SCOND themselves don't provide any such semantics
-	 */
-	smp_mb();
-
-	__asm__ __volatile__(
-	"1:	llock   %0, [%1]	\n"
-	"	brne    %0, %2, 2f	\n"
-	"	scond   %3, [%1]	\n"
-	"	bnz     1b		\n"
-	"2:				\n"
-	: "=&r"(prev)	/* Early clobber, to prevent reg reuse */
-	: "r"(ptr),	/* Not "m": llock only supports reg direct addr mode */
-	  "ir"(expected),
-	  "r"(new)	/* can't be "ir". scond can't take LIMM for "b" */
-	: "cc", "memory"); /* so that gcc knows memory is being written here */
-
-	smp_mb();
-
-	return prev;
-}
+/*
+ * if (*ptr == @old)
+ *      *ptr = @new
+ */
+#define __cmpxchg(ptr, old, new)					\
+({									\
+	__typeof__(*(ptr)) _prev;					\
+									\
+	__asm__ __volatile__(						\
+	"1:	llock  %0, [%1]	\n"					\
+	"	brne   %0, %2, 2f	\n"				\
+	"	scond  %3, [%1]	\n"					\
+	"	bnz     1b		\n"				\
+	"2:				\n"				\
+	: "=&r"(_prev)	/* Early clobber prevent reg reuse */		\
+	: "r"(ptr),	/* Not "m": llock only supports reg */		\
+	  "ir"(old),							\
+	  "r"(new)	/* Not "ir": scond can't take LIMM */		\
+	: "cc",								\
+	  "memory");	/* gcc knows memory is clobbered */		\
+									\
+	_prev;								\
+})
+
+#define arch_cmpxchg(ptr, old, new)					\
+({									\
+	__typeof__(ptr) _p_ = (ptr);					\
+	__typeof__(*(ptr)) _o_ = (old);					\
+	__typeof__(*(ptr)) _n_ = (new);					\
+	__typeof__(*(ptr)) _prev_;					\
+									\
+	switch(sizeof((_p_))) {						\
+	case 4:								\
+		/*							\
+		 * Explicit full memory barrier needed before/after	\
+		 */							\
+		smp_mb();						\
+		_prev_ = __cmpxchg(_p_, _o_, _n_);			\
+		smp_mb();						\
+		break;							\
+	default:							\
+		BUILD_BUG();						\
+	}								\
+	_prev_;								\
+})
 
-#else /* !CONFIG_ARC_HAS_LLSC */
+#else
 
-static inline unsigned long
-__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
-{
-	unsigned long flags;
-	int prev;
-	volatile unsigned long *p = ptr;
-
-	/*
-	 * spin lock/unlock provide the needed smp_mb() before/after
-	 */
-	atomic_ops_lock(flags);
-	prev = *p;
-	if (prev == expected)
-		*p = new;
-	atomic_ops_unlock(flags);
-	return prev;
-}
+#define arch_cmpxchg(ptr, old, new)					\
+({									\
+	volatile __typeof__(ptr) _p_ = (ptr);				\
+	__typeof__(*(ptr)) _o_ = (old);					\
+	__typeof__(*(ptr)) _n_ = (new);					\
+	__typeof__(*(ptr)) _prev_;					\
+	unsigned long __flags;						\
+									\
+	BUILD_BUG_ON(sizeof(_p_) != 4);					\
+									\
+	/*								\
+	 * spin lock/unlock provide the needed smp_mb() before/after	\
+	 */								\
+	atomic_ops_lock(__flags);					\
+	_prev_ = *_p_;							\
+	if (_prev_ == _o_)						\
+		*_p_ = _n_;						\
+	atomic_ops_unlock(__flags);					\
+	_prev_;								\
+})
 
 #endif
 
-#define arch_cmpxchg(ptr, o, n) ({			\
-	(typeof(*(ptr)))__cmpxchg((ptr),		\
-				  (unsigned long)(o),	\
-				  (unsigned long)(n));	\
-})
-
 /*
  * atomic_cmpxchg is same as cmpxchg
  *   LLSC: only different in data-type, semantics are exactly same
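A note on the hunk above (illustration, not part of the commit): arch_cmpxchg()
now derives the operand width from the pointer type and rejects anything other
than 32 bits via BUILD_BUG(), with the smp_mb() pair kept around the LLOCK/SCOND
retry loop. A minimal kernel-style sketch of a call site, assuming a hypothetical
claim_once() helper:

#include <linux/atomic.h>
#include <linux/types.h>

/* Claim a 32-bit flag exactly once; returns true only for the winner. */
static bool claim_once(u32 *flag)
{
	/* cmpxchg() hands back the value *flag held before the attempt */
	return cmpxchg(flag, 0, 1) == 0;
}

Had flag been a u64, the build would now fail on ARC instead of the argument
being silently narrowed through "unsigned long".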
@@ -77,60 +93,65 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
  */
 #define arch_atomic_cmpxchg(v, o, n) ((int)arch_cmpxchg(&((v)->counter), (o), (n)))
 
 /*
- * xchg (reg with memory) based on "Native atomic" EX insn
+ * xchg
  */
-static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
-				   int size)
-{
-	extern unsigned long __xchg_bad_pointer(void);
-
-	switch (size) {
-	case 4:
-		smp_mb();
-
-		__asm__ __volatile__(
-		"	ex  %0, [%1]	\n"
-		: "+r"(val)
-		: "r"(ptr)
-		: "memory");
-
-		smp_mb();
-
-		return val;
-	}
-	return __xchg_bad_pointer();
-}
-
-#define _xchg(ptr, with) ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), \
-						 sizeof(*(ptr))))
-
-/*
- * xchg() maps directly to ARC EX instruction which guarantees atomicity.
- * However in !LLSC config, it also needs to be use @atomic_ops_lock spinlock
- * due to a subtle reason:
- *  - For !LLSC, cmpxchg() needs to use that lock (see above) and there is lot
- *    of kernel code which calls xchg()/cmpxchg() on same data (see llist.h)
- *    Hence xchg() needs to follow same locking rules.
- */
-
-#ifndef CONFIG_ARC_HAS_LLSC
-
-#define arch_xchg(ptr, with)		\
-({					\
-	unsigned long flags;		\
-	typeof(*(ptr)) old_val;		\
-					\
-	atomic_ops_lock(flags);		\
-	old_val = _xchg(ptr, with);	\
-	atomic_ops_unlock(flags);	\
-	old_val;			\
-})
-
-#else
-
-#define arch_xchg(ptr, with)  _xchg(ptr, with)
+#ifdef CONFIG_ARC_HAS_LLSC
+
+#define __xchg(ptr, val)						\
+({									\
+	__asm__ __volatile__(						\
+	"	ex  %0, [%1]	\n"	/* set new value */		\
+	: "+r"(val)							\
+	: "r"(ptr)							\
+	: "memory");							\
+	_val_;		/* get old value */				\
+})
+
+#define arch_xchg(ptr, val)						\
+({									\
+	__typeof__(ptr) _p_ = (ptr);					\
+	__typeof__(*(ptr)) _val_ = (val);				\
+									\
+	switch(sizeof(*(_p_))) {					\
+	case 4:								\
+		smp_mb();						\
+		_val_ = __xchg(_p_, _val_);				\
+		smp_mb();						\
+		break;							\
+	default:							\
+		BUILD_BUG();						\
+	}								\
+	_val_;								\
+})
+
+#else  /* !CONFIG_ARC_HAS_LLSC */
+
+/*
+ * EX instructions is baseline and present in !LLSC too. But in this
+ * regime it still needs use @atomic_ops_lock spinlock to allow interop
+ * with cmpxchg() which uses spinlock in !LLSC
+ * (llist.h use xchg and cmpxchg on sama data)
+ */
+
+#define arch_xchg(ptr, val)						\
+({									\
+	__typeof__(ptr) _p_ = (ptr);					\
+	__typeof__(*(ptr)) _val_ = (val);				\
+									\
+	unsigned long __flags;						\
+									\
+	atomic_ops_lock(__flags);					\
+									\
+	__asm__ __volatile__(						\
+	"	ex  %0, [%1]	\n"					\
+	: "+r"(_val_)							\
+	: "r"(_p_)							\
+	: "memory");							\
+									\
+	atomic_ops_unlock(__flags);					\
+	_val_;								\
+})
 
 #endif
 
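To make the comment in the !LLSC branch above concrete, a sketch (not from this
commit) of the interop the lock protects; producer()/consumer() are hypothetical
names, the llist calls are the real API:

#include <linux/llist.h>

/*
 * llist mixes the two primitives on the same word:
 *   llist_add()     -> cmpxchg() on head->first
 *   llist_del_all() -> xchg() on head->first
 *
 * On !LLSC ARC, cmpxchg() is a load/compare/store done under
 * atomic_ops_lock. If arch_xchg() were a bare EX instruction it could
 * slip in between that load and store, and its update would be lost
 * when the cmpxchg() path writes back. Hence arch_xchg() takes the
 * same lock.
 */
static void producer(struct llist_head *head, struct llist_node *node)
{
	llist_add(node, head);			/* internally: cmpxchg */
}

static struct llist_node *consumer(struct llist_head *head)
{
	return llist_del_all(head);		/* internally: xchg */
}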