ARC: cmpxchg/xchg: rewrite as macros to make type safe

Existing code forces/assumes args to be of type "long", which won't work in an
LP64 regime, so prepare the code for that.
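
To make the type issue concrete, here is a small user-space sketch (not part of
this patch): GCC's __sync_val_compare_and_swap builtin stands in for the ARC
llock/scond sequence, and a C11 _Static_assert stands in for the kernel's
BUILD_BUG_ON()/BUILD_BUG(). The helper names are invented for illustration.

  /* cmpxchg_sketch.c -- build with: gcc -O2 -std=gnu11 cmpxchg_sketch.c */
  #include <stdio.h>
  #include <stdint.h>

  /*
   * Old style: everything funneled through unsigned long. On LP64
   * (unsigned long == 64 bit) the casts silently narrow a 64-bit operand
   * down to the 32-bit primitive and the compiler never complains.
   */
  static inline unsigned long
  legacy_cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
  {
          /* builtin standing in for the 32-bit llock/scond sequence */
          return __sync_val_compare_and_swap((volatile uint32_t *)ptr,
                                             (uint32_t)expected, (uint32_t)new);
  }

  /*
   * Macro style: keeps the pointee's type, so the result type is right and
   * an unsupported operand size is rejected at compile time.
   */
  #define sketch_cmpxchg(ptr, old, new)                                 \
  ({                                                                    \
          __typeof__(ptr) _p_ = (ptr);                                  \
          __typeof__(*(ptr)) _o_ = (old);                               \
          __typeof__(*(ptr)) _n_ = (new);                               \
                                                                        \
          _Static_assert(sizeof(*(ptr)) == 4,                           \
                         "only 32-bit cmpxchg implemented");            \
                                                                        \
          __sync_val_compare_and_swap(_p_, _o_, _n_);                   \
  })

  int main(void)
  {
          uint32_t v32 = 1;
          uint64_t v64 = 1;

          printf("legacy: %lu\n", legacy_cmpxchg(&v32, 1, 2));
          printf("macro : %u\n", (unsigned)sketch_cmpxchg(&v32, 2u, 3u));

          /* Compiles without a diagnostic, quietly touching only 32 bits: */
          (void)legacy_cmpxchg(&v64, 1, 2);

          /* Would fail to build -- which is exactly the point: */
          /* sketch_cmpxchg(&v64, 1ull, 2ull); */

          return 0;
  }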

Interestingly, this should be a non-functional change, but I do see
some codegen changes.

| bloat-o-meter vmlinux-cmpxchg-A vmlinux-cmpxchg-B
| add/remove: 0/0 grow/shrink: 17/12 up/down: 218/-150 (68)
|
| Function                                     old     new   delta
| rwsem_optimistic_spin                        518     550     +32
| rwsem_down_write_slowpath                   1244    1274     +30
| __do_sys_perf_event_open                    2576    2600     +24
| down_read                                    192     200      +8
| __down_read                                  192     200      +8
...
| task_work_run                                168     148     -20
| dma_fence_chain_walk.part                    760     736     -24
| __genradix_ptr_alloc                         674     646     -28

| Total: Before=6187409, After=6187477, chg +0.00%

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Vineet Gupta <vgupta@kernel.org>
Vineet Gupta 2020-05-06 15:13:25 -07:00
parent ecf51c9fa0
commit e188f3330a
1 changed file with 119 additions and 98 deletions

arch/arc/include/asm/cmpxchg.h
@@ -6,6 +6,7 @@
 #ifndef __ASM_ARC_CMPXCHG_H
 #define __ASM_ARC_CMPXCHG_H
 
+#include <linux/build_bug.h>
 #include <linux/types.h>
 
 #include <asm/barrier.h>
@@ -13,62 +14,77 @@
 
 #ifdef CONFIG_ARC_HAS_LLSC
 
-static inline unsigned long
-__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
-{
-        unsigned long prev;
-
-        /*
-         * Explicit full memory barrier needed before/after as
-         * LLOCK/SCOND themselves don't provide any such semantics
-         */
-        smp_mb();
-
-        __asm__ __volatile__(
-        "1:     llock   %0, [%1]        \n"
-        "       brne    %0, %2, 2f      \n"
-        "       scond   %3, [%1]        \n"
-        "       bnz     1b              \n"
-        "2:                             \n"
-        : "=&r"(prev)   /* Early clobber, to prevent reg reuse */
-        : "r"(ptr),     /* Not "m": llock only supports reg direct addr mode */
-          "ir"(expected),
-          "r"(new)      /* can't be "ir". scond can't take LIMM for "b" */
-        : "cc", "memory"); /* so that gcc knows memory is being written here */
-
-        smp_mb();
-
-        return prev;
-}
-
-#else /* !CONFIG_ARC_HAS_LLSC */
-
-static inline unsigned long
-__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
-{
-        unsigned long flags;
-        int prev;
-        volatile unsigned long *p = ptr;
-
-        /*
-         * spin lock/unlock provide the needed smp_mb() before/after
-         */
-        atomic_ops_lock(flags);
-        prev = *p;
-        if (prev == expected)
-                *p = new;
-        atomic_ops_unlock(flags);
-        return prev;
-}
+/*
+ * if (*ptr == @old)
+ *      *ptr = @new
+ */
+#define __cmpxchg(ptr, old, new)                                \
+({                                                              \
+        __typeof__(*(ptr)) _prev;                               \
+                                                                \
+        __asm__ __volatile__(                                   \
+        "1:     llock  %0, [%1]         \n"                     \
+        "       brne   %0, %2, 2f       \n"                     \
+        "       scond  %3, [%1]         \n"                     \
+        "       bnz     1b              \n"                     \
+        "2:                             \n"                     \
+        : "=&r"(_prev)  /* Early clobber prevent reg reuse */   \
+        : "r"(ptr),     /* Not "m": llock only supports reg */  \
+          "ir"(old),                                            \
+          "r"(new)      /* Not "ir": scond can't take LIMM */   \
+        : "cc",                                                 \
+          "memory");    /* gcc knows memory is clobbered */     \
+                                                                \
+        _prev;                                                  \
+})
+
+#define arch_cmpxchg(ptr, old, new)                             \
+({                                                              \
+        __typeof__(ptr) _p_ = (ptr);                            \
+        __typeof__(*(ptr)) _o_ = (old);                         \
+        __typeof__(*(ptr)) _n_ = (new);                         \
+        __typeof__(*(ptr)) _prev_;                              \
+                                                                \
+        switch(sizeof((_p_))) {                                 \
+        case 4:                                                 \
+                /*                                              \
+                 * Explicit full memory barrier needed before/after \
+                 */                                             \
+                smp_mb();                                       \
+                _prev_ = __cmpxchg(_p_, _o_, _n_);              \
+                smp_mb();                                       \
+                break;                                          \
+        default:                                                \
+                BUILD_BUG();                                    \
+        }                                                       \
+        _prev_;                                                 \
+})
+
+#else
+
+#define arch_cmpxchg(ptr, old, new)                             \
+({                                                              \
+        volatile __typeof__(ptr) _p_ = (ptr);                   \
+        __typeof__(*(ptr)) _o_ = (old);                         \
+        __typeof__(*(ptr)) _n_ = (new);                         \
+        __typeof__(*(ptr)) _prev_;                              \
+        unsigned long __flags;                                  \
+                                                                \
+        BUILD_BUG_ON(sizeof(_p_) != 4);                         \
+                                                                \
+        /*                                                      \
+         * spin lock/unlock provide the needed smp_mb() before/after \
+         */                                                     \
+        atomic_ops_lock(__flags);                               \
+        _prev_ = *_p_;                                          \
+        if (_prev_ == _o_)                                      \
+                *_p_ = _n_;                                     \
+        atomic_ops_unlock(__flags);                             \
+        _prev_;                                                 \
+})
 
 #endif
 
-#define arch_cmpxchg(ptr, o, n) ({                      \
-        (typeof(*(ptr)))__cmpxchg((ptr),                \
-                                  (unsigned long)(o),   \
-                                  (unsigned long)(n));  \
-})
-
 /*
  * atomic_cmpxchg is same as cmpxchg
  * LLSC: only different in data-type, semantics are exactly same
@@ -77,60 +93,65 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
  */
 #define arch_atomic_cmpxchg(v, o, n) ((int)arch_cmpxchg(&((v)->counter), (o), (n)))
 
 /*
- * xchg (reg with memory) based on "Native atomic" EX insn
+ * xchg
  */
-static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
-                                   int size)
-{
-        extern unsigned long __xchg_bad_pointer(void);
-
-        switch (size) {
-        case 4:
-                smp_mb();
-
-                __asm__ __volatile__(
-                "       ex  %0, [%1]    \n"
-                : "+r"(val)
-                : "r"(ptr)
-                : "memory");
-
-                smp_mb();
-
-                return val;
-        }
-        return __xchg_bad_pointer();
-}
-
-#define _xchg(ptr, with) ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), \
-                                                 sizeof(*(ptr))))
-
-/*
- * xchg() maps directly to ARC EX instruction which guarantees atomicity.
- * However in !LLSC config, it also needs to be use @atomic_ops_lock spinlock
- * due to a subtle reason:
- *  - For !LLSC, cmpxchg() needs to use that lock (see above) and there is lot
- *    of kernel code which calls xchg()/cmpxchg() on same data (see llist.h)
- *    Hence xchg() needs to follow same locking rules.
- */
-
-#ifndef CONFIG_ARC_HAS_LLSC
-
-#define arch_xchg(ptr, with)            \
-({                                      \
-        unsigned long flags;            \
-        typeof(*(ptr)) old_val;         \
-                                        \
-        atomic_ops_lock(flags);         \
-        old_val = _xchg(ptr, with);     \
-        atomic_ops_unlock(flags);       \
-        old_val;                        \
+#ifdef CONFIG_ARC_HAS_LLSC
+
+#define __xchg(ptr, val)                                        \
+({                                                              \
+        __asm__ __volatile__(                                   \
+        "       ex  %0, [%1]    \n"     /* set new value */     \
+        : "+r"(val)                                             \
+        : "r"(ptr)                                              \
+        : "memory");                                            \
+        _val_;          /* get old value */                     \
 })
 
-#else
-
-#define arch_xchg(ptr, with)  _xchg(ptr, with)
+#define arch_xchg(ptr, val)                                     \
+({                                                              \
+        __typeof__(ptr) _p_ = (ptr);                            \
+        __typeof__(*(ptr)) _val_ = (val);                       \
+                                                                \
+        switch(sizeof(*(_p_))) {                                \
+        case 4:                                                 \
+                smp_mb();                                       \
+                _val_ = __xchg(_p_, _val_);                     \
+                smp_mb();                                       \
+                break;                                          \
+        default:                                                \
+                BUILD_BUG();                                    \
+        }                                                       \
+        _val_;                                                  \
+})
+
+#else  /* !CONFIG_ARC_HAS_LLSC */
+
+/*
+ * EX instructions is baseline and present in !LLSC too. But in this
+ * regime it still needs use @atomic_ops_lock spinlock to allow interop
+ * with cmpxchg() which uses spinlock in !LLSC
+ * (llist.h use xchg and cmpxchg on sama data)
+ */
+#define arch_xchg(ptr, val)                                     \
+({                                                              \
+        __typeof__(ptr) _p_ = (ptr);                            \
+        __typeof__(*(ptr)) _val_ = (val);                       \
+                                                                \
+        unsigned long __flags;                                  \
+                                                                \
+        atomic_ops_lock(__flags);                               \
+                                                                \
+        __asm__ __volatile__(                                   \
+        "       ex  %0, [%1]    \n"                             \
+        : "+r"(_val_)                                           \
+        : "r"(_p_)                                              \
+        : "memory");                                            \
+                                                                \
+        atomic_ops_unlock(__flags);                             \
+        _val_;                                                  \
+})
 
 #endif
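
The comment in the !LLSC branch above compresses a subtle point: once cmpxchg()
is emulated under atomic_ops_lock, xchg() on the same word must take that lock
too, even though the EX instruction is atomic on its own. A user-space sketch
of that reasoning (not kernel code; a pthread spinlock stands in for
atomic_ops_lock, and the function names are invented for illustration):

  /* xchg_lock_sketch.c -- build with: gcc -O2 -pthread xchg_lock_sketch.c */
  #include <pthread.h>
  #include <stdint.h>
  #include <stdio.h>

  static pthread_spinlock_t ops_lock;     /* stands in for @atomic_ops_lock */
  static volatile uint32_t shared;

  /* cmpxchg emulated under the lock, as the !LLSC arch_cmpxchg() above does */
  static uint32_t locked_cmpxchg(volatile uint32_t *p, uint32_t old, uint32_t new)
  {
          uint32_t prev;

          pthread_spin_lock(&ops_lock);
          prev = *p;
          if (prev == old)
                  *p = new;
          pthread_spin_unlock(&ops_lock);
          return prev;
  }

  /*
   * xchg: even with a single atomic swap instruction available (ARC "ex"),
   * it must take the same lock. An unlocked swap could land between the
   * load and the store inside locked_cmpxchg(), which would then overwrite
   * it -- the swapped-in value is silently lost, the llist.h-style breakage
   * the comment in the diff warns about.
   */
  static uint32_t locked_xchg(volatile uint32_t *p, uint32_t val)
  {
          uint32_t prev;

          pthread_spin_lock(&ops_lock);
          prev = *p;
          *p = val;
          pthread_spin_unlock(&ops_lock);
          return prev;
  }

  int main(void)
  {
          pthread_spin_init(&ops_lock, PTHREAD_PROCESS_PRIVATE);

          shared = 1;
          printf("xchg returned %u\n", (unsigned)locked_xchg(&shared, 2));        /* 1 */
          printf("cmpxchg returned %u\n", (unsigned)locked_cmpxchg(&shared, 2, 3)); /* 2 */
          printf("final value %u\n", (unsigned)shared);                           /* 3 */

          pthread_spin_destroy(&ops_lock);
          return 0;
  }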