locking/rwsem: Enable lock event counting
Add lock event counting calls so that we can track the number of lock
events happening in the rwsem code.

With CONFIG_LOCK_EVENT_COUNTS on and booting a 4-socket 112-thread
x86-64 system, the rwsem counts after system bootup were as follows:

  rwsem_opt_fail=261
  rwsem_opt_wlock=50636
  rwsem_rlock=445
  rwsem_rlock_fail=0
  rwsem_rlock_fast=22
  rwsem_rtrylock=810144
  rwsem_sleep_reader=441
  rwsem_sleep_writer=310
  rwsem_wake_reader=355
  rwsem_wake_writer=2335
  rwsem_wlock=261
  rwsem_wlock_fail=0
  rwsem_wtrylock=20583

It can be seen that most of the slowpath lock acquisitions were write
locks taken in the optimistic spinning code path, with no sleeping at
all. For this system, over 97% of the locks were acquired via
optimistic spinning, which illustrates how important optimistic
spinning is to rwsem performance.

Signed-off-by: Waiman Long <longman@redhat.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Will Deacon <will.deacon@arm.com>
Link: http://lkml.kernel.org/r/20190404174320.22416-11-longman@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
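For background, the lockevent_inc()/lockevent_cond_inc() calls added by this patch bump simple per-event counters that are exposed through debugfs when CONFIG_LOCK_EVENT_COUNTS is enabled. Below is a minimal, self-contained userspace sketch of the underlying x-macro pattern; the event list is trimmed and the helpers are deliberately simplified (the kernel implementation uses per-CPU counters and sums them when the debugfs files are read), so the exact macro bodies here are illustrative assumptions, not the kernel code:

    #include <stdio.h>

    /* In the kernel, the event list lives in lock_events_list.h and is
     * re-included with different LOCK_EVENT() definitions.
     */
    #define LOCK_EVENTS_LIST(X)                                 \
        X(rwsem_wlock)      /* write locks acquired   */        \
        X(rwsem_rlock)      /* read locks acquired    */        \
        X(rwsem_opt_wlock)  /* opt-spin write locks   */

    /* First expansion: one enum id per event */
    enum lock_events {
    #define LOCK_EVENT(name)    LOCKEVENT_ ## name,
        LOCK_EVENTS_LIST(LOCK_EVENT)
    #undef LOCK_EVENT
        lockevent_num
    };

    /* Simplified global counters (the kernel uses per-CPU counters) */
    static unsigned long lockevents[lockevent_num];

    #define lockevent_inc(ev)   (lockevents[LOCKEVENT_ ## ev]++)
    #define lockevent_cond_inc(ev, cond)                        \
        do { if (cond) lockevent_inc(ev); } while (0)

    int main(void)
    {
        /* Second expansion: one name string per event */
        static const char * const names[] = {
    #define LOCK_EVENT(name)    #name,
            LOCK_EVENTS_LIST(LOCK_EVENT)
    #undef LOCK_EVENT
        };
        int i;

        lockevent_inc(rwsem_wlock);
        lockevent_cond_inc(rwsem_rlock, 1);

        for (i = 0; i < lockevent_num; i++)
            printf("%s=%lu\n", names[i], lockevents[i]);
        return 0;
    }

At run time the kernel exposes one file per event under debugfs (the directory name, assumed here to be /sys/kernel/debug/lockevent/, may differ by kernel version); reading those files yields the per-event totals quoted above.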
commit a8654596f0
parent bf20616f46
@@ -904,7 +904,6 @@ config ARCH_USE_MEMREMAP_PROT
 config LOCK_EVENT_COUNTS
 	bool "Locking event counts collection"
 	depends on DEBUG_FS
-	depends on QUEUED_SPINLOCKS
 	---help---
 	  Enable light-weight counting of various locking related events
 	  in the system with minimal performance impact. This reduces
@@ -48,3 +48,20 @@ LOCK_EVENT(lock_use_node3)	/* # of locking ops that use 3rd percpu node */
 LOCK_EVENT(lock_use_node4)	/* # of locking ops that use 4th percpu node */
 LOCK_EVENT(lock_no_node)	/* # of locking ops w/o using percpu node */
 #endif /* CONFIG_QUEUED_SPINLOCKS */
+
+/*
+ * Locking events for rwsem
+ */
+LOCK_EVENT(rwsem_sleep_reader)	/* # of reader sleeps */
+LOCK_EVENT(rwsem_sleep_writer)	/* # of writer sleeps */
+LOCK_EVENT(rwsem_wake_reader)	/* # of reader wakeups */
+LOCK_EVENT(rwsem_wake_writer)	/* # of writer wakeups */
+LOCK_EVENT(rwsem_opt_wlock)	/* # of write locks opt-spin acquired */
+LOCK_EVENT(rwsem_opt_fail)	/* # of failed opt-spinnings */
+LOCK_EVENT(rwsem_rlock)		/* # of read locks acquired */
+LOCK_EVENT(rwsem_rlock_fast)	/* # of fast read locks acquired */
+LOCK_EVENT(rwsem_rlock_fail)	/* # of failed read lock acquisitions */
+LOCK_EVENT(rwsem_rtrylock)	/* # of read trylock calls */
+LOCK_EVENT(rwsem_wlock)		/* # of write locks acquired */
+LOCK_EVENT(rwsem_wlock_fail)	/* # of failed write lock acquisitions */
+LOCK_EVENT(rwsem_wtrylock)	/* # of write trylock calls */
@@ -147,6 +147,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
		 * will notice the queued writer.
		 */
		wake_q_add(wake_q, waiter->task);
+		lockevent_inc(rwsem_wake_writer);
	}

	return;
@@ -214,6 +215,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
	}

	adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
+	lockevent_cond_inc(rwsem_wake_reader, woken);
	if (list_empty(&sem->wait_list)) {
		/* hit end of list above */
		adjustment -= RWSEM_WAITING_BIAS;
@@ -265,6 +267,7 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
		if (atomic_long_try_cmpxchg_acquire(&sem->count, &count,
					count + RWSEM_ACTIVE_WRITE_BIAS)) {
			rwsem_set_owner(sem);
+			lockevent_inc(rwsem_opt_wlock);
			return true;
		}
	}
@@ -389,6 +392,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
	osq_unlock(&sem->osq);
 done:
	preempt_enable();
+	lockevent_cond_inc(rwsem_opt_fail, !taken);
	return taken;
 }

@@ -436,6 +440,7 @@ __rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
		if (atomic_long_read(&sem->count) >= 0) {
			raw_spin_unlock_irq(&sem->wait_lock);
			rwsem_set_reader_owned(sem);
+			lockevent_inc(rwsem_rlock_fast);
			return sem;
		}
		adjustment += RWSEM_WAITING_BIAS;
@@ -472,9 +477,11 @@ __rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
			break;
		}
		schedule();
+		lockevent_inc(rwsem_sleep_reader);
	}

	__set_current_state(TASK_RUNNING);
+	lockevent_inc(rwsem_rlock);
	return sem;
 out_nolock:
	list_del(&waiter.list);
@@ -482,6 +489,7 @@ out_nolock:
	atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
	raw_spin_unlock_irq(&sem->wait_lock);
	__set_current_state(TASK_RUNNING);
+	lockevent_inc(rwsem_rlock_fail);
	return ERR_PTR(-EINTR);
 }

@@ -575,6 +583,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
				goto out_nolock;

			schedule();
+			lockevent_inc(rwsem_sleep_writer);
			set_current_state(state);
		} while ((count = atomic_long_read(&sem->count)) & RWSEM_ACTIVE_MASK);

@@ -583,6 +592,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
	__set_current_state(TASK_RUNNING);
	list_del(&waiter.list);
	raw_spin_unlock_irq(&sem->wait_lock);
+	lockevent_inc(rwsem_wlock);

	return ret;

@@ -596,6 +606,7 @@ out_nolock:
		__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
	raw_spin_unlock_irq(&sem->wait_lock);
	wake_up_q(&wake_q);
+	lockevent_inc(rwsem_wlock_fail);

	return ERR_PTR(-EINTR);
 }
@@ -23,6 +23,8 @@
  * is involved. Ideally we would like to track all the readers that own
  * a rwsem, but the overhead is simply too big.
  */
+#include "lock_events.h"
+
 #define RWSEM_READER_OWNED	(1UL << 0)
 #define RWSEM_ANONYMOUSLY_OWNED	(1UL << 1)

@@ -200,6 +202,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
	 */
	long tmp = RWSEM_UNLOCKED_VALUE;

+	lockevent_inc(rwsem_rtrylock);
	do {
		if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
					tmp + RWSEM_ACTIVE_READ_BIAS)) {
@@ -241,6 +244,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem)
 {
	long tmp;

+	lockevent_inc(rwsem_wtrylock);
	tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE,
				      RWSEM_ACTIVE_WRITE_BIAS);
	if (tmp == RWSEM_UNLOCKED_VALUE) {