fs/dcache: Move the wakeup from __d_lookup_done() to the caller.
__d_lookup_done() wakes waiters on dentry->d_wait. On PREEMPT_RT we are not allowed to do that with preemption disabled, since the wakeup acquires wait_queue_head::lock, which is a "sleeping" spinlock on RT. Calling it under dentry->d_lock is not a problem, since that is also a "sleeping" spinlock on the same configs. Unfortunately, two of its callers (__d_add() and __d_move()) are holding more than just ->d_lock and that needs to be dealt with. The key observation is that the wakeup can be moved to any point before dropping ->d_lock. As a first step to solve this, move the wakeup outside of the hlist_bl_lock() held section. This is safe because: Waiters get inserted into ->d_wait only after they'd taken ->d_lock and observed DCACHE_PAR_LOOKUP in flags. As long as they are woken up (and evicted from the queue) between the moment __d_lookup_done() has removed DCACHE_PAR_LOOKUP and dropping ->d_lock, we are safe, since the waitqueue ->d_wait points to won't get destroyed without having __d_lookup_done(dentry) called (under ->d_lock). ->d_wait is set only by d_alloc_parallel() and only in the case when it returns a freshly allocated in-lookup dentry. Whenever that happens, we are guaranteed that __d_lookup_done() will be called for the resulting dentry (under ->d_lock) before the wq in question gets destroyed. With two exceptions, wq lives in the call frame of the caller of d_alloc_parallel() and we have an explicit d_lookup_done() on the resulting in-lookup dentry before we leave that frame. One of those exceptions is nfs_call_unlink(), where wq is embedded into (dynamically allocated) struct nfs_unlinkdata. It is destroyed in nfs_async_unlink_release() after an explicit d_lookup_done() on the dentry wq went into. The remaining exception is d_add_ci(). There wq is what we'd found in ->d_wait of the d_add_ci() argument. Callers of d_add_ci() are two instances of ->d_lookup() and they must have been given an in-lookup dentry. 
Which means that they'd been called by __lookup_slow() or lookup_open(), with wq in the call frame of one of those. The result of d_alloc_parallel() in d_add_ci() is fed to d_splice_alias(), which either returns non-NULL (and d_add_ci() does d_lookup_done()) or feeds dentry to __d_add() that will do __d_lookup_done() under ->d_lock. That concludes the analysis. Let __d_lookup_unhash(): 1) Lock the lookup hash and clear DCACHE_PAR_LOOKUP 2) Unhash the dentry 3) Retrieve and clear dentry::d_wait 4) Unlock the hash and return the retrieved waitqueue head pointer 5) Let the caller handle the wakeup. 6) Rename __d_lookup_done() to __d_lookup_unhash_wake() to enforce build failures for OOT code that used __d_lookup_done() and is not aware of the new return value. This does not yet solve the PREEMPT_RT problem completely because preemption is still disabled due to i_dir_seq being held for write. This will be addressed in subsequent steps. An alternative solution would be to switch the waitqueue to a simple waitqueue, but aside from Linus not being a fan of them, moving the wakeup closer to the place where dentry::d_lock is unlocked reduces lock contention time for the woken up waiter. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Link: https://lkml.kernel.org/r/20220613140712.77932-3-bigeasy@linutronix.de Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
parent
cf634d540a
commit
45f78b0a27
35
fs/dcache.c
35
fs/dcache.c
|
@ -2712,32 +2712,51 @@ mismatch:
|
|||
}
|
||||
EXPORT_SYMBOL(d_alloc_parallel);
|
||||
|
||||
void __d_lookup_done(struct dentry *dentry)
|
||||
/*
|
||||
* - Unhash the dentry
|
||||
* - Retrieve and clear the waitqueue head in dentry
|
||||
* - Return the waitqueue head
|
||||
*/
|
||||
static wait_queue_head_t *__d_lookup_unhash(struct dentry *dentry)
|
||||
{
|
||||
struct hlist_bl_head *b = in_lookup_hash(dentry->d_parent,
|
||||
dentry->d_name.hash);
|
||||
wait_queue_head_t *d_wait;
|
||||
struct hlist_bl_head *b;
|
||||
|
||||
lockdep_assert_held(&dentry->d_lock);
|
||||
|
||||
b = in_lookup_hash(dentry->d_parent, dentry->d_name.hash);
|
||||
hlist_bl_lock(b);
|
||||
dentry->d_flags &= ~DCACHE_PAR_LOOKUP;
|
||||
__hlist_bl_del(&dentry->d_u.d_in_lookup_hash);
|
||||
wake_up_all(dentry->d_wait);
|
||||
d_wait = dentry->d_wait;
|
||||
dentry->d_wait = NULL;
|
||||
hlist_bl_unlock(b);
|
||||
INIT_HLIST_NODE(&dentry->d_u.d_alias);
|
||||
INIT_LIST_HEAD(&dentry->d_lru);
|
||||
return d_wait;
|
||||
}
|
||||
EXPORT_SYMBOL(__d_lookup_done);
|
||||
|
||||
void __d_lookup_unhash_wake(struct dentry *dentry)
|
||||
{
|
||||
spin_lock(&dentry->d_lock);
|
||||
wake_up_all(__d_lookup_unhash(dentry));
|
||||
spin_unlock(&dentry->d_lock);
|
||||
}
|
||||
EXPORT_SYMBOL(__d_lookup_unhash_wake);
|
||||
|
||||
/* inode->i_lock held if inode is non-NULL */
|
||||
|
||||
static inline void __d_add(struct dentry *dentry, struct inode *inode)
|
||||
{
|
||||
wait_queue_head_t *d_wait;
|
||||
struct inode *dir = NULL;
|
||||
unsigned n;
|
||||
spin_lock(&dentry->d_lock);
|
||||
if (unlikely(d_in_lookup(dentry))) {
|
||||
dir = dentry->d_parent->d_inode;
|
||||
n = start_dir_add(dir);
|
||||
__d_lookup_done(dentry);
|
||||
d_wait = __d_lookup_unhash(dentry);
|
||||
wake_up_all(d_wait);
|
||||
}
|
||||
if (inode) {
|
||||
unsigned add_flags = d_flags_for_inode(inode);
|
||||
|
@ -2896,6 +2915,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
|
|||
bool exchange)
|
||||
{
|
||||
struct dentry *old_parent, *p;
|
||||
wait_queue_head_t *d_wait;
|
||||
struct inode *dir = NULL;
|
||||
unsigned n;
|
||||
|
||||
|
@ -2926,7 +2946,8 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
|
|||
if (unlikely(d_in_lookup(target))) {
|
||||
dir = target->d_parent->d_inode;
|
||||
n = start_dir_add(dir);
|
||||
__d_lookup_done(target);
|
||||
d_wait = __d_lookup_unhash(target);
|
||||
wake_up_all(d_wait);
|
||||
}
|
||||
|
||||
write_seqcount_begin(&dentry->d_seq);
|
||||
|
|
|
@ -349,7 +349,7 @@ static inline void dont_mount(struct dentry *dentry)
|
|||
spin_unlock(&dentry->d_lock);
|
||||
}
|
||||
|
||||
extern void __d_lookup_done(struct dentry *);
|
||||
extern void __d_lookup_unhash_wake(struct dentry *dentry);
|
||||
|
||||
static inline int d_in_lookup(const struct dentry *dentry)
|
||||
{
|
||||
|
@ -358,11 +358,8 @@ static inline int d_in_lookup(const struct dentry *dentry)
|
|||
|
||||
static inline void d_lookup_done(struct dentry *dentry)
|
||||
{
|
||||
if (unlikely(d_in_lookup(dentry))) {
|
||||
spin_lock(&dentry->d_lock);
|
||||
__d_lookup_done(dentry);
|
||||
spin_unlock(&dentry->d_lock);
|
||||
}
|
||||
if (unlikely(d_in_lookup(dentry)))
|
||||
__d_lookup_unhash_wake(dentry);
|
||||
}
|
||||
|
||||
extern void dput(struct dentry *);
|
||||
|
|
Loading…
Reference in New Issue