ceph: periodically trim stale dentries

The previous commit makes the VFS delete a stale dentry when its last
reference is dropped. A lease can also become invalid while the
corresponding dentry has no references. This patch makes cephfs
periodically scan the lease lists and delete the corresponding dentry
if its lease is invalid.

There are two types of lease: dentry leases and dir leases. A dentry
lease has a lifetime and applies to a single dentry. A dentry lease is
added to the tail of a list when it is updated, so leases at the front
of the list expire first. A dir lease is CEPH_CAP_FILE_SHARED on the
directory inode; it applies to all dentries in the directory. Dentries
that have dir leases are added to another list. Dentries in that list
are periodically checked in a round-robin manner.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
Yan, Zheng 2019-01-31 16:55:51 +08:00 committed by Ilya Dryomov
parent 1e9c2eb681
commit 37c4efc1dd
5 changed files with 312 additions and 68 deletions

View File

@ -29,6 +29,9 @@
const struct dentry_operations ceph_dentry_ops; const struct dentry_operations ceph_dentry_ops;
static bool __dentry_lease_is_valid(struct ceph_dentry_info *di);
static int __dir_lease_try_check(const struct dentry *dentry);
/* /*
* Initialize ceph dentry state. * Initialize ceph dentry state.
*/ */
@ -44,7 +47,7 @@ static int ceph_d_init(struct dentry *dentry)
di->lease_session = NULL; di->lease_session = NULL;
di->time = jiffies; di->time = jiffies;
dentry->d_fsdata = di; dentry->d_fsdata = di;
ceph_dentry_lru_add(dentry); INIT_LIST_HEAD(&di->lease_list);
return 0; return 0;
} }
@ -241,6 +244,7 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx,
goto out; goto out;
} }
if (fpos_cmp(ctx->pos, di->offset) <= 0) { if (fpos_cmp(ctx->pos, di->offset) <= 0) {
__ceph_dentry_dir_lease_touch(di);
emit_dentry = true; emit_dentry = true;
} }
spin_unlock(&dentry->d_lock); spin_unlock(&dentry->d_lock);
@ -1124,14 +1128,260 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
return err; return err;
} }
/*
 * Record that the dentry lease described by @di was just updated.
 *
 * The entry is flagged CEPH_DENTRY_LEASE_LIST and moved to the tail of
 * mdsc->dentry_leases, so entries nearer the head of that list expire
 * sooner (assuming all leases have similar duration).
 *
 * If the entry is currently flagged CEPH_DENTRY_SHRINK_LIST it is not
 * moved; it is only marked CEPH_DENTRY_REFERENCED.
 *
 * Called under dentry->d_lock.
 */
void __ceph_dentry_lease_touch(struct ceph_dentry_info *di)
{
	struct dentry *dentry = di->dentry;

	dout("dentry_lease_touch %p %p '%pd'\n", di, dentry, dentry);

	di->flags |= CEPH_DENTRY_LEASE_LIST;

	if (!(di->flags & CEPH_DENTRY_SHRINK_LIST)) {
		struct ceph_mds_client *client =
			ceph_sb_to_client(dentry->d_sb)->mdsc;

		spin_lock(&client->dentry_list_lock);
		list_move_tail(&di->lease_list, &client->dentry_leases);
		spin_unlock(&client->dentry_list_lock);
	} else {
		/* leave the list node alone, just note the use */
		di->flags |= CEPH_DENTRY_REFERENCED;
	}
}
/*
 * Put @di at the tail of mdsc->dentry_dir_leases and reset its lease state:
 * CEPH_DENTRY_LEASE_LIST and CEPH_DENTRY_REFERENCED are cleared, lease_gen
 * is zeroed and the timestamp is set to the current jiffies.
 *
 * Does no locking itself; the callers in this file hold
 * mdsc->dentry_list_lock around the call.
 */
static void __dentry_dir_lease_touch(struct ceph_mds_client *mdsc,
				     struct ceph_dentry_info *di)
{
	di->flags &= ~CEPH_DENTRY_LEASE_LIST;
	di->flags &= ~CEPH_DENTRY_REFERENCED;
	di->time = jiffies;
	di->lease_gen = 0;
	list_move_tail(&di->lease_list, &mdsc->dentry_dir_leases);
}
/*
 * Record that @di was used under a dir lease.
 *
 * An entry that is not on any list is added to the tail of
 * mdsc->dentry_dir_leases. An entry that is on a list without
 * CEPH_DENTRY_LEASE_LIST set only gets CEPH_DENTRY_REFERENCED. An entry
 * on the dentry lease list stays there while its dentry lease is still
 * valid, and is moved to the dir lease list otherwise.
 *
 * If the entry is flagged CEPH_DENTRY_SHRINK_LIST it is not moved;
 * CEPH_DENTRY_REFERENCED is set and CEPH_DENTRY_LEASE_LIST is cleared.
 *
 * Called under dentry->d_lock.
 */
void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di)
{
	struct dentry *dn = di->dentry;
	struct ceph_mds_client *mdsc;

	dout("dentry_dir_lease_touch %p %p '%pd' (offset %lld)\n",
	     di, dn, dn, di->offset);

	if (!list_empty(&di->lease_list)) {
		if (di->flags & CEPH_DENTRY_LEASE_LIST) {
			/* don't remove dentry from dentry lease list
			 * if its lease is valid */
			if (__dentry_lease_is_valid(di))
				return;
		} else {
			di->flags |= CEPH_DENTRY_REFERENCED;
			return;
		}
	}

	if (di->flags & CEPH_DENTRY_SHRINK_LIST) {
		di->flags |= CEPH_DENTRY_REFERENCED;
		di->flags &= ~CEPH_DENTRY_LEASE_LIST;
		return;
	}

	mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
	spin_lock(&mdsc->dentry_list_lock);
	/* statement terminator was a ',' here, fusing it with the unlock */
	__dentry_dir_lease_touch(mdsc, di);
	spin_unlock(&mdsc->dentry_list_lock);
}
/*
 * Take @di off whichever lease list it is currently on.
 *
 * Nothing is done when the entry is flagged CEPH_DENTRY_SHRINK_LIST or
 * when its lease_list node is already empty. The callers in this file
 * hold dentry->d_lock.
 */
static void __dentry_lease_unlist(struct ceph_dentry_info *di)
{
	struct ceph_mds_client *client;

	if ((di->flags & CEPH_DENTRY_SHRINK_LIST) ||
	    list_empty(&di->lease_list))
		return;

	client = ceph_sb_to_client(di->dentry->d_sb)->mdsc;
	spin_lock(&client->dentry_list_lock);
	list_del_init(&di->lease_list);
	spin_unlock(&client->dentry_list_lock);
}
/*
 * Verdicts a lease check callback hands back to __dentry_leases_walk().
 * DELETE, TOUCH and STOP are distinct bits that the walker tests with '&'.
 */
enum {
	KEEP	= 0,		/* no bit set: leave the entry as it is */
	DELETE	= 1 << 0,	/* stale lease: unlist or dispose the entry */
	TOUCH	= 1 << 1,	/* move the entry to the dir lease list tail */
	STOP	= 1 << 2,	/* end the walk after this entry */
};
/*
 * Parameters and running state for one __dentry_leases_walk() pass.
 */
struct ceph_lease_walk_control {
	/* true: walk mdsc->dentry_dir_leases, false: mdsc->dentry_leases */
	bool dir_lease;
	/* remaining scan budget; the walker decrements it per entry */
	unsigned long nr_to_scan;
	/* added to di->time and compared with jiffies by __dir_lease_check() */
	unsigned long dir_lease_ttl;
};
/*
 * Walk one of the two lease lists and act on the verdict that @check
 * returns for each entry.
 *
 * @mdsc:  client whose lease list is walked
 * @lwc:   selects the list (dir_lease) and carries the scan budget
 *         (nr_to_scan), which is decremented for every entry visited;
 *         it is also passed to @check as its second argument
 * @check: called with dentry->d_lock held; returns a combination of
 *         KEEP / DELETE / TOUCH / STOP
 *
 * Phase one runs under mdsc->dentry_list_lock and only collects
 * unreferenced stale entries on a private 'dispose' list. Phase two runs
 * after that lock is dropped and either re-lists an entry that was
 * touched in the meantime or counts it as freed, then drops the
 * reference taken in phase one.
 *
 * Returns the number of disposed entries that were not re-listed.
 */
static unsigned long
__dentry_leases_walk(struct ceph_mds_client *mdsc,
struct ceph_lease_walk_control *lwc,
int (*check)(struct dentry*, void*))
{
struct ceph_dentry_info *di, *tmp;
struct dentry *dentry, *last = NULL;
struct list_head* list;
LIST_HEAD(dispose);
unsigned long freed = 0;
int ret = 0;
list = lwc->dir_lease ? &mdsc->dentry_dir_leases : &mdsc->dentry_leases;
spin_lock(&mdsc->dentry_list_lock);
list_for_each_entry_safe(di, tmp, list, lease_list) {
/* scan budget used up */
if (!lwc->nr_to_scan)
break;
--lwc->nr_to_scan;
dentry = di->dentry;
/* reached the first entry this pass moved to the tail: full circle */
if (last == dentry)
break;
/* The touch helpers take dentry_list_lock while d_lock is held; here
 * the order is reversed, so only try the lock and skip on contention. */
if (!spin_trylock(&dentry->d_lock))
continue;
/* NOTE(review): a negative lockref count presumably marks a dentry
 * that is already being torn down - confirm; it is simply unlisted.
 * 'ret' keeps its earlier value on this path, which cannot contain
 * STOP because STOP would have ended the loop already. */
if (dentry->d_lockref.count < 0) {
list_del_init(&di->lease_list);
goto next;
}
ret = check(dentry, lwc);
if (ret & TOUCH) {
/* move it into tail of dir lease list */
__dentry_dir_lease_touch(mdsc, di);
if (!last)
last = dentry;
}
if (ret & DELETE) {
/* stale lease */
di->flags &= ~CEPH_DENTRY_REFERENCED;
if (dentry->d_lockref.count > 0) {
/* update_dentry_lease() will re-add
* it to lease list, or
* ceph_d_delete() will return 1 when
* last reference is dropped */
list_del_init(&di->lease_list);
} else {
/* unreferenced: park it on the private list and hold a
 * reference until phase two */
di->flags |= CEPH_DENTRY_SHRINK_LIST;
list_move_tail(&di->lease_list, &dispose);
dget_dlock(dentry);
}
}
next:
spin_unlock(&dentry->d_lock);
if (ret & STOP)
break;
}
spin_unlock(&mdsc->dentry_list_lock);
/* Phase two: entries on 'dispose' are only reachable from here, and the
 * touch helpers leave SHRINK_LIST entries in place, so d_lock can be
 * taken normally now. */
while (!list_empty(&dispose)) {
di = list_first_entry(&dispose, struct ceph_dentry_info,
lease_list);
dentry = di->dentry;
spin_lock(&dentry->d_lock);
list_del_init(&di->lease_list);
di->flags &= ~CEPH_DENTRY_SHRINK_LIST;
/* a touch helper saw the entry while it was parked: put it back on
 * the list matching its current lease type instead of freeing it */
if (di->flags & CEPH_DENTRY_REFERENCED) {
spin_lock(&mdsc->dentry_list_lock);
if (di->flags & CEPH_DENTRY_LEASE_LIST) {
list_add_tail(&di->lease_list,
&mdsc->dentry_leases);
} else {
__dentry_dir_lease_touch(mdsc, di);
}
spin_unlock(&mdsc->dentry_list_lock);
} else {
freed++;
}
spin_unlock(&dentry->d_lock);
/* ceph_d_delete() does the trick */
dput(dentry);
}
return freed;
}
/*
 * Check callback for walking mdsc->dentry_leases. @arg is not used.
 *
 * Returns STOP while the dentry lease is still valid. Otherwise the
 * result of __dir_lease_try_check() decides: -EBUSY gives KEEP, a
 * positive value gives TOUCH, and anything else gives DELETE.
 */
static int __dentry_lease_check(struct dentry *dentry, void *arg)
{
	int dir_state;

	if (__dentry_lease_is_valid(ceph_dentry(dentry)))
		return STOP;

	dir_state = __dir_lease_try_check(dentry);
	if (dir_state > 0)
		return TOUCH;

	return dir_state == -EBUSY ? KEEP : DELETE;
}
/*
 * Check callback for walking mdsc->dentry_dir_leases.
 * @arg is the struct ceph_lease_walk_control of the current walk.
 *
 * The result of __dir_lease_try_check() decides: -EBUSY gives KEEP, and
 * zero or any other negative value gives DELETE. For a positive value
 * the entry's age is checked: STOP while jiffies is still before
 * di->time + dir_lease_ttl, TOUCH afterwards.
 */
static int __dir_lease_check(struct dentry *dentry, void *arg)
{
	struct ceph_dentry_info *di = ceph_dentry(dentry);
	struct ceph_lease_walk_control *ctl = arg;
	int state;

	state = __dir_lease_try_check(dentry);
	if (state == -EBUSY)
		return KEEP;
	if (state <= 0)
		return DELETE;

	/*
	 * An entry we do not want to delete is moved to the tail of the
	 * dir lease list (TOUCH), so the list is checked round robin.
	 */
	return time_before(jiffies, di->time + ctl->dir_lease_ttl) ?
		STOP : TOUCH;
}
int ceph_trim_dentries(struct ceph_mds_client *mdsc)
{
struct ceph_lease_walk_control lwc;
unsigned long freed;
lwc.dir_lease = false;
lwc.nr_to_scan = CEPH_CAPS_PER_RELEASE * 2;
freed = __dentry_leases_walk(mdsc, &lwc, __dentry_lease_check);
if (!lwc.nr_to_scan) /* more invalid leases */
return -EAGAIN;
if (lwc.nr_to_scan < CEPH_CAPS_PER_RELEASE)
lwc.nr_to_scan = CEPH_CAPS_PER_RELEASE;
lwc.dir_lease = true;
freed +=__dentry_leases_walk(mdsc, &lwc, __dir_lease_check);
if (!lwc.nr_to_scan) /* more to check */
return -EAGAIN;
return freed > 0 ? 1 : 0;
}
/* /*
* Ensure a dentry lease will no longer revalidate. * Ensure a dentry lease will no longer revalidate.
*/ */
void ceph_invalidate_dentry_lease(struct dentry *dentry) void ceph_invalidate_dentry_lease(struct dentry *dentry)
{ {
struct ceph_dentry_info *di = ceph_dentry(dentry);
spin_lock(&dentry->d_lock); spin_lock(&dentry->d_lock);
ceph_dentry(dentry)->time = jiffies; di->time = jiffies;
ceph_dentry(dentry)->lease_shared_gen = 0; di->lease_shared_gen = 0;
__dentry_lease_unlist(di);
spin_unlock(&dentry->d_lock); spin_unlock(&dentry->d_lock);
} }
@ -1251,6 +1501,8 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
if (atomic_read(&ci->i_shared_gen) == di->lease_shared_gen) if (atomic_read(&ci->i_shared_gen) == di->lease_shared_gen)
valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1); valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1);
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
if (valid)
__ceph_dentry_dir_lease_touch(di);
dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n", dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n",
dir, (unsigned)atomic_read(&ci->i_shared_gen), dir, (unsigned)atomic_read(&ci->i_shared_gen),
dentry, (unsigned)di->lease_shared_gen, valid); dentry, (unsigned)di->lease_shared_gen, valid);
@ -1343,11 +1595,8 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
} }
dout("d_revalidate %p %s\n", dentry, valid ? "valid" : "invalid"); dout("d_revalidate %p %s\n", dentry, valid ? "valid" : "invalid");
if (valid) { if (!valid)
ceph_dentry_lru_touch(dentry);
} else {
ceph_dir_clear_complete(dir); ceph_dir_clear_complete(dir);
}
if (!(flags & LOOKUP_RCU)) if (!(flags & LOOKUP_RCU))
dput(parent); dput(parent);
@ -1387,9 +1636,9 @@ static void ceph_d_release(struct dentry *dentry)
struct ceph_dentry_info *di = ceph_dentry(dentry); struct ceph_dentry_info *di = ceph_dentry(dentry);
dout("d_release %p\n", dentry); dout("d_release %p\n", dentry);
ceph_dentry_lru_del(dentry);
spin_lock(&dentry->d_lock); spin_lock(&dentry->d_lock);
__dentry_lease_unlist(di);
dentry->d_fsdata = NULL; dentry->d_fsdata = NULL;
spin_unlock(&dentry->d_lock); spin_unlock(&dentry->d_lock);
@ -1490,49 +1739,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
return size - left; return size - left;
} }
/*
 * We maintain a private dentry LRU.
 *
 * FIXME: this needs to be changed to a per-mds lru to be useful.
 *
 * ceph_dentry_lru_add() appends @dn's ceph_dentry_info to the tail of
 * mdsc->dentry_lru and bumps mdsc->num_dentry, both under
 * mdsc->dentry_lru_lock.
 */
void ceph_dentry_lru_add(struct dentry *dn)
{
	struct ceph_dentry_info *info = ceph_dentry(dn);
	struct ceph_mds_client *client;

	dout("dentry_lru_add %p %p '%pd'\n", info, dn, dn);

	client = ceph_sb_to_client(dn->d_sb)->mdsc;
	spin_lock(&client->dentry_lru_lock);
	++client->num_dentry;
	list_add_tail(&info->lru, &client->dentry_lru);
	spin_unlock(&client->dentry_lru_lock);
}
/*
 * Move @dn's ceph_dentry_info to the tail of mdsc->dentry_lru, under
 * mdsc->dentry_lru_lock.
 */
void ceph_dentry_lru_touch(struct dentry *dn)
{
	struct ceph_dentry_info *info = ceph_dentry(dn);
	struct ceph_mds_client *client;

	dout("dentry_lru_touch %p %p '%pd' (offset %lld)\n", info, dn, dn,
	     info->offset);

	client = ceph_sb_to_client(dn->d_sb)->mdsc;
	spin_lock(&client->dentry_lru_lock);
	list_move_tail(&info->lru, &client->dentry_lru);
	spin_unlock(&client->dentry_lru_lock);
}
/*
 * Unlink @dn's ceph_dentry_info from mdsc->dentry_lru and decrement
 * mdsc->num_dentry, both under mdsc->dentry_lru_lock.
 */
void ceph_dentry_lru_del(struct dentry *dn)
{
	struct ceph_dentry_info *info = ceph_dentry(dn);
	struct ceph_mds_client *client;

	dout("dentry_lru_del %p %p '%pd'\n", info, dn, dn);

	client = ceph_sb_to_client(dn->d_sb)->mdsc;
	spin_lock(&client->dentry_lru_lock);
	--client->num_dentry;
	list_del_init(&info->lru);
	spin_unlock(&client->dentry_lru_lock);
}
/* /*
* Return name hash for a given dentry. This is dependent on * Return name hash for a given dentry. This is dependent on

View File

@ -1076,9 +1076,10 @@ static void update_dentry_lease(struct dentry *dentry,
goto out_unlock; goto out_unlock;
di->lease_shared_gen = atomic_read(&ceph_inode(dir)->i_shared_gen); di->lease_shared_gen = atomic_read(&ceph_inode(dir)->i_shared_gen);
if (duration == 0) {
if (duration == 0) __ceph_dentry_dir_lease_touch(di);
goto out_unlock; goto out_unlock;
}
if (di->lease_gen == session->s_cap_gen && if (di->lease_gen == session->s_cap_gen &&
time_before(ttl, di->time)) time_before(ttl, di->time))
@ -1089,8 +1090,6 @@ static void update_dentry_lease(struct dentry *dentry,
di->lease_session = NULL; di->lease_session = NULL;
} }
ceph_dentry_lru_touch(dentry);
if (!di->lease_session) if (!di->lease_session)
di->lease_session = ceph_get_mds_session(session); di->lease_session = ceph_get_mds_session(session);
di->lease_gen = session->s_cap_gen; di->lease_gen = session->s_cap_gen;
@ -1098,6 +1097,8 @@ static void update_dentry_lease(struct dentry *dentry,
di->lease_renew_after = half_ttl; di->lease_renew_after = half_ttl;
di->lease_renew_from = 0; di->lease_renew_from = 0;
di->time = ttl; di->time = ttl;
__ceph_dentry_lease_touch(di);
out_unlock: out_unlock:
spin_unlock(&dentry->d_lock); spin_unlock(&dentry->d_lock);
if (old_lease_session) if (old_lease_session)

View File

@ -58,6 +58,7 @@ struct ceph_reconnect_state {
static void __wake_requests(struct ceph_mds_client *mdsc, static void __wake_requests(struct ceph_mds_client *mdsc,
struct list_head *head); struct list_head *head);
static void ceph_cap_release_work(struct work_struct *work); static void ceph_cap_release_work(struct work_struct *work);
static void ceph_cap_reclaim_work(struct work_struct *work);
static const struct ceph_connection_operations mds_con_ops; static const struct ceph_connection_operations mds_con_ops;
@ -1943,6 +1944,27 @@ void __ceph_queue_cap_release(struct ceph_mds_session *session,
ceph_flush_cap_releases(session->s_mdsc, session); ceph_flush_cap_releases(session->s_mdsc, session);
} }
/*
 * Work function for mdsc->cap_reclaim_work: run ceph_trim_dentries() and
 * queue the work again when it returns -EAGAIN.
 */
static void ceph_cap_reclaim_work(struct work_struct *work)
{
	struct ceph_mds_client *mdsc;

	mdsc = container_of(work, struct ceph_mds_client, cap_reclaim_work);
	if (ceph_trim_dentries(mdsc) == -EAGAIN)
		ceph_queue_cap_reclaim_work(mdsc);
}
/*
 * Queue mdsc->cap_reclaim_work on the client's cap workqueue
 * (mdsc->fsc->cap_wq).
 *
 * Does nothing once mdsc->stopping is set.
 */
void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc)
{
	if (mdsc->stopping)
		return;

	if (queue_work(mdsc->fsc->cap_wq, &mdsc->cap_reclaim_work)) {
		dout("caps reclaim work queued\n");
	} else {
		/* queue_work() returns false when the work was already
		 * pending; the message used to say "release" here */
		dout("failed to queue caps reclaim work\n");
	}
}
/* /*
* requests * requests
*/ */
@ -3957,9 +3979,6 @@ static void delayed_work(struct work_struct *work)
int renew_caps; int renew_caps;
dout("mdsc delayed_work\n"); dout("mdsc delayed_work\n");
ceph_check_delayed_caps(mdsc);
ceph_trim_snapid_map(mdsc);
mutex_lock(&mdsc->mutex); mutex_lock(&mdsc->mutex);
renew_interval = mdsc->mdsmap->m_session_timeout >> 2; renew_interval = mdsc->mdsmap->m_session_timeout >> 2;
@ -4007,6 +4026,12 @@ static void delayed_work(struct work_struct *work)
} }
mutex_unlock(&mdsc->mutex); mutex_unlock(&mdsc->mutex);
ceph_check_delayed_caps(mdsc);
ceph_queue_cap_reclaim_work(mdsc);
ceph_trim_snapid_map(mdsc);
schedule_delayed(mdsc); schedule_delayed(mdsc);
} }
@ -4057,8 +4082,11 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
mdsc->num_cap_flushing = 0; mdsc->num_cap_flushing = 0;
spin_lock_init(&mdsc->cap_dirty_lock); spin_lock_init(&mdsc->cap_dirty_lock);
init_waitqueue_head(&mdsc->cap_flushing_wq); init_waitqueue_head(&mdsc->cap_flushing_wq);
spin_lock_init(&mdsc->dentry_lru_lock); INIT_WORK(&mdsc->cap_reclaim_work, ceph_cap_reclaim_work);
INIT_LIST_HEAD(&mdsc->dentry_lru);
spin_lock_init(&mdsc->dentry_list_lock);
INIT_LIST_HEAD(&mdsc->dentry_leases);
INIT_LIST_HEAD(&mdsc->dentry_dir_leases);
ceph_caps_init(mdsc); ceph_caps_init(mdsc);
ceph_adjust_min_caps(mdsc, fsc->min_caps); ceph_adjust_min_caps(mdsc, fsc->min_caps);
@ -4261,9 +4289,9 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
mutex_unlock(&mdsc->mutex); mutex_unlock(&mdsc->mutex);
ceph_cleanup_snapid_map(mdsc); ceph_cleanup_snapid_map(mdsc);
ceph_cleanup_empty_realms(mdsc); ceph_cleanup_empty_realms(mdsc);
cancel_work_sync(&mdsc->cap_reclaim_work);
cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */ cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */
dout("stopped\n"); dout("stopped\n");

View File

@ -378,6 +378,8 @@ struct ceph_mds_client {
spinlock_t cap_dirty_lock; /* protects above items */ spinlock_t cap_dirty_lock; /* protects above items */
wait_queue_head_t cap_flushing_wq; wait_queue_head_t cap_flushing_wq;
struct work_struct cap_reclaim_work;
/* /*
* Cap reservations * Cap reservations
* *
@ -398,9 +400,9 @@ struct ceph_mds_client {
int caps_avail_count; /* unused, unreserved */ int caps_avail_count; /* unused, unreserved */
int caps_min_count; /* keep at least this many int caps_min_count; /* keep at least this many
(unreserved) */ (unreserved) */
spinlock_t dentry_lru_lock; spinlock_t dentry_list_lock;
struct list_head dentry_lru; struct list_head dentry_leases; /* fifo list */
int num_dentry; struct list_head dentry_dir_leases; /* lru list */
spinlock_t snapid_map_lock; spinlock_t snapid_map_lock;
struct rb_root snapid_map_tree; struct rb_root snapid_map_tree;
@ -462,6 +464,7 @@ extern void __ceph_queue_cap_release(struct ceph_mds_session *session,
struct ceph_cap *cap); struct ceph_cap *cap);
extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc, extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session); struct ceph_mds_session *session);
extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc);
extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc); extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base, extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,

View File

@ -262,17 +262,22 @@ struct ceph_inode_xattr {
* Ceph dentry state * Ceph dentry state
*/ */
struct ceph_dentry_info { struct ceph_dentry_info {
struct dentry *dentry;
struct ceph_mds_session *lease_session; struct ceph_mds_session *lease_session;
struct list_head lease_list;
unsigned flags;
int lease_shared_gen; int lease_shared_gen;
u32 lease_gen; u32 lease_gen;
u32 lease_seq; u32 lease_seq;
unsigned long lease_renew_after, lease_renew_from; unsigned long lease_renew_after, lease_renew_from;
struct list_head lru;
struct dentry *dentry;
unsigned long time; unsigned long time;
u64 offset; u64 offset;
}; };
/*
 * Bits kept in ceph_dentry_info->flags.
 */
/* touched again while already on the dir lease list or on a dispose list */
#define CEPH_DENTRY_REFERENCED		(1 << 0)
/* the entry belongs on mdsc->dentry_leases rather than the dir lease list */
#define CEPH_DENTRY_LEASE_LIST		(1 << 1)
/* the entry sits on a lease walker's private dispose list */
#define CEPH_DENTRY_SHRINK_LIST		(1 << 2)
struct ceph_inode_xattrs_info { struct ceph_inode_xattrs_info {
/* /*
* (still encoded) xattr blob. we avoid the overhead of parsing * (still encoded) xattr blob. we avoid the overhead of parsing
@ -1064,10 +1069,10 @@ extern int ceph_handle_snapdir(struct ceph_mds_request *req,
extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
struct dentry *dentry, int err); struct dentry *dentry, int err);
extern void ceph_dentry_lru_add(struct dentry *dn); extern void __ceph_dentry_lease_touch(struct ceph_dentry_info *di);
extern void ceph_dentry_lru_touch(struct dentry *dn); extern void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di);
extern void ceph_dentry_lru_del(struct dentry *dn);
extern void ceph_invalidate_dentry_lease(struct dentry *dentry); extern void ceph_invalidate_dentry_lease(struct dentry *dentry);
extern int ceph_trim_dentries(struct ceph_mds_client *mdsc);
extern unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn); extern unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn);
extern void ceph_readdir_cache_release(struct ceph_readdir_cache_control *ctl); extern void ceph_readdir_cache_release(struct ceph_readdir_cache_control *ctl);