ceph: encode inodes' parent/d_name in cap reconnect message
Since nautilus, the MDS tracks dirfrags whose child inodes have caps in the open file table. When the MDS recovers, it prefetches all of these dirfrags, which avoids using backtraces to load inodes. But dirfrag prefetch may load lots of useless inodes into the cache and make the MDS run out of memory. Recent MDS versions add an option that disables dirfrag prefetch. When dirfrag prefetch is disabled, the recovering MDS only prefetches the corresponding dir inodes. Including inodes' parent/d_name in the cap reconnect message can help the MDS load inodes into its cache. Signed-off-by: "Yan, Zheng" <zyan@redhat.com> Reviewed-by: Jeff Layton <jlayton@kernel.org> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
parent
bbf5c97901
commit
a33f6432b3
|
@ -3612,6 +3612,39 @@ fail_msg:
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Find the dentry to use when building a path for @inode.
 *
 * Returns a dentry on which a reference has been taken (via dget() or
 * dget_dlock()), or NULL if no suitable alias exists. The caller is
 * expected to release the reference with dput().
 *
 * For a directory, the first alias on inode->i_dentry is used unless it
 * is IS_ROOT(). For any other inode, the alias list is scanned for the
 * first dentry that is hashed and has CEPH_DENTRY_PRIMARY_LINK set in
 * its ceph dentry flags.
 */
static struct dentry* d_find_primary(struct inode *inode)
|
||||||
|
{
|
||||||
|
struct dentry *alias, *dn = NULL;
|
||||||
|
|
||||||
|
/* unlocked fast path: no aliases at all, nothing to look up */
if (hlist_empty(&inode->i_dentry))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
spin_lock(&inode->i_lock);
|
||||||
|
/* re-check now that i_lock is held; the list may have emptied */
if (hlist_empty(&inode->i_dentry))
|
||||||
|
goto out_unlock;
|
||||||
|
|
||||||
|
/* directory: only the first alias is considered, and only if not IS_ROOT */
if (S_ISDIR(inode->i_mode)) {
|
||||||
|
alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
|
||||||
|
if (!IS_ROOT(alias))
|
||||||
|
dn = dget(alias);
|
||||||
|
goto out_unlock;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * non-directory: walk every alias and stop at the first one that is
 * hashed and flagged as the primary link; each alias is inspected
 * under its own d_lock
 */
hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
|
||||||
|
spin_lock(&alias->d_lock);
|
||||||
|
if (!d_unhashed(alias) &&
|
||||||
|
(ceph_dentry(alias)->flags & CEPH_DENTRY_PRIMARY_LINK)) {
|
||||||
|
dn = dget_dlock(alias);
|
||||||
|
}
|
||||||
|
spin_unlock(&alias->d_lock);
|
||||||
|
if (dn)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
out_unlock:
|
||||||
|
spin_unlock(&inode->i_lock);
|
||||||
|
return dn;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Encode information about a cap for a reconnect with the MDS.
|
* Encode information about a cap for a reconnect with the MDS.
|
||||||
*/
|
*/
|
||||||
|
@ -3625,13 +3658,32 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
|
||||||
struct ceph_inode_info *ci = cap->ci;
|
struct ceph_inode_info *ci = cap->ci;
|
||||||
struct ceph_reconnect_state *recon_state = arg;
|
struct ceph_reconnect_state *recon_state = arg;
|
||||||
struct ceph_pagelist *pagelist = recon_state->pagelist;
|
struct ceph_pagelist *pagelist = recon_state->pagelist;
|
||||||
int err;
|
struct dentry *dentry;
|
||||||
|
char *path;
|
||||||
|
int pathlen, err;
|
||||||
|
u64 pathbase;
|
||||||
u64 snap_follows;
|
u64 snap_follows;
|
||||||
|
|
||||||
dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
|
dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
|
||||||
inode, ceph_vinop(inode), cap, cap->cap_id,
|
inode, ceph_vinop(inode), cap, cap->cap_id,
|
||||||
ceph_cap_string(cap->issued));
|
ceph_cap_string(cap->issued));
|
||||||
|
|
||||||
|
dentry = d_find_primary(inode);
|
||||||
|
if (dentry) {
|
||||||
|
/* set pathbase to parent dir when msg_version >= 2 */
|
||||||
|
path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase,
|
||||||
|
recon_state->msg_version >= 2);
|
||||||
|
dput(dentry);
|
||||||
|
if (IS_ERR(path)) {
|
||||||
|
err = PTR_ERR(path);
|
||||||
|
goto out_err;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
path = NULL;
|
||||||
|
pathlen = 0;
|
||||||
|
pathbase = 0;
|
||||||
|
}
|
||||||
|
|
||||||
spin_lock(&ci->i_ceph_lock);
|
spin_lock(&ci->i_ceph_lock);
|
||||||
cap->seq = 0; /* reset cap seq */
|
cap->seq = 0; /* reset cap seq */
|
||||||
cap->issue_seq = 0; /* and issue_seq */
|
cap->issue_seq = 0; /* and issue_seq */
|
||||||
|
@ -3652,7 +3704,7 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
|
||||||
rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
|
rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
|
||||||
rec.v2.issued = cpu_to_le32(cap->issued);
|
rec.v2.issued = cpu_to_le32(cap->issued);
|
||||||
rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
|
rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
|
||||||
rec.v2.pathbase = 0;
|
rec.v2.pathbase = cpu_to_le64(pathbase);
|
||||||
rec.v2.flock_len = (__force __le32)
|
rec.v2.flock_len = (__force __le32)
|
||||||
((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1);
|
((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1);
|
||||||
} else {
|
} else {
|
||||||
|
@ -3663,7 +3715,7 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
|
||||||
ceph_encode_timespec64(&rec.v1.mtime, &inode->i_mtime);
|
ceph_encode_timespec64(&rec.v1.mtime, &inode->i_mtime);
|
||||||
ceph_encode_timespec64(&rec.v1.atime, &inode->i_atime);
|
ceph_encode_timespec64(&rec.v1.atime, &inode->i_atime);
|
||||||
rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
|
rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
|
||||||
rec.v1.pathbase = 0;
|
rec.v1.pathbase = cpu_to_le64(pathbase);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (list_empty(&ci->i_cap_snaps)) {
|
if (list_empty(&ci->i_cap_snaps)) {
|
||||||
|
@ -3725,7 +3777,7 @@ encode_again:
|
||||||
sizeof(struct ceph_filelock);
|
sizeof(struct ceph_filelock);
|
||||||
rec.v2.flock_len = cpu_to_le32(struct_len);
|
rec.v2.flock_len = cpu_to_le32(struct_len);
|
||||||
|
|
||||||
struct_len += sizeof(u32) + sizeof(rec.v2);
|
struct_len += sizeof(u32) + pathlen + sizeof(rec.v2);
|
||||||
|
|
||||||
if (struct_v >= 2)
|
if (struct_v >= 2)
|
||||||
struct_len += sizeof(u64); /* snap_follows */
|
struct_len += sizeof(u64); /* snap_follows */
|
||||||
|
@ -3749,7 +3801,7 @@ encode_again:
|
||||||
ceph_pagelist_encode_8(pagelist, 1);
|
ceph_pagelist_encode_8(pagelist, 1);
|
||||||
ceph_pagelist_encode_32(pagelist, struct_len);
|
ceph_pagelist_encode_32(pagelist, struct_len);
|
||||||
}
|
}
|
||||||
ceph_pagelist_encode_string(pagelist, NULL, 0);
|
ceph_pagelist_encode_string(pagelist, path, pathlen);
|
||||||
ceph_pagelist_append(pagelist, &rec, sizeof(rec.v2));
|
ceph_pagelist_append(pagelist, &rec, sizeof(rec.v2));
|
||||||
ceph_locks_to_pagelist(flocks, pagelist,
|
ceph_locks_to_pagelist(flocks, pagelist,
|
||||||
num_fcntl_locks, num_flock_locks);
|
num_fcntl_locks, num_flock_locks);
|
||||||
|
@ -3758,39 +3810,20 @@ encode_again:
|
||||||
out_freeflocks:
|
out_freeflocks:
|
||||||
kfree(flocks);
|
kfree(flocks);
|
||||||
} else {
|
} else {
|
||||||
u64 pathbase = 0;
|
|
||||||
int pathlen = 0;
|
|
||||||
char *path = NULL;
|
|
||||||
struct dentry *dentry;
|
|
||||||
|
|
||||||
dentry = d_find_alias(inode);
|
|
||||||
if (dentry) {
|
|
||||||
path = ceph_mdsc_build_path(dentry,
|
|
||||||
&pathlen, &pathbase, 0);
|
|
||||||
dput(dentry);
|
|
||||||
if (IS_ERR(path)) {
|
|
||||||
err = PTR_ERR(path);
|
|
||||||
goto out_err;
|
|
||||||
}
|
|
||||||
rec.v1.pathbase = cpu_to_le64(pathbase);
|
|
||||||
}
|
|
||||||
|
|
||||||
err = ceph_pagelist_reserve(pagelist,
|
err = ceph_pagelist_reserve(pagelist,
|
||||||
sizeof(u64) + sizeof(u32) +
|
sizeof(u64) + sizeof(u32) +
|
||||||
pathlen + sizeof(rec.v1));
|
pathlen + sizeof(rec.v1));
|
||||||
if (err) {
|
if (err)
|
||||||
goto out_freepath;
|
goto out_err;
|
||||||
}
|
|
||||||
|
|
||||||
ceph_pagelist_encode_64(pagelist, ceph_ino(inode));
|
ceph_pagelist_encode_64(pagelist, ceph_ino(inode));
|
||||||
ceph_pagelist_encode_string(pagelist, path, pathlen);
|
ceph_pagelist_encode_string(pagelist, path, pathlen);
|
||||||
ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1));
|
ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1));
|
||||||
out_freepath:
|
|
||||||
ceph_mdsc_free_path(path, pathlen);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
out_err:
|
out_err:
|
||||||
if (err >= 0)
|
ceph_mdsc_free_path(path, pathlen);
|
||||||
|
if (!err)
|
||||||
recon_state->nr_caps++;
|
recon_state->nr_caps++;
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue