commit 3c4aa44343

A few filesystem improvements, with a rather nasty use-after-free fix
from Xiubo intended for stable.

-----BEGIN PGP SIGNATURE-----

iQFHBAABCAAxFiEEydHwtzie9C7TfviiSn/eOAIR84sFAmRT9yoTHGlkcnlvbW92
QGdtYWlsLmNvbQAKCRBKf944AhHzi0dKB/9bx9W3GH7YnlJNkUq9eNAQjKlQ7B6s
i1T/ulPW3kidNwlK5z2ke7bLVN6CzZotOCjAqXHvyWAPFC46NQW+i6jLGgi0PVac
acCNuF5Vda5cuKN1xAumGsxCiAScCLvyr1v6BvkkRogrOMTnm5qOhUJn/ED2sjrt
02OPSpQ1Mt586GRNiYz4PaaNr9NgTUVHE/BV2GHX1EuTxNhMaeIVnH8xxN6DU9lg
LN5dMeCsYbM0hCOccu5zdnGtCy/OGn6oAmVWpiXFJ5WJFJV19QO5sg8BenzNO1gm
Hj66nEbN9cvxZqLeBPvWe+GeNlK/LR3c6jQQgTeniVqm0qJmf29FYWYu
=gK0m
-----END PGP SIGNATURE-----

Merge tag 'ceph-for-6.4-rc1' of https://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:

 "A few filesystem improvements, with a rather nasty use-after-free fix
  from Xiubo intended for stable"

* tag 'ceph-for-6.4-rc1' of https://github.com/ceph/ceph-client:
  ceph: reorder fields in 'struct ceph_snapid_map'
  ceph: pass ino# instead of old_dentry if it's disconnected
  ceph: fix potential use-after-free bug when trimming caps
  ceph: implement writeback livelock avoidance using page tagging
  ceph: do not print the whole xattr value if it's too long
fs/ceph/addr.c

@@ -808,6 +808,7 @@ static int ceph_writepages_start(struct address_space *mapping,
         bool should_loop, range_whole = false;
         bool done = false;
         bool caching = ceph_is_cache_enabled(inode);
+        xa_mark_t tag;
 
         if (wbc->sync_mode == WB_SYNC_NONE &&
             fsc->write_congested)
@@ -834,6 +835,11 @@ static int ceph_writepages_start(struct address_space *mapping,
         start_index = wbc->range_cyclic ? mapping->writeback_index : 0;
         index = start_index;
 
+        if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) {
+                tag = PAGECACHE_TAG_TOWRITE;
+        } else {
+                tag = PAGECACHE_TAG_DIRTY;
+        }
 retry:
         /* find oldest snap context with dirty data */
         snapc = get_oldest_context(inode, &ceph_wbc, NULL);
@@ -872,6 +878,9 @@ retry:
                 dout(" non-head snapc, range whole\n");
         }
 
+        if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
+                tag_pages_for_writeback(mapping, index, end);
+
         ceph_put_snap_context(last_snapc);
         last_snapc = snapc;
 
@@ -888,7 +897,7 @@ retry:
 
 get_more_pages:
                 nr_folios = filemap_get_folios_tag(mapping, &index,
-                                                   end, PAGECACHE_TAG_DIRTY, &fbatch);
+                                                   end, tag, &fbatch);
                 dout("pagevec_lookup_range_tag got %d\n", nr_folios);
                 if (!nr_folios && !locked_pages)
                         break;
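Background on the page-tagging change above: for data-integrity writeback (WB_SYNC_ALL or tagged_writepages), the loop now stamps the pages that are dirty at the start with PAGECACHE_TAG_TOWRITE and then walks only that tag, so pages redirtied while the walk is in progress cannot keep the loop live-locked. Below is a minimal, generic sketch of that pattern for an address_space; it is an illustration only, not the ceph code, and example_writepages plus the whole-file range are hypothetical.

/*
 * Sketch of livelock-avoiding writeback: snapshot dirty pages with
 * PAGECACHE_TAG_TOWRITE for integrity writeback, then iterate that tag.
 */
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/sched.h>
#include <linux/writeback.h>

static int example_writepages(struct address_space *mapping,
                              struct writeback_control *wbc)
{
        struct folio_batch fbatch;
        pgoff_t index = 0, end = -1;    /* simplified: whole file */
        xa_mark_t tag;
        unsigned int i, nr;

        if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) {
                /* fix the working set up front; redirtied pages are skipped */
                tag = PAGECACHE_TAG_TOWRITE;
                tag_pages_for_writeback(mapping, index, end);
        } else {
                tag = PAGECACHE_TAG_DIRTY;
        }

        folio_batch_init(&fbatch);
        while ((nr = filemap_get_folios_tag(mapping, &index, end, tag,
                                            &fbatch))) {
                for (i = 0; i < nr; i++) {
                        /* lock, write back and unlock fbatch.folios[i] here */
                }
                folio_batch_release(&fbatch);
                cond_resched();
        }
        return 0;
}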
fs/ceph/caps.c

@@ -431,7 +431,7 @@ void ceph_reservation_status(struct ceph_fs_client *fsc,
  *
  * Called with i_ceph_lock held.
  */
-static struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds)
+struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds)
 {
         struct ceph_cap *cap;
         struct rb_node *n = ci->i_caps.rb_node;
fs/ceph/debugfs.c

@@ -248,14 +248,20 @@ static int metrics_caps_show(struct seq_file *s, void *p)
         return 0;
 }
 
-static int caps_show_cb(struct inode *inode, struct ceph_cap *cap, void *p)
+static int caps_show_cb(struct inode *inode, int mds, void *p)
 {
+        struct ceph_inode_info *ci = ceph_inode(inode);
         struct seq_file *s = p;
+        struct ceph_cap *cap;
 
-        seq_printf(s, "0x%-17llx%-3d%-17s%-17s\n", ceph_ino(inode),
-                   cap->session->s_mds,
-                   ceph_cap_string(cap->issued),
-                   ceph_cap_string(cap->implemented));
+        spin_lock(&ci->i_ceph_lock);
+        cap = __get_cap_for_mds(ci, mds);
+        if (cap)
+                seq_printf(s, "0x%-17llx%-3d%-17s%-17s\n", ceph_ino(inode),
+                           cap->session->s_mds,
+                           ceph_cap_string(cap->issued),
+                           ceph_cap_string(cap->implemented));
+        spin_unlock(&ci->i_ceph_lock);
         return 0;
 }
 
fs/ceph/dir.c

@@ -1050,6 +1050,9 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
         struct ceph_mds_request *req;
         int err;
 
+        if (dentry->d_flags & DCACHE_DISCONNECTED)
+                return -EINVAL;
+
         err = ceph_wait_on_conflict_unlink(dentry);
         if (err)
                 return err;
@@ -1057,8 +1060,8 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
         if (ceph_snap(dir) != CEPH_NOSNAP)
                 return -EROFS;
 
-        dout("link in dir %p old_dentry %p dentry %p\n", dir,
-             old_dentry, dentry);
+        dout("link in dir %p %llx.%llx old_dentry %p:'%pd' dentry %p:'%pd'\n",
+             dir, ceph_vinop(dir), old_dentry, old_dentry, dentry, dentry);
         req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LINK, USE_AUTH_MDS);
         if (IS_ERR(req)) {
                 d_drop(dentry);
@@ -1067,6 +1070,12 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
         req->r_dentry = dget(dentry);
         req->r_num_caps = 2;
         req->r_old_dentry = dget(old_dentry);
+        /*
+         * The old_dentry maybe a DCACHE_DISCONNECTED dentry, then we
+         * will just pass the ino# to MDSs.
+         */
+        if (old_dentry->d_flags & DCACHE_DISCONNECTED)
+                req->r_ino2 = ceph_vino(d_inode(old_dentry));
         req->r_parent = dir;
         ihold(dir);
         set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
fs/ceph/mds_client.c

@@ -1632,8 +1632,8 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc,
  * Caller must hold session s_mutex.
  */
 int ceph_iterate_session_caps(struct ceph_mds_session *session,
-                              int (*cb)(struct inode *, struct ceph_cap *,
-                                        void *), void *arg)
+                              int (*cb)(struct inode *, int mds, void *),
+                              void *arg)
 {
         struct list_head *p;
         struct ceph_cap *cap;
@@ -1645,6 +1645,8 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
         spin_lock(&session->s_cap_lock);
         p = session->s_caps.next;
         while (p != &session->s_caps) {
+                int mds;
+
                 cap = list_entry(p, struct ceph_cap, session_caps);
                 inode = igrab(&cap->ci->netfs.inode);
                 if (!inode) {
@@ -1652,6 +1654,7 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
                         continue;
                 }
                 session->s_cap_iterator = cap;
+                mds = cap->mds;
                 spin_unlock(&session->s_cap_lock);
 
                 if (last_inode) {
@@ -1663,7 +1666,7 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
                         old_cap = NULL;
                 }
 
-                ret = cb(inode, cap, arg);
+                ret = cb(inode, mds, arg);
                 last_inode = inode;
 
                 spin_lock(&session->s_cap_lock);
@@ -1696,20 +1699,25 @@ out:
         return ret;
 }
 
-static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
-                                  void *arg)
+static int remove_session_caps_cb(struct inode *inode, int mds, void *arg)
 {
         struct ceph_inode_info *ci = ceph_inode(inode);
         bool invalidate = false;
-        int iputs;
+        struct ceph_cap *cap;
+        int iputs = 0;
 
-        dout("removing cap %p, ci is %p, inode is %p\n",
-             cap, ci, &ci->netfs.inode);
         spin_lock(&ci->i_ceph_lock);
-        iputs = ceph_purge_inode_cap(inode, cap, &invalidate);
+        cap = __get_cap_for_mds(ci, mds);
+        if (cap) {
+                dout(" removing cap %p, ci is %p, inode is %p\n",
+                     cap, ci, &ci->netfs.inode);
+
+                iputs = ceph_purge_inode_cap(inode, cap, &invalidate);
+        }
         spin_unlock(&ci->i_ceph_lock);
 
-        wake_up_all(&ci->i_cap_wq);
+        if (cap)
+                wake_up_all(&ci->i_cap_wq);
         if (invalidate)
                 ceph_queue_invalidate(inode);
         while (iputs--)
@@ -1780,8 +1788,7 @@ enum {
  *
  * caller must hold s_mutex.
  */
-static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
-                              void *arg)
+static int wake_up_session_cb(struct inode *inode, int mds, void *arg)
 {
         struct ceph_inode_info *ci = ceph_inode(inode);
         unsigned long ev = (unsigned long)arg;
@@ -1792,12 +1799,14 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
                 ci->i_requested_max_size = 0;
                 spin_unlock(&ci->i_ceph_lock);
         } else if (ev == RENEWCAPS) {
-                if (cap->cap_gen < atomic_read(&cap->session->s_cap_gen)) {
-                        /* mds did not re-issue stale cap */
-                        spin_lock(&ci->i_ceph_lock);
+                struct ceph_cap *cap;
+
+                spin_lock(&ci->i_ceph_lock);
+                cap = __get_cap_for_mds(ci, mds);
+                /* mds did not re-issue stale cap */
+                if (cap && cap->cap_gen < atomic_read(&cap->session->s_cap_gen))
                         cap->issued = cap->implemented = CEPH_CAP_PIN;
-                        spin_unlock(&ci->i_ceph_lock);
-                }
+                spin_unlock(&ci->i_ceph_lock);
         } else if (ev == FORCE_RO) {
         }
         wake_up_all(&ci->i_cap_wq);
@@ -1959,16 +1968,22 @@ out:
  * Yes, this is a bit sloppy. Our only real goal here is to respond to
  * memory pressure from the MDS, though, so it needn't be perfect.
  */
-static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
+static int trim_caps_cb(struct inode *inode, int mds, void *arg)
 {
         int *remaining = arg;
         struct ceph_inode_info *ci = ceph_inode(inode);
         int used, wanted, oissued, mine;
+        struct ceph_cap *cap;
 
         if (*remaining <= 0)
                 return -1;
 
         spin_lock(&ci->i_ceph_lock);
+        cap = __get_cap_for_mds(ci, mds);
+        if (!cap) {
+                spin_unlock(&ci->i_ceph_lock);
+                return 0;
+        }
         mine = cap->issued | cap->implemented;
         used = __ceph_caps_used(ci);
         wanted = __ceph_caps_file_wanted(ci);
@@ -2555,6 +2570,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
         u64 ino1 = 0, ino2 = 0;
         int pathlen1 = 0, pathlen2 = 0;
         bool freepath1 = false, freepath2 = false;
+        struct dentry *old_dentry = NULL;
         int len;
         u16 releases;
         void *p, *end;
@@ -2572,7 +2588,10 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
         }
 
         /* If r_old_dentry is set, then assume that its parent is locked */
-        ret = set_request_path_attr(NULL, req->r_old_dentry,
+        if (req->r_old_dentry &&
+            !(req->r_old_dentry->d_flags & DCACHE_DISCONNECTED))
+                old_dentry = req->r_old_dentry;
+        ret = set_request_path_attr(NULL, old_dentry,
                         req->r_old_dentry_dir,
                         req->r_path2, req->r_ino2.ino,
                         &path2, &pathlen2, &ino2, &freepath2, true);
@@ -3911,26 +3930,22 @@ out_unlock:
 /*
  * Encode information about a cap for a reconnect with the MDS.
  */
-static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
-                             void *arg)
+static int reconnect_caps_cb(struct inode *inode, int mds, void *arg)
 {
         union {
                 struct ceph_mds_cap_reconnect v2;
                 struct ceph_mds_cap_reconnect_v1 v1;
         } rec;
-        struct ceph_inode_info *ci = cap->ci;
+        struct ceph_inode_info *ci = ceph_inode(inode);
         struct ceph_reconnect_state *recon_state = arg;
         struct ceph_pagelist *pagelist = recon_state->pagelist;
         struct dentry *dentry;
+        struct ceph_cap *cap;
         char *path;
-        int pathlen = 0, err;
+        int pathlen = 0, err = 0;
         u64 pathbase;
         u64 snap_follows;
 
-        dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
-             inode, ceph_vinop(inode), cap, cap->cap_id,
-             ceph_cap_string(cap->issued));
-
         dentry = d_find_primary(inode);
         if (dentry) {
                 /* set pathbase to parent dir when msg_version >= 2 */
@@ -3947,6 +3962,15 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
         }
 
         spin_lock(&ci->i_ceph_lock);
+        cap = __get_cap_for_mds(ci, mds);
+        if (!cap) {
+                spin_unlock(&ci->i_ceph_lock);
+                goto out_err;
+        }
+        dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
+             inode, ceph_vinop(inode), cap, cap->cap_id,
+             ceph_cap_string(cap->issued));
+
         cap->seq = 0; /* reset cap seq */
         cap->issue_seq = 0; /* and issue_seq */
         cap->mseq = 0; /* and migrate_seq */
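The mds_client.c changes above are the core of the use-after-free fix: ceph_iterate_session_caps() no longer hands callbacks a struct ceph_cap pointer, which may already be freed once s_cap_lock is dropped, but only the MDS rank, and each callback re-looks the cap up under i_ceph_lock via the now-exported __get_cap_for_mds(). A distilled sketch of that callback pattern follows; it is not the ceph code, it assumes fs/ceph's internal "super.h" (for ceph_inode() and __get_cap_for_mds()) is in scope, and example_cap_cb is a hypothetical name.

/*
 * Illustration of the safe callback shape: re-resolve the cap by MDS
 * rank under i_ceph_lock instead of trusting a pointer captured while
 * the session's s_cap_lock was still held.
 */
static int example_cap_cb(struct inode *inode, int mds, void *arg)
{
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_cap *cap;

        spin_lock(&ci->i_ceph_lock);
        cap = __get_cap_for_mds(ci, mds);       /* may be NULL by now */
        if (cap) {
                /* safe to use cap->issued / cap->implemented here */
        }
        spin_unlock(&ci->i_ceph_lock);
        return 0;
}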
fs/ceph/mds_client.h

@@ -355,8 +355,8 @@ struct ceph_snapid_map {
         struct rb_node node;
         struct list_head lru;
         atomic_t ref;
-        u64 snap;
         dev_t dev;
+        u64 snap;
         unsigned long last_used;
 };
 
@@ -541,8 +541,7 @@ extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc,
 extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc);
 extern void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr);
 extern int ceph_iterate_session_caps(struct ceph_mds_session *session,
-                                     int (*cb)(struct inode *,
-                                               struct ceph_cap *, void *),
+                                     int (*cb)(struct inode *, int mds, void *),
                                      void *arg);
 extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
 
fs/ceph/super.h

@@ -1192,6 +1192,8 @@ extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
                                     struct ceph_mds_session *session);
 void ceph_kick_flushing_inode_caps(struct ceph_mds_session *session,
                                    struct ceph_inode_info *ci);
+extern struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci,
+                                          int mds);
 extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci,
                                              int mds);
 extern void ceph_take_cap_refs(struct ceph_inode_info *ci, int caps,
fs/ceph/xattr.c

@@ -535,6 +535,8 @@ static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
         return NULL;
 }
 
+#define MAX_XATTR_VAL_PRINT_LEN 256
+
 static int __set_xattr(struct ceph_inode_info *ci,
                        const char *name, int name_len,
                        const char *val, int val_len,
@@ -597,7 +599,7 @@ static int __set_xattr(struct ceph_inode_info *ci,
                 xattr->should_free_name = update_xattr;
 
                 ci->i_xattrs.count++;
-                dout("__set_xattr count=%d\n", ci->i_xattrs.count);
+                dout("%s count=%d\n", __func__, ci->i_xattrs.count);
         } else {
                 kfree(*newxattr);
                 *newxattr = NULL;
@@ -625,11 +627,13 @@ static int __set_xattr(struct ceph_inode_info *ci,
         if (new) {
                 rb_link_node(&xattr->node, parent, p);
                 rb_insert_color(&xattr->node, &ci->i_xattrs.index);
-                dout("__set_xattr_val p=%p\n", p);
+                dout("%s p=%p\n", __func__, p);
         }
 
-        dout("__set_xattr_val added %llx.%llx xattr %p %.*s=%.*s\n",
-             ceph_vinop(&ci->netfs.inode), xattr, name_len, name, val_len, val);
+        dout("%s added %llx.%llx xattr %p %.*s=%.*s%s\n", __func__,
+             ceph_vinop(&ci->netfs.inode), xattr, name_len, name,
+             min(val_len, MAX_XATTR_VAL_PRINT_LEN), val,
+             val_len > MAX_XATTR_VAL_PRINT_LEN ? "..." : "");
 
         return 0;
 }
@@ -655,13 +659,15 @@ static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
                 else if (c > 0)
                         p = &(*p)->rb_right;
                 else {
-                        dout("__get_xattr %s: found %.*s\n", name,
-                             xattr->val_len, xattr->val);
+                        int len = min(xattr->val_len, MAX_XATTR_VAL_PRINT_LEN);
+
+                        dout("%s %s: found %.*s%s\n", __func__, name, len,
+                             xattr->val, xattr->val_len > len ? "..." : "");
                         return xattr;
                 }
         }
 
-        dout("__get_xattr %s: not found\n", name);
+        dout("%s %s: not found\n", __func__, name);
 
         return NULL;
 }
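The xattr.c hunks above cap debug output at MAX_XATTR_VAL_PRINT_LEN bytes and append "..." when a value is longer. A small stand-alone sketch of the same %.*s truncation idiom, written here as plain userspace C with printf() purely for illustration (the ceph code goes through dout()):

/* Print at most MAX_XATTR_VAL_PRINT_LEN bytes of a value, with an
 * ellipsis marker when the value was truncated. */
#include <stdio.h>

#define MAX_XATTR_VAL_PRINT_LEN 256

static void print_xattr(const char *name, const char *val, int val_len)
{
        int len = val_len < MAX_XATTR_VAL_PRINT_LEN ?
                  val_len : MAX_XATTR_VAL_PRINT_LEN;

        printf("%s=%.*s%s\n", name, len, val,
               val_len > len ? "..." : "");
}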