ceph: check session state after bumping session->s_seq
Some messages sent by the MDS entail a session sequence number
increment, and the MDS will drop certain types of requests on the floor
when the sequence numbers don't match.
In particular, a REQUEST_CLOSE message can cross with one of the
sequence morphing messages from the MDS which can cause the client to
stall, waiting for a response that will never come.
Originally, this meant a delay of up to 5s before the recurring workqueue
job kicked in and resent the request, but a recent change made it so
that the client would never resend, causing a 60s stall when unmounting
and sometimes a blocklisting event.
Add a new helper for incrementing the session sequence and then testing
to see whether a REQUEST_CLOSE needs to be resent, and move the handling
of CEPH_MDS_SESSION_CLOSING into that function. Change all of the
bare sequence counter increments to use the new helper.
Reorganize check_session_state with a switch statement. It should no
longer be called when the session is CLOSING, so throw a warning if it
ever is (but still handle that case sanely).
[ idryomov: whitespace, pr_err() call fixup ]
URL: https://tracker.ceph.com/issues/47563
Fixes: fa99677342 ("ceph: fix potential mdsc use-after-free crash")
Reported-by: Patrick Donnelly <pdonnell@redhat.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Xiubo Li <xiubli@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
parent
3cea11cd5e
commit
62575e270f
|
@ -4074,7 +4074,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
|
||||||
vino.snap, inode);
|
vino.snap, inode);
|
||||||
|
|
||||||
mutex_lock(&session->s_mutex);
|
mutex_lock(&session->s_mutex);
|
||||||
session->s_seq++;
|
inc_session_sequence(session);
|
||||||
dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq,
|
dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq,
|
||||||
(unsigned)seq);
|
(unsigned)seq);
|
||||||
|
|
||||||
|
|
|
@ -4231,7 +4231,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
|
||||||
dname.len, dname.name);
|
dname.len, dname.name);
|
||||||
|
|
||||||
mutex_lock(&session->s_mutex);
|
mutex_lock(&session->s_mutex);
|
||||||
session->s_seq++;
|
inc_session_sequence(session);
|
||||||
|
|
||||||
if (!inode) {
|
if (!inode) {
|
||||||
dout("handle_lease no inode %llx\n", vino.ino);
|
dout("handle_lease no inode %llx\n", vino.ino);
|
||||||
|
@ -4385,28 +4385,48 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc)
|
||||||
|
|
||||||
bool check_session_state(struct ceph_mds_session *s)
|
bool check_session_state(struct ceph_mds_session *s)
|
||||||
{
|
{
|
||||||
if (s->s_state == CEPH_MDS_SESSION_CLOSING) {
|
switch (s->s_state) {
|
||||||
dout("resending session close request for mds%d\n",
|
case CEPH_MDS_SESSION_OPEN:
|
||||||
s->s_mds);
|
if (s->s_ttl && time_after(jiffies, s->s_ttl)) {
|
||||||
request_close_session(s);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (s->s_ttl && time_after(jiffies, s->s_ttl)) {
|
|
||||||
if (s->s_state == CEPH_MDS_SESSION_OPEN) {
|
|
||||||
s->s_state = CEPH_MDS_SESSION_HUNG;
|
s->s_state = CEPH_MDS_SESSION_HUNG;
|
||||||
pr_info("mds%d hung\n", s->s_mds);
|
pr_info("mds%d hung\n", s->s_mds);
|
||||||
}
|
}
|
||||||
}
|
break;
|
||||||
if (s->s_state == CEPH_MDS_SESSION_NEW ||
|
case CEPH_MDS_SESSION_CLOSING:
|
||||||
s->s_state == CEPH_MDS_SESSION_RESTARTING ||
|
/* Should never reach this when we're unmounting */
|
||||||
s->s_state == CEPH_MDS_SESSION_CLOSED ||
|
WARN_ON_ONCE(true);
|
||||||
s->s_state == CEPH_MDS_SESSION_REJECTED)
|
fallthrough;
|
||||||
/* this mds is failed or recovering, just wait */
|
case CEPH_MDS_SESSION_NEW:
|
||||||
|
case CEPH_MDS_SESSION_RESTARTING:
|
||||||
|
case CEPH_MDS_SESSION_CLOSED:
|
||||||
|
case CEPH_MDS_SESSION_REJECTED:
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the sequence is incremented while we're waiting on a REQUEST_CLOSE reply,
|
||||||
|
* then we need to retransmit that request.
|
||||||
|
*/
|
||||||
|
void inc_session_sequence(struct ceph_mds_session *s)
|
||||||
|
{
|
||||||
|
lockdep_assert_held(&s->s_mutex);
|
||||||
|
|
||||||
|
s->s_seq++;
|
||||||
|
|
||||||
|
if (s->s_state == CEPH_MDS_SESSION_CLOSING) {
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
dout("resending session close request for mds%d\n", s->s_mds);
|
||||||
|
ret = request_close_session(s);
|
||||||
|
if (ret < 0)
|
||||||
|
pr_err("unable to close session to mds%d: %d\n",
|
||||||
|
s->s_mds, ret);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* delayed work -- periodically trim expired leases, renew caps with mds
|
* delayed work -- periodically trim expired leases, renew caps with mds
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -480,6 +480,7 @@ struct ceph_mds_client {
|
||||||
extern const char *ceph_mds_op_name(int op);
|
extern const char *ceph_mds_op_name(int op);
|
||||||
|
|
||||||
extern bool check_session_state(struct ceph_mds_session *s);
|
extern bool check_session_state(struct ceph_mds_session *s);
|
||||||
|
void inc_session_sequence(struct ceph_mds_session *s);
|
||||||
|
|
||||||
extern struct ceph_mds_session *
|
extern struct ceph_mds_session *
|
||||||
__ceph_lookup_mds_session(struct ceph_mds_client *, int mds);
|
__ceph_lookup_mds_session(struct ceph_mds_client *, int mds);
|
||||||
|
|
|
@ -53,7 +53,7 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc,
|
||||||
|
|
||||||
/* increment msg sequence number */
|
/* increment msg sequence number */
|
||||||
mutex_lock(&session->s_mutex);
|
mutex_lock(&session->s_mutex);
|
||||||
session->s_seq++;
|
inc_session_sequence(session);
|
||||||
mutex_unlock(&session->s_mutex);
|
mutex_unlock(&session->s_mutex);
|
||||||
|
|
||||||
/* lookup inode */
|
/* lookup inode */
|
||||||
|
|
|
@ -873,7 +873,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
|
||||||
ceph_snap_op_name(op), split, trace_len);
|
ceph_snap_op_name(op), split, trace_len);
|
||||||
|
|
||||||
mutex_lock(&session->s_mutex);
|
mutex_lock(&session->s_mutex);
|
||||||
session->s_seq++;
|
inc_session_sequence(session);
|
||||||
mutex_unlock(&session->s_mutex);
|
mutex_unlock(&session->s_mutex);
|
||||||
|
|
||||||
down_write(&mdsc->snap_rwsem);
|
down_write(&mdsc->snap_rwsem);
|
||||||
|
|
Loading…
Reference in New Issue