ceph: stop forwarding the request when exceeding 256 times

The type of 'num_fwd' in ceph 'MClientRequestForward' is 'int32_t',
while in 'ceph_mds_request_head' the type is '__u8'. So in case
the request bounces between MDSes exceeding 256 times, the client
will get stuck.

In this case it's ususally a bug in MDS and continue bouncing the
request makes no sense.

URL: https://tracker.ceph.com/issues/55130
Signed-off-by: Xiubo Li <xiubli@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Luís Henriques <lhenriques@suse.de>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
Xiubo Li 2022-03-29 12:48:01 +08:00 committed by Ilya Dryomov
parent 6c1dc50284
commit 1980b1bf17
1 changed files with 34 additions and 5 deletions

View File

@ -3265,6 +3265,7 @@ static void handle_forward(struct ceph_mds_client *mdsc,
int err = -EINVAL;
void *p = msg->front.iov_base;
void *end = p + msg->front.iov_len;
bool aborted = false;
ceph_decode_need(&p, end, 2*sizeof(u32), bad);
next_mds = ceph_decode_32(&p);
@ -3273,16 +3274,41 @@ static void handle_forward(struct ceph_mds_client *mdsc,
mutex_lock(&mdsc->mutex);
req = lookup_get_request(mdsc, tid);
if (!req) {
mutex_unlock(&mdsc->mutex);
dout("forward tid %llu to mds%d - req dne\n", tid, next_mds);
goto out; /* dup reply? */
return; /* dup reply? */
}
if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
dout("forward tid %llu aborted, unregistering\n", tid);
__unregister_request(mdsc, req);
} else if (fwd_seq <= req->r_num_fwd) {
dout("forward tid %llu to mds%d - old seq %d <= %d\n",
tid, next_mds, req->r_num_fwd, fwd_seq);
/*
* The type of 'num_fwd' in ceph 'MClientRequestForward'
* is 'int32_t', while in 'ceph_mds_request_head' the
* type is '__u8'. So in case the request bounces between
* MDSes exceeding 256 times, the client will get stuck.
*
* In this case it's ususally a bug in MDS and continue
* bouncing the request makes no sense.
*
* In future this could be fixed in ceph code, so avoid
* using the hardcode here.
*/
int max = sizeof_field(struct ceph_mds_request_head, num_fwd);
max = 1 << (max * BITS_PER_BYTE);
if (req->r_num_fwd >= max) {
mutex_lock(&req->r_fill_mutex);
req->r_err = -EMULTIHOP;
set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
mutex_unlock(&req->r_fill_mutex);
aborted = true;
pr_warn_ratelimited("forward tid %llu seq overflow\n",
tid);
} else {
dout("forward tid %llu to mds%d - old seq %d <= %d\n",
tid, next_mds, req->r_num_fwd, fwd_seq);
}
} else {
/* resend. forward race not possible; mds would drop */
dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds);
@ -3294,9 +3320,12 @@ static void handle_forward(struct ceph_mds_client *mdsc,
put_request_session(req);
__do_request(mdsc, req);
}
ceph_mdsc_put_request(req);
out:
mutex_unlock(&mdsc->mutex);
/* kick calling process */
if (aborted)
complete_request(mdsc, req);
ceph_mdsc_put_request(req);
return;
bad: