ceph: stop forwarding the request when exceeding 256 times
The type of 'num_fwd' in ceph 'MClientRequestForward' is 'int32_t', while in 'ceph_mds_request_head' the type is '__u8'. So if the request bounces between MDSes more than 256 times, the client will get stuck. In this case it's usually a bug in the MDS, and continuing to bounce the request makes no sense. URL: https://tracker.ceph.com/issues/55130 Signed-off-by: Xiubo Li <xiubli@redhat.com> Reviewed-by: Jeff Layton <jlayton@kernel.org> Reviewed-by: Luís Henriques <lhenriques@suse.de> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
parent
6c1dc50284
commit
1980b1bf17
|
@ -3265,6 +3265,7 @@ static void handle_forward(struct ceph_mds_client *mdsc,
|
|||
int err = -EINVAL;
|
||||
void *p = msg->front.iov_base;
|
||||
void *end = p + msg->front.iov_len;
|
||||
bool aborted = false;
|
||||
|
||||
ceph_decode_need(&p, end, 2*sizeof(u32), bad);
|
||||
next_mds = ceph_decode_32(&p);
|
||||
|
@ -3273,16 +3274,41 @@ static void handle_forward(struct ceph_mds_client *mdsc,
|
|||
mutex_lock(&mdsc->mutex);
|
||||
req = lookup_get_request(mdsc, tid);
|
||||
if (!req) {
|
||||
mutex_unlock(&mdsc->mutex);
|
||||
dout("forward tid %llu to mds%d - req dne\n", tid, next_mds);
|
||||
goto out; /* dup reply? */
|
||||
return; /* dup reply? */
|
||||
}
|
||||
|
||||
if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
|
||||
dout("forward tid %llu aborted, unregistering\n", tid);
|
||||
__unregister_request(mdsc, req);
|
||||
} else if (fwd_seq <= req->r_num_fwd) {
|
||||
dout("forward tid %llu to mds%d - old seq %d <= %d\n",
|
||||
tid, next_mds, req->r_num_fwd, fwd_seq);
|
||||
/*
|
||||
* The type of 'num_fwd' in ceph 'MClientRequestForward'
|
||||
* is 'int32_t', while in 'ceph_mds_request_head' the
|
||||
* type is '__u8'. So in case the request bounces between
|
||||
* MDSes exceeding 256 times, the client will get stuck.
|
||||
*
|
||||
* In this case it's usually a bug in MDS and continue
|
||||
* bouncing the request makes no sense.
|
||||
*
|
||||
* In future this could be fixed in ceph code, so avoid
|
||||
* using the hardcode here.
|
||||
*/
|
||||
int max = sizeof_field(struct ceph_mds_request_head, num_fwd);
|
||||
max = 1 << (max * BITS_PER_BYTE);
|
||||
if (req->r_num_fwd >= max) {
|
||||
mutex_lock(&req->r_fill_mutex);
|
||||
req->r_err = -EMULTIHOP;
|
||||
set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
|
||||
mutex_unlock(&req->r_fill_mutex);
|
||||
aborted = true;
|
||||
pr_warn_ratelimited("forward tid %llu seq overflow\n",
|
||||
tid);
|
||||
} else {
|
||||
dout("forward tid %llu to mds%d - old seq %d <= %d\n",
|
||||
tid, next_mds, req->r_num_fwd, fwd_seq);
|
||||
}
|
||||
} else {
|
||||
/* resend. forward race not possible; mds would drop */
|
||||
dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds);
|
||||
|
@ -3294,9 +3320,12 @@ static void handle_forward(struct ceph_mds_client *mdsc,
|
|||
put_request_session(req);
|
||||
__do_request(mdsc, req);
|
||||
}
|
||||
ceph_mdsc_put_request(req);
|
||||
out:
|
||||
mutex_unlock(&mdsc->mutex);
|
||||
|
||||
/* kick calling process */
|
||||
if (aborted)
|
||||
complete_request(mdsc, req);
|
||||
ceph_mdsc_put_request(req);
|
||||
return;
|
||||
|
||||
bad:
|
||||
|
|
Loading…
Reference in New Issue