ceph: fix race of queuing delayed caps
When called with the CHECK_CAPS_AUTHONLY flag, ceph_check_caps() only processes auth caps. In that case, it's unsafe to remove the inode from mdsc->cap_delay_list, because there can be delayed non-auth caps. Besides, ceph_check_caps() may lock/unlock i_ceph_lock several times, so when multiple threads call ceph_check_caps() at the same time, it's possible that one thread calls __cap_delay_requeue() while another thread calls __cap_delay_cancel(). __cap_delay_cancel() should be called at the very beginning of ceph_check_caps(), so that it does not race with __cap_delay_requeue(). Signed-off-by: "Yan, Zheng" <zyan@redhat.com> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
parent
ee612d954f
commit
0f439c746c
|
@ -902,6 +902,11 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check)
|
|||
/*
 * Return true if the inode holds at most one cap, i.e. rb_first and
 * rb_last point at the same node in ci->i_caps.  Note this also holds
 * for an empty tree (both return NULL).
 *
 * Must be called under i_ceph_lock.
 */
static int __ceph_is_single_caps(struct ceph_inode_info *ci)
{
	return rb_first(&ci->i_caps) == rb_last(&ci->i_caps);
}
|
||||
|
||||
static int __ceph_is_any_caps(struct ceph_inode_info *ci)
|
||||
{
|
||||
return !RB_EMPTY_ROOT(&ci->i_caps);
|
||||
|
@ -1715,21 +1720,24 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
|
|||
int mds = -1; /* keep track of how far we've gone through i_caps list
|
||||
to avoid an infinite loop on retry */
|
||||
struct rb_node *p;
|
||||
int delayed = 0, sent = 0, num;
|
||||
bool is_delayed = flags & CHECK_CAPS_NODELAY;
|
||||
int delayed = 0, sent = 0;
|
||||
bool no_delay = flags & CHECK_CAPS_NODELAY;
|
||||
bool queue_invalidate = false;
|
||||
bool force_requeue = false;
|
||||
bool tried_invalidate = false;
|
||||
|
||||
/* if we are unmounting, flush any unused caps immediately. */
|
||||
if (mdsc->stopping)
|
||||
is_delayed = true;
|
||||
no_delay = true;
|
||||
|
||||
spin_lock(&ci->i_ceph_lock);
|
||||
|
||||
if (ci->i_ceph_flags & CEPH_I_FLUSH)
|
||||
flags |= CHECK_CAPS_FLUSH;
|
||||
|
||||
if (!(flags & CHECK_CAPS_AUTHONLY) ||
|
||||
(ci->i_auth_cap && __ceph_is_single_caps(ci)))
|
||||
__cap_delay_cancel(mdsc, ci);
|
||||
|
||||
goto retry_locked;
|
||||
retry:
|
||||
spin_lock(&ci->i_ceph_lock);
|
||||
|
@ -1784,7 +1792,7 @@ retry_locked:
|
|||
* have cached pages, but don't want them, then try to invalidate.
|
||||
* If we fail, it's because pages are locked.... try again later.
|
||||
*/
|
||||
if ((!is_delayed || mdsc->stopping) &&
|
||||
if ((!no_delay || mdsc->stopping) &&
|
||||
!S_ISDIR(inode->i_mode) && /* ignore readdir cache */
|
||||
!(ci->i_wb_ref || ci->i_wrbuffer_ref) && /* no dirty pages... */
|
||||
inode->i_data.nrpages && /* have cached pages */
|
||||
|
@ -1801,10 +1809,8 @@ retry_locked:
|
|||
goto retry_locked;
|
||||
}
|
||||
|
||||
num = 0;
|
||||
for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
|
||||
cap = rb_entry(p, struct ceph_cap, ci_node);
|
||||
num++;
|
||||
|
||||
/* avoid looping forever */
|
||||
if (mds >= cap->mds ||
|
||||
|
@ -1867,7 +1873,7 @@ retry_locked:
|
|||
cap->mds_wanted == want)
|
||||
continue; /* nope, all good */
|
||||
|
||||
if (is_delayed)
|
||||
if (no_delay)
|
||||
goto ack;
|
||||
|
||||
/* delay? */
|
||||
|
@ -1958,15 +1964,8 @@ ack:
|
|||
goto retry; /* retake i_ceph_lock and restart our cap scan. */
|
||||
}
|
||||
|
||||
/*
|
||||
* Reschedule delayed caps release if we delayed anything,
|
||||
* otherwise cancel.
|
||||
*/
|
||||
if (delayed && is_delayed)
|
||||
force_requeue = true; /* __send_cap delayed release; requeue */
|
||||
if (!delayed && !is_delayed)
|
||||
__cap_delay_cancel(mdsc, ci);
|
||||
else if (!is_delayed || force_requeue)
|
||||
/* Reschedule delayed caps release if we delayed anything */
|
||||
if (delayed)
|
||||
__cap_delay_requeue(mdsc, ci);
|
||||
|
||||
spin_unlock(&ci->i_ceph_lock);
|
||||
|
|
Loading…
Reference in New Issue