From b3a0066005821acdc0cdb092cb72587182ab583f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 31 Oct 2017 09:53:28 +0100 Subject: [PATCH 01/10] dnotify: Handle errors from fsnotify_add_mark_locked() in fcntl_dirnotify() fsnotify_add_mark_locked() can fail but we do not check its return value. This didn't matter before commit 9dd813c15b2c "fsnotify: Move mark list head from object into dedicated structure" as none of possible failures could happen for dnotify but after that commit -ENOMEM can be returned. Handle this error properly in fcntl_dirnotify() as otherwise we just hit BUG_ON(dn_mark->dn) in dnotify_free_mark(). Reviewed-by: Amir Goldstein Reported-by: syzkaller Fixes: 9dd813c15b2c101168808d4f5941a29985758973 Signed-off-by: Jan Kara --- fs/notify/dnotify/dnotify.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index cba328315929..63a1ca4b9dee 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -319,7 +319,11 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); spin_lock(&fsn_mark->lock); } else { - fsnotify_add_mark_locked(new_fsn_mark, inode, NULL, 0); + error = fsnotify_add_mark_locked(new_fsn_mark, inode, NULL, 0); + if (error) { + mutex_unlock(&dnotify_group->mark_mutex); + goto out_err; + } spin_lock(&new_fsn_mark->lock); fsn_mark = new_fsn_mark; dn_mark = new_dn_mark; @@ -345,6 +349,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) */ if (dn_mark == new_dn_mark) destroy = 1; + error = 0; goto out; } From 9cf90cef362d44b2f3fcdb7d0694849a6308b620 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 31 Oct 2017 10:09:25 +0100 Subject: [PATCH 02/10] fsnotify: Protect bail out path of fsnotify_add_mark_locked() properly When fsnotify_add_mark_locked() fails it cleans up the mark it was adding. Since the mark is already visible in group's list, we should protect update of mark->flags with mark->lock. I'm not aware of any real issues this could cause (since we also hold group->mark_mutex) but better be safe and obey locking rules properly. Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/mark.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 9991f8826734..47a827975b58 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -599,9 +599,11 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct inode *inode, return ret; err: + spin_lock(&mark->lock); mark->flags &= ~(FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_ATTACHED); list_del_init(&mark->g_list); + spin_unlock(&mark->lock); atomic_dec(&group->num_marks); fsnotify_put_mark(mark); From 7761daa6a1599fa5479b8da367470f632a1927e0 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 13:26:01 +0300 Subject: [PATCH 03/10] fsnotify: convert fsnotify_group.refcnt from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. 
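As a minimal sketch of the pattern being hardened (hypothetical kmalloc()-allocated object 'foo_obj', not part of this series):

	struct foo_obj {
		refcount_t refcnt;
	};

	static void foo_obj_get(struct foo_obj *obj)
	{
		/* Saturates instead of wrapping, WARNs if the count was already zero. */
		refcount_inc(&obj->refcnt);
	}

	static void foo_obj_put(struct foo_obj *obj)
	{
		/* Frees exactly once, when the last reference is dropped. */
		if (refcount_dec_and_test(&obj->refcnt))
			kfree(obj);
	}
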
This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable fsnotify_group.refcnt is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: Jan Kara --- fs/notify/group.c | 6 +++--- include/linux/fsnotify_backend.h | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/notify/group.c b/fs/notify/group.c index 32357534de18..b7a4b6a69efa 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -107,7 +107,7 @@ void fsnotify_destroy_group(struct fsnotify_group *group) */ void fsnotify_get_group(struct fsnotify_group *group) { - atomic_inc(&group->refcnt); + refcount_inc(&group->refcnt); } /* @@ -115,7 +115,7 @@ void fsnotify_get_group(struct fsnotify_group *group) */ void fsnotify_put_group(struct fsnotify_group *group) { - if (atomic_dec_and_test(&group->refcnt)) + if (refcount_dec_and_test(&group->refcnt)) fsnotify_final_destroy_group(group); } @@ -131,7 +131,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) return ERR_PTR(-ENOMEM); /* set to 0 when there a no external references to this group */ - atomic_set(&group->refcnt, 1); + refcount_set(&group->refcnt, 1); atomic_set(&group->num_marks, 0); atomic_set(&group->user_waits, 0); diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index c6c69318752b..20a57bac38f2 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -17,6 +17,7 @@ #include #include #include +#include /* * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily @@ -135,7 +136,7 @@ struct fsnotify_group { * inotify_init() and the refcnt will hit 0 only when that fd has been * closed. */ - atomic_t refcnt; /* things with interest in this group */ + refcount_t refcnt; /* things with interest in this group */ const struct fsnotify_ops *ops; /* how this group handles things */ From 24c20305c7fc8959836211cb8c50aab93ae0e54f Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 30 Oct 2017 21:14:55 +0100 Subject: [PATCH 04/10] fsnotify: clean up fsnotify_prepare/finish_user_wait() This patch doesn't actually fix any bug, just paves the way for fixing mark and group pinning. Reviewed-by: Amir Goldstein Signed-off-by: Miklos Szeredi Cc: # v4.12 Signed-off-by: Jan Kara --- fs/notify/mark.c | 96 ++++++++++++++++++++++++------------------------ 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 47a827975b58..7ecd15add6a7 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -109,16 +109,6 @@ void fsnotify_get_mark(struct fsnotify_mark *mark) atomic_inc(&mark->refcnt); } -/* - * Get mark reference when we found the mark via lockless traversal of object - * list. Mark can be already removed from the list by now and on its way to be - * destroyed once SRCU period ends. - */ -static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark) -{ - return atomic_inc_not_zero(&mark->refcnt); -} - static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) { u32 new_mask = 0; @@ -256,32 +246,63 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) FSNOTIFY_REAPER_DELAY); } -bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info) +/* + * Get mark reference when we found the mark via lockless traversal of object + * list. 
Mark can be already removed from the list by now and on its way to be + * destroyed once SRCU period ends. + * + * Also pin the group so it doesn't disappear under us. + */ +static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark) { struct fsnotify_group *group; - if (WARN_ON_ONCE(!iter_info->inode_mark && !iter_info->vfsmount_mark)) - return false; - - if (iter_info->inode_mark) - group = iter_info->inode_mark->group; - else - group = iter_info->vfsmount_mark->group; + if (!mark) + return true; + group = mark->group; /* * Since acquisition of mark reference is an atomic op as well, we can * be sure this inc is seen before any effect of refcount increment. */ atomic_inc(&group->user_waits); + if (atomic_inc_not_zero(&mark->refcnt)) + return true; - if (iter_info->inode_mark) { - /* This can fail if mark is being removed */ - if (!fsnotify_get_mark_safe(iter_info->inode_mark)) - goto out_wait; + if (atomic_dec_and_test(&group->user_waits) && group->shutdown) + wake_up(&group->notification_waitq); + + return false; +} + +/* + * Puts marks and wakes up group destruction if necessary. + * + * Pairs with fsnotify_get_mark_safe() + */ +static void fsnotify_put_mark_wake(struct fsnotify_mark *mark) +{ + if (mark) { + struct fsnotify_group *group = mark->group; + + fsnotify_put_mark(mark); + /* + * We abuse notification_waitq on group shutdown for waiting for + * all marks pinned when waiting for userspace. + */ + if (atomic_dec_and_test(&group->user_waits) && group->shutdown) + wake_up(&group->notification_waitq); } - if (iter_info->vfsmount_mark) { - if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark)) - goto out_inode; +} + +bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info) +{ + /* This can fail if mark is being removed */ + if (!fsnotify_get_mark_safe(iter_info->inode_mark)) + return false; + if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark)) { + fsnotify_put_mark_wake(iter_info->inode_mark); + return false; } /* @@ -292,34 +313,13 @@ bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info) srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx); return true; -out_inode: - if (iter_info->inode_mark) - fsnotify_put_mark(iter_info->inode_mark); -out_wait: - if (atomic_dec_and_test(&group->user_waits) && group->shutdown) - wake_up(&group->notification_waitq); - return false; } void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info) { - struct fsnotify_group *group = NULL; - iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); - if (iter_info->inode_mark) { - group = iter_info->inode_mark->group; - fsnotify_put_mark(iter_info->inode_mark); - } - if (iter_info->vfsmount_mark) { - group = iter_info->vfsmount_mark->group; - fsnotify_put_mark(iter_info->vfsmount_mark); - } - /* - * We abuse notification_waitq on group shutdown for waiting for all - * marks pinned when waiting for userspace. - */ - if (atomic_dec_and_test(&group->user_waits) && group->shutdown) - wake_up(&group->notification_waitq); + fsnotify_put_mark_wake(iter_info->inode_mark); + fsnotify_put_mark_wake(iter_info->vfsmount_mark); } /* From 0d6ec079d6aaa098b978d6395973bb027c752a03 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 30 Oct 2017 21:14:55 +0100 Subject: [PATCH 05/10] fsnotify: pin both inode and vfsmount mark We may fail to pin one of the marks in fsnotify_prepare_user_wait() when dropping the srcu read lock, resulting in use after free at the next iteration. 
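Roughly, the window looks like this (illustrative timeline, simplified from the code paths above; not literal code):

	/*
	 * fsnotify()                              mark/group teardown
	 * ----------                              -------------------
	 * srcu_read_lock()
	 * finds inode_mark and vfsmount_mark
	 * iter_info records only the mark the
	 *   event will be sent for
	 * fsnotify_prepare_user_wait()
	 *   pins only the recorded mark,
	 *   srcu_read_unlock()
	 *                                         SRCU period ends, the
	 *                                         unpinned mark is freed
	 * fsnotify_finish_user_wait()
	 *   srcu_read_lock()
	 * next loop iteration dereferences
	 *   the freed mark                  <-- use-after-free
	 */
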
Solution is to store both marks in iter_info instead of just the one we'll be sending the event for. Reviewed-by: Amir Goldstein Signed-off-by: Miklos Szeredi Fixes: 9385a84d7e1f ("fsnotify: Pass fsnotify_iter_info into handle_event handler") Cc: # v4.12 Signed-off-by: Jan Kara --- fs/notify/fsnotify.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 0c4583b61717..074716293829 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -335,6 +335,13 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, struct fsnotify_mark, obj_list); vfsmount_group = vfsmount_mark->group; } + /* + * Need to protect both marks against freeing so that we can + * continue iteration from this place, regardless of which mark + * we actually happen to send an event for. + */ + iter_info.inode_mark = inode_mark; + iter_info.vfsmount_mark = vfsmount_mark; if (inode_group && vfsmount_group) { int cmp = fsnotify_compare_groups(inode_group, @@ -348,9 +355,6 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, } } - iter_info.inode_mark = inode_mark; - iter_info.vfsmount_mark = vfsmount_mark; - ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask, data, data_is, cookie, file_name, &iter_info); From 9a31d7ad997f55768c687974ce36b759065b49e5 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 30 Oct 2017 21:14:56 +0100 Subject: [PATCH 06/10] fsnotify: fix pinning group in fsnotify_prepare_user_wait() Blind increment of group's user_waits is not enough, we could be far enough in the group's destruction that it isn't taken into account (i.e. grabbing the mark ref afterwards doesn't guarantee that it was the ref coming from the _group_ that was grabbed). Instead we need to check (under lock) that the mark is still attached to the group after having obtained a ref to the mark. If not, skip it. Reviewed-by: Amir Goldstein Signed-off-by: Miklos Szeredi Fixes: 9385a84d7e1f ("fsnotify: Pass fsnotify_iter_info into handle_event handler") Cc: # v4.12 Signed-off-by: Jan Kara --- fs/notify/mark.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 7ecd15add6a7..f3a32ea15b49 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -255,23 +255,20 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) */ static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark) { - struct fsnotify_group *group; - if (!mark) return true; - group = mark->group; - /* - * Since acquisition of mark reference is an atomic op as well, we can - * be sure this inc is seen before any effect of refcount increment. - */ - atomic_inc(&group->user_waits); - if (atomic_inc_not_zero(&mark->refcnt)) - return true; - - if (atomic_dec_and_test(&group->user_waits) && group->shutdown) - wake_up(&group->notification_waitq); - + if (atomic_inc_not_zero(&mark->refcnt)) { + spin_lock(&mark->lock); + if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) { + /* mark is attached, group is still alive then */ + atomic_inc(&mark->group->user_waits); + spin_unlock(&mark->lock); + return true; + } + spin_unlock(&mark->lock); + fsnotify_put_mark(mark); + } return false; } From f37650f1c7c71cf5180b43229d13b421d81e7170 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 30 Oct 2017 21:14:56 +0100 Subject: [PATCH 07/10] fanotify: fix fsnotify_prepare_user_wait() failure If fsnotify_prepare_user_wait() fails, we leave the event on the notification list. 
Which will result in a warning in fsnotify_destroy_event() and later use-after-free. Instead of adding a new helper to remove the event from the list in this case, I opted to move the prepare/finish up into fanotify_handle_event(). This will allow these to be moved further out into the generic code later, and perhaps let us move to non-sleeping RCU. Reviewed-by: Amir Goldstein Signed-off-by: Miklos Szeredi Fixes: 05f0e38724e8 ("fanotify: Release SRCU lock when waiting for userspace response") Cc: # v4.12 Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 2fa99aeaa095..df3f484e458a 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -64,19 +64,8 @@ static int fanotify_get_response(struct fsnotify_group *group, pr_debug("%s: group=%p event=%p\n", __func__, group, event); - /* - * fsnotify_prepare_user_wait() fails if we race with mark deletion. - * Just let the operation pass in that case. - */ - if (!fsnotify_prepare_user_wait(iter_info)) { - event->response = FAN_ALLOW; - goto out; - } - wait_event(group->fanotify_data.access_waitq, event->response); - fsnotify_finish_user_wait(iter_info); -out: /* userspace responded, convert to something usable */ switch (event->response) { case FAN_ALLOW: @@ -211,9 +200,21 @@ static int fanotify_handle_event(struct fsnotify_group *group, pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode, mask); +#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS + if (mask & FAN_ALL_PERM_EVENTS) { + /* + * fsnotify_prepare_user_wait() fails if we race with mark + * deletion. Just let the operation pass in that case. + */ + if (!fsnotify_prepare_user_wait(iter_info)) + return 0; + } +#endif + event = fanotify_alloc_event(inode, mask, data); + ret = -ENOMEM; if (unlikely(!event)) - return -ENOMEM; + goto finish; fsn_event = &event->fse; ret = fsnotify_add_event(group, fsn_event, fanotify_merge); @@ -223,7 +224,8 @@ static int fanotify_handle_event(struct fsnotify_group *group, /* Our event wasn't used in the end. Free it. */ fsnotify_destroy_event(group, fsn_event); - return 0; + ret = 0; + goto finish; } #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS @@ -232,6 +234,11 @@ static int fanotify_handle_event(struct fsnotify_group *group, iter_info); fsnotify_destroy_event(group, fsn_event); } +finish: + if (mask & FAN_ALL_PERM_EVENTS) + fsnotify_finish_user_wait(iter_info); +#else +finish: #endif return ret; } From 3427ce7155412341aeb635c22c3ca2c2c9d1a978 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 30 Oct 2017 21:14:56 +0100 Subject: [PATCH 08/10] fsnotify: clean up fsnotify() Use helpers to get first and next marks from connector. Also get rid of inode_node/vfsmount_node local variables, which just refers to the same objects as iter_info. There was an srcu_dereference() for foo_node, but that's completely superfluous since we've already done it when obtaining foo_node. Also get rid of inode_group/vfsmount_group local variables; checking against non-NULL for these is the same as checking against non-NULL inode_mark/vfsmount_mark. 
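For reference, the resulting shape of the walk with the new helpers (simplified sketch; the real loop interleaves both lists and compares mark groups):

	struct fsnotify_mark *mark;

	for (mark = fsnotify_first_mark(&to_tell->i_fsnotify_marks);
	     mark;
	     mark = fsnotify_next_mark(mark)) {
		/* send_to_group() for this mark, still under fsnotify_mark_srcu */
	}
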
Reviewed-by: Amir Goldstein Signed-off-by: Miklos Szeredi Signed-off-by: Jan Kara --- fs/notify/fsnotify.c | 103 ++++++++++++++++++------------------------- 1 file changed, 44 insertions(+), 59 deletions(-) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 074716293829..81d8959b6aef 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -243,6 +243,29 @@ static int send_to_group(struct inode *to_tell, file_name, cookie, iter_info); } +static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector **connp) +{ + struct fsnotify_mark_connector *conn; + struct hlist_node *node = NULL; + + conn = srcu_dereference(*connp, &fsnotify_mark_srcu); + if (conn) + node = srcu_dereference(conn->list.first, &fsnotify_mark_srcu); + + return hlist_entry_safe(node, struct fsnotify_mark, obj_list); +} + +static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark) +{ + struct hlist_node *node = NULL; + + if (mark) + node = srcu_dereference(mark->obj_list.next, + &fsnotify_mark_srcu); + + return hlist_entry_safe(node, struct fsnotify_mark, obj_list); +} + /* * This is the main call to fsnotify. The VFS calls into hook specific functions * in linux/fsnotify.h. Those functions then in turn call here. Here will call @@ -252,11 +275,7 @@ static int send_to_group(struct inode *to_tell, int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, const unsigned char *file_name, u32 cookie) { - struct hlist_node *inode_node = NULL, *vfsmount_node = NULL; - struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL; - struct fsnotify_group *inode_group, *vfsmount_group; - struct fsnotify_mark_connector *inode_conn, *vfsmount_conn; - struct fsnotify_iter_info iter_info; + struct fsnotify_iter_info iter_info = {}; struct mount *mnt; int ret = 0; /* global tests shouldn't care about events on child only the specific event */ @@ -291,26 +310,16 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, if ((mask & FS_MODIFY) || (test_mask & to_tell->i_fsnotify_mask)) { - inode_conn = srcu_dereference(to_tell->i_fsnotify_marks, - &fsnotify_mark_srcu); - if (inode_conn) - inode_node = srcu_dereference(inode_conn->list.first, - &fsnotify_mark_srcu); + iter_info.inode_mark = + fsnotify_first_mark(&to_tell->i_fsnotify_marks); } if (mnt && ((mask & FS_MODIFY) || (test_mask & mnt->mnt_fsnotify_mask))) { - inode_conn = srcu_dereference(to_tell->i_fsnotify_marks, - &fsnotify_mark_srcu); - if (inode_conn) - inode_node = srcu_dereference(inode_conn->list.first, - &fsnotify_mark_srcu); - vfsmount_conn = srcu_dereference(mnt->mnt_fsnotify_marks, - &fsnotify_mark_srcu); - if (vfsmount_conn) - vfsmount_node = srcu_dereference( - vfsmount_conn->list.first, - &fsnotify_mark_srcu); + iter_info.inode_mark = + fsnotify_first_mark(&to_tell->i_fsnotify_marks); + iter_info.vfsmount_mark = + fsnotify_first_mark(&mnt->mnt_fsnotify_marks); } /* @@ -318,41 +327,17 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, * ignore masks are properly reflected for mount mark notifications. * That's why this traversal is so complicated... 
*/ - while (inode_node || vfsmount_node) { - inode_group = NULL; - inode_mark = NULL; - vfsmount_group = NULL; - vfsmount_mark = NULL; + while (iter_info.inode_mark || iter_info.vfsmount_mark) { + struct fsnotify_mark *inode_mark = iter_info.inode_mark; + struct fsnotify_mark *vfsmount_mark = iter_info.vfsmount_mark; - if (inode_node) { - inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu), - struct fsnotify_mark, obj_list); - inode_group = inode_mark->group; - } - - if (vfsmount_node) { - vfsmount_mark = hlist_entry(srcu_dereference(vfsmount_node, &fsnotify_mark_srcu), - struct fsnotify_mark, obj_list); - vfsmount_group = vfsmount_mark->group; - } - /* - * Need to protect both marks against freeing so that we can - * continue iteration from this place, regardless of which mark - * we actually happen to send an event for. - */ - iter_info.inode_mark = inode_mark; - iter_info.vfsmount_mark = vfsmount_mark; - - if (inode_group && vfsmount_group) { - int cmp = fsnotify_compare_groups(inode_group, - vfsmount_group); - if (cmp > 0) { - inode_group = NULL; + if (inode_mark && vfsmount_mark) { + int cmp = fsnotify_compare_groups(inode_mark->group, + vfsmount_mark->group); + if (cmp > 0) inode_mark = NULL; - } else if (cmp < 0) { - vfsmount_group = NULL; + else if (cmp < 0) vfsmount_mark = NULL; - } } ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask, @@ -362,12 +347,12 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) goto out; - if (inode_group) - inode_node = srcu_dereference(inode_node->next, - &fsnotify_mark_srcu); - if (vfsmount_group) - vfsmount_node = srcu_dereference(vfsmount_node->next, - &fsnotify_mark_srcu); + if (inode_mark) + iter_info.inode_mark = + fsnotify_next_mark(iter_info.inode_mark); + if (vfsmount_mark) + iter_info.vfsmount_mark = + fsnotify_next_mark(iter_info.vfsmount_mark); } ret = 0; out: From 6685df31255493c3f0e9e0b8bf885e4c9762fc5d Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 30 Oct 2017 21:14:56 +0100 Subject: [PATCH 09/10] fanotify: clean up CONFIG_FANOTIFY_ACCESS_PERMISSIONS ifdefs The only negative from this patch should be an addition of 32bytes to 'struct fsnotify_group' if CONFIG_FANOTIFY_ACCESS_PERMISSIONS is not defined. Reviewed-by: Amir Goldstein Signed-off-by: Miklos Szeredi Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 30 +++++---------------- fs/notify/fanotify/fanotify.h | 8 ++++-- fs/notify/fanotify/fanotify_user.c | 43 +++++++++++------------------- include/linux/fsnotify_backend.h | 2 -- 4 files changed, 29 insertions(+), 54 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index df3f484e458a..63f56b007280 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -35,15 +35,13 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) pr_debug("%s: list=%p event=%p\n", __func__, list, event); -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS /* * Don't merge a permission event with any other event so that we know * the event structure we have created in fanotify_handle_event() is the * one we should check for permission response. 
*/ - if (event->mask & FAN_ALL_PERM_EVENTS) + if (fanotify_is_perm_event(event->mask)) return 0; -#endif list_for_each_entry_reverse(test_event, list, list) { if (should_merge(test_event, event)) { @@ -55,7 +53,6 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) return 0; } -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS static int fanotify_get_response(struct fsnotify_group *group, struct fanotify_perm_event_info *event, struct fsnotify_iter_info *iter_info) @@ -82,7 +79,6 @@ static int fanotify_get_response(struct fsnotify_group *group, return ret; } -#endif static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmnt_mark, @@ -141,8 +137,7 @@ struct fanotify_event_info *fanotify_alloc_event(struct inode *inode, u32 mask, { struct fanotify_event_info *event; -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS - if (mask & FAN_ALL_PERM_EVENTS) { + if (fanotify_is_perm_event(mask)) { struct fanotify_perm_event_info *pevent; pevent = kmem_cache_alloc(fanotify_perm_event_cachep, @@ -153,7 +148,6 @@ struct fanotify_event_info *fanotify_alloc_event(struct inode *inode, u32 mask, pevent->response = 0; goto init; } -#endif event = kmem_cache_alloc(fanotify_event_cachep, GFP_KERNEL); if (!event) return NULL; @@ -200,8 +194,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode, mask); -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS - if (mask & FAN_ALL_PERM_EVENTS) { + if (fanotify_is_perm_event(mask)) { /* * fsnotify_prepare_user_wait() fails if we race with mark * deletion. Just let the operation pass in that case. @@ -209,7 +202,6 @@ static int fanotify_handle_event(struct fsnotify_group *group, if (!fsnotify_prepare_user_wait(iter_info)) return 0; } -#endif event = fanotify_alloc_event(inode, mask, data); ret = -ENOMEM; @@ -225,21 +217,15 @@ static int fanotify_handle_event(struct fsnotify_group *group, fsnotify_destroy_event(group, fsn_event); ret = 0; - goto finish; - } - -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS - if (mask & FAN_ALL_PERM_EVENTS) { + } else if (fanotify_is_perm_event(mask)) { ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event), iter_info); fsnotify_destroy_event(group, fsn_event); } finish: - if (mask & FAN_ALL_PERM_EVENTS) + if (fanotify_is_perm_event(mask)) fsnotify_finish_user_wait(iter_info); -#else -finish: -#endif + return ret; } @@ -259,13 +245,11 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event) event = FANOTIFY_E(fsn_event); path_put(&event->path); put_pid(event->tgid); -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS - if (fsn_event->mask & FAN_ALL_PERM_EVENTS) { + if (fanotify_is_perm_event(fsn_event->mask)) { kmem_cache_free(fanotify_perm_event_cachep, FANOTIFY_PE(fsn_event)); return; } -#endif kmem_cache_free(fanotify_event_cachep, event); } diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 4eb6f5efa282..dc219cf07a6a 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -21,7 +21,6 @@ struct fanotify_event_info { struct pid *tgid; }; -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS /* * Structure for permission fanotify events. It gets allocated and freed in * fanotify_handle_event() since we wait there for user response. 
When the @@ -40,7 +39,12 @@ FANOTIFY_PE(struct fsnotify_event *fse) { return container_of(fse, struct fanotify_perm_event_info, fae.fse); } -#endif + +static inline bool fanotify_is_perm_event(u32 mask) +{ + return IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS) && + mask & FAN_ALL_PERM_EVENTS; +} static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse) { diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 907a481ac781..a434de023c49 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -142,7 +142,6 @@ static int fill_event_metadata(struct fsnotify_group *group, return ret; } -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS static struct fanotify_perm_event_info *dequeue_event( struct fsnotify_group *group, int fd) { @@ -199,7 +198,6 @@ static int process_access_response(struct fsnotify_group *group, return 0; } -#endif static ssize_t copy_event_to_user(struct fsnotify_group *group, struct fsnotify_event *event, @@ -221,10 +219,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, fanotify_event_metadata.event_len)) goto out_close_fd; -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS - if (event->mask & FAN_ALL_PERM_EVENTS) + if (fanotify_is_perm_event(event->mask)) FANOTIFY_PE(event)->fd = fd; -#endif if (fd != FAN_NOFD) fd_install(fd, f); @@ -309,10 +305,9 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, * Permission events get queued to wait for response. Other * events can be destroyed now. */ - if (!(kevent->mask & FAN_ALL_PERM_EVENTS)) { + if (!fanotify_is_perm_event(kevent->mask)) { fsnotify_destroy_event(group, kevent); } else { -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS if (ret <= 0) { FANOTIFY_PE(kevent)->response = FAN_DENY; wake_up(&group->fanotify_data.access_waitq); @@ -322,7 +317,6 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, &group->fanotify_data.access_list); spin_unlock(&group->notification_lock); } -#endif } if (ret < 0) break; @@ -338,11 +332,13 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) { -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS struct fanotify_response response = { .fd = -1, .response = -1 }; struct fsnotify_group *group; int ret; + if (!IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) + return -EINVAL; + group = file->private_data; if (count > sizeof(response)) @@ -358,16 +354,11 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t count = ret; return count; -#else - return -EINVAL; -#endif } static int fanotify_release(struct inode *ignored, struct file *file) { struct fsnotify_group *group = file->private_data; - -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS struct fanotify_perm_event_info *event, *next; struct fsnotify_event *fsn_event; @@ -403,14 +394,14 @@ static int fanotify_release(struct inode *ignored, struct file *file) spin_unlock(&group->notification_lock); fsnotify_destroy_event(group, fsn_event); spin_lock(&group->notification_lock); - } else + } else { FANOTIFY_PE(fsn_event)->response = FAN_ALLOW; + } } spin_unlock(&group->notification_lock); /* Response for all permission events it set, wakeup waiters */ wake_up(&group->fanotify_data.access_waitq); -#endif /* matches the fanotify_init->fsnotify_alloc_group */ fsnotify_destroy_group(group); @@ -768,10 +759,8 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) if 
(force_o_largefile()) event_f_flags |= O_LARGEFILE; group->fanotify_data.f_flags = event_f_flags; -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS init_waitqueue_head(&group->fanotify_data.access_waitq); INIT_LIST_HEAD(&group->fanotify_data.access_list); -#endif switch (flags & FAN_ALL_CLASS_BITS) { case FAN_CLASS_NOTIF: group->priority = FS_PRIO_0; @@ -825,6 +814,7 @@ SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags, struct fsnotify_group *group; struct fd f; struct path path; + u32 valid_mask = FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD; int ret; pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n", @@ -855,11 +845,10 @@ SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags, mask &= ~FAN_ONDIR; } -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS - if (mask & ~(FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD)) -#else - if (mask & ~(FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD)) -#endif + if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) + valid_mask |= FAN_ALL_PERM_EVENTS; + + if (mask & ~valid_mask) return -EINVAL; f = fdget(fanotify_fd); @@ -949,10 +938,10 @@ static int __init fanotify_user_setup(void) { fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC); fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC); -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS - fanotify_perm_event_cachep = KMEM_CACHE(fanotify_perm_event_info, - SLAB_PANIC); -#endif + if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) { + fanotify_perm_event_cachep = + KMEM_CACHE(fanotify_perm_event_info, SLAB_PANIC); + } return 0; } diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 20a57bac38f2..744e2b9969fc 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -183,11 +183,9 @@ struct fsnotify_group { #endif #ifdef CONFIG_FANOTIFY struct fanotify_group_private_data { -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS /* allows a group to block waiting for a userspace response */ struct list_head access_list; wait_queue_head_t access_waitq; -#endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */ int f_flags; unsigned int max_marks; struct user_struct *user; From ab97f87325e28b7ef7717e6cb62e8da14a7176e1 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 13:26:02 +0300 Subject: [PATCH 10/10] fsnotify: convert fsnotify_mark.refcnt from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable fsnotify_mark.refcnt is used as pure reference counter. Convert it to refcount_t and fix up the operations. 
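Two of the conversions below rely on the less common refcount helpers; their semantics, sketched in isolation:

	/*
	 * Lockless side: take a reference only if the count has not yet
	 * dropped to zero, i.e. the mark is not already being destroyed.
	 */
	if (!refcount_inc_not_zero(&mark->refcnt))
		return false;

	/*
	 * Teardown side: drop a reference and, only when it hits zero,
	 * return with connector->lock held so the mark can be unhooked.
	 */
	if (!refcount_dec_and_lock(&mark->refcnt, &mark->connector->lock))
		return;
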
Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: Jan Kara --- fs/notify/inotify/inotify_user.c | 4 ++-- fs/notify/mark.c | 14 +++++++------- include/linux/fsnotify_backend.h | 2 +- kernel/audit_tree.c | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 7cc7d3fb1862..d3c20e0bb046 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -376,7 +376,7 @@ static struct inotify_inode_mark *inotify_idr_find_locked(struct fsnotify_group fsnotify_get_mark(fsn_mark); /* One ref for being in the idr, one ref we just took */ - BUG_ON(atomic_read(&fsn_mark->refcnt) < 2); + BUG_ON(refcount_read(&fsn_mark->refcnt) < 2); } return i_mark; @@ -446,7 +446,7 @@ static void inotify_remove_from_idr(struct fsnotify_group *group, * One ref for being in the idr * one ref grabbed by inotify_idr_find */ - if (unlikely(atomic_read(&i_mark->fsn_mark.refcnt) < 2)) { + if (unlikely(refcount_read(&i_mark->fsn_mark.refcnt) < 2)) { printk(KERN_ERR "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p\n", __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group); /* we can't really recover with bad ref cnting.. */ diff --git a/fs/notify/mark.c b/fs/notify/mark.c index f3a32ea15b49..e9191b416434 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -105,8 +105,8 @@ static DECLARE_WORK(connector_reaper_work, fsnotify_connector_destroy_workfn); void fsnotify_get_mark(struct fsnotify_mark *mark) { - WARN_ON_ONCE(!atomic_read(&mark->refcnt)); - atomic_inc(&mark->refcnt); + WARN_ON_ONCE(!refcount_read(&mark->refcnt)); + refcount_inc(&mark->refcnt); } static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) @@ -201,7 +201,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) /* Catch marks that were actually never attached to object */ if (!mark->connector) { - if (atomic_dec_and_test(&mark->refcnt)) + if (refcount_dec_and_test(&mark->refcnt)) fsnotify_final_mark_destroy(mark); return; } @@ -210,7 +210,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) * We have to be careful so that traversals of obj_list under lock can * safely grab mark reference. 
*/ - if (!atomic_dec_and_lock(&mark->refcnt, &mark->connector->lock)) + if (!refcount_dec_and_lock(&mark->refcnt, &mark->connector->lock)) return; conn = mark->connector; @@ -258,7 +258,7 @@ static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark) if (!mark) return true; - if (atomic_inc_not_zero(&mark->refcnt)) { + if (refcount_inc_not_zero(&mark->refcnt)) { spin_lock(&mark->lock); if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) { /* mark is attached, group is still alive then */ @@ -335,7 +335,7 @@ void fsnotify_detach_mark(struct fsnotify_mark *mark) WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex)); WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) && - atomic_read(&mark->refcnt) < 1 + + refcount_read(&mark->refcnt) < 1 + !!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)); spin_lock(&mark->lock); @@ -737,7 +737,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark, { memset(mark, 0, sizeof(*mark)); spin_lock_init(&mark->lock); - atomic_set(&mark->refcnt, 1); + refcount_set(&mark->refcnt, 1); fsnotify_get_group(group); mark->group = group; } diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 744e2b9969fc..9bcb43953f4e 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -242,7 +242,7 @@ struct fsnotify_mark { __u32 mask; /* We hold one for presence in g_list. Also one ref for each 'thing' * in kernel that found and may be using this mark. */ - atomic_t refcnt; + refcount_t refcnt; /* Group this mark is for. Set on mark creation, stable until last ref * is dropped */ struct fsnotify_group *group; diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 011d46e5f73f..45ec960ad536 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -1007,7 +1007,7 @@ static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify * We are guaranteed to have at least one reference to the mark from * either the inode or the caller of fsnotify_destroy_mark(). */ - BUG_ON(atomic_read(&entry->refcnt) < 1); + BUG_ON(refcount_read(&entry->refcnt) < 1); } static const struct fsnotify_ops audit_tree_ops = {
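
A closing note on patch 09: the #ifdef removal there follows the usual IS_ENABLED() idiom, which in generic form (hypothetical CONFIG_FOO and do_foo(), not from this series) looks like:

	static void do_foo(u32 mask)
	{
		/* hypothetical feature work */
	}

	static inline bool foo_enabled(void)
	{
		return IS_ENABLED(CONFIG_FOO);
	}

	static void handle_some_event(u32 mask)
	{
		/*
		 * The branch is always parsed and type-checked; when
		 * CONFIG_FOO is not set, IS_ENABLED() evaluates to 0 and
		 * the compiler discards the call, so no #ifdef is needed.
		 */
		if (foo_enabled())
			do_foo(mask);
	}

This is the same trade-off the patch 09 changelog points out: a small amount of unconditionally built code and data in exchange for fewer preprocessor blocks.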