fsnotify: srcu to protect read side of inode and vfsmount locks
Currently reading the inode->i_fsnotify_marks or vfsmount->mnt_fsnotify_marks lists are protected by a spinlock on both the read and the write side. This patch protects the read side of those lists with a new single srcu. Signed-off-by: Eric Paris <eparis@redhat.com>
This commit is contained in:
parent
700307a29a
commit
75c1be487a
|
@ -144,14 +144,15 @@ void __fsnotify_flush_ignored_mask(struct inode *inode, void *data, int data_is)
|
||||||
{
|
{
|
||||||
struct fsnotify_mark *mark;
|
struct fsnotify_mark *mark;
|
||||||
struct hlist_node *node;
|
struct hlist_node *node;
|
||||||
|
int idx;
|
||||||
|
|
||||||
|
idx = srcu_read_lock(&fsnotify_mark_srcu);
|
||||||
|
|
||||||
if (!hlist_empty(&inode->i_fsnotify_marks)) {
|
if (!hlist_empty(&inode->i_fsnotify_marks)) {
|
||||||
spin_lock(&inode->i_lock);
|
hlist_for_each_entry_rcu(mark, node, &inode->i_fsnotify_marks, i.i_list) {
|
||||||
hlist_for_each_entry(mark, node, &inode->i_fsnotify_marks, i.i_list) {
|
|
||||||
if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
|
if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
|
||||||
mark->ignored_mask = 0;
|
mark->ignored_mask = 0;
|
||||||
}
|
}
|
||||||
spin_unlock(&inode->i_lock);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (data_is == FSNOTIFY_EVENT_FILE) {
|
if (data_is == FSNOTIFY_EVENT_FILE) {
|
||||||
|
@ -159,14 +160,14 @@ void __fsnotify_flush_ignored_mask(struct inode *inode, void *data, int data_is)
|
||||||
|
|
||||||
mnt = ((struct file *)data)->f_path.mnt;
|
mnt = ((struct file *)data)->f_path.mnt;
|
||||||
if (mnt && !hlist_empty(&mnt->mnt_fsnotify_marks)) {
|
if (mnt && !hlist_empty(&mnt->mnt_fsnotify_marks)) {
|
||||||
spin_lock(&mnt->mnt_root->d_lock);
|
hlist_for_each_entry_rcu(mark, node, &mnt->mnt_fsnotify_marks, m.m_list) {
|
||||||
hlist_for_each_entry(mark, node, &mnt->mnt_fsnotify_marks, m.m_list) {
|
|
||||||
if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
|
if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
|
||||||
mark->ignored_mask = 0;
|
mark->ignored_mask = 0;
|
||||||
}
|
}
|
||||||
spin_unlock(&mnt->mnt_root->d_lock);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
srcu_read_unlock(&fsnotify_mark_srcu, idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int send_to_group(struct fsnotify_group *group, struct inode *to_tell,
|
static int send_to_group(struct fsnotify_group *group, struct inode *to_tell,
|
||||||
|
@ -208,8 +209,10 @@ static bool needed_by_vfsmount(__u32 test_mask, struct vfsmount *mnt)
|
||||||
int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
|
int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
|
||||||
const unsigned char *file_name, u32 cookie)
|
const unsigned char *file_name, u32 cookie)
|
||||||
{
|
{
|
||||||
|
struct fsnotify_mark *mark;
|
||||||
struct fsnotify_group *group;
|
struct fsnotify_group *group;
|
||||||
struct fsnotify_event *event = NULL;
|
struct fsnotify_event *event = NULL;
|
||||||
|
struct hlist_node *node;
|
||||||
struct vfsmount *mnt = NULL;
|
struct vfsmount *mnt = NULL;
|
||||||
int idx, ret = 0;
|
int idx, ret = 0;
|
||||||
/* global tests shouldn't care about events on child only the specific event */
|
/* global tests shouldn't care about events on child only the specific event */
|
||||||
|
@ -237,35 +240,47 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
|
||||||
!needed_by_vfsmount(test_mask, mnt))
|
!needed_by_vfsmount(test_mask, mnt))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
/*
|
idx = srcu_read_lock(&fsnotify_mark_srcu);
|
||||||
* SRCU!! the groups list is very very much read only and the path is
|
|
||||||
* very hot. The VAST majority of events are not going to need to do
|
|
||||||
* anything other than walk the list so it's crazy to pre-allocate.
|
|
||||||
*/
|
|
||||||
idx = srcu_read_lock(&fsnotify_grp_srcu);
|
|
||||||
|
|
||||||
if (test_mask & to_tell->i_fsnotify_mask) {
|
if (test_mask & to_tell->i_fsnotify_mask) {
|
||||||
list_for_each_entry_rcu(group, &fsnotify_inode_groups, inode_group_list) {
|
hlist_for_each_entry_rcu(mark, node, &to_tell->i_fsnotify_marks, i.i_list) {
|
||||||
if (test_mask & group->mask) {
|
|
||||||
ret = send_to_group(group, to_tell, NULL, mask, data, data_is,
|
pr_debug("%s: inode_loop: mark=%p mark->mask=%x mark->ignored_mask=%x\n",
|
||||||
cookie, file_name, &event);
|
__func__, mark, mark->mask, mark->ignored_mask);
|
||||||
|
|
||||||
|
if (test_mask & mark->mask & ~mark->ignored_mask) {
|
||||||
|
group = mark->group;
|
||||||
|
if (!group)
|
||||||
|
continue;
|
||||||
|
ret = send_to_group(group, to_tell, NULL, mask,
|
||||||
|
data, data_is, cookie, file_name,
|
||||||
|
&event);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (needed_by_vfsmount(test_mask, mnt)) {
|
|
||||||
list_for_each_entry_rcu(group, &fsnotify_vfsmount_groups, vfsmount_group_list) {
|
if (mnt && (test_mask & mnt->mnt_fsnotify_mask)) {
|
||||||
if (test_mask & group->mask) {
|
hlist_for_each_entry_rcu(mark, node, &mnt->mnt_fsnotify_marks, m.m_list) {
|
||||||
ret = send_to_group(group, to_tell, mnt, mask, data, data_is,
|
|
||||||
cookie, file_name, &event);
|
pr_debug("%s: mnt_loop: mark=%p mark->mask=%x mark->ignored_mask=%x\n",
|
||||||
|
__func__, mark, mark->mask, mark->ignored_mask);
|
||||||
|
|
||||||
|
if (test_mask & mark->mask & ~mark->ignored_mask) {
|
||||||
|
group = mark->group;
|
||||||
|
if (!group)
|
||||||
|
continue;
|
||||||
|
ret = send_to_group(group, to_tell, mnt, mask,
|
||||||
|
data, data_is, cookie, file_name,
|
||||||
|
&event);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
out:
|
out:
|
||||||
srcu_read_unlock(&fsnotify_grp_srcu, idx);
|
srcu_read_unlock(&fsnotify_mark_srcu, idx);
|
||||||
/*
|
/*
|
||||||
* fsnotify_create_event() took a reference so the event can't be cleaned
|
* fsnotify_create_event() took a reference so the event can't be cleaned
|
||||||
* up while we are still trying to add it to lists, drop that one.
|
* up while we are still trying to add it to lists, drop that one.
|
||||||
|
@ -279,8 +294,14 @@ EXPORT_SYMBOL_GPL(fsnotify);
|
||||||
|
|
||||||
static __init int fsnotify_init(void)
|
static __init int fsnotify_init(void)
|
||||||
{
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
BUG_ON(hweight32(ALL_FSNOTIFY_EVENTS) != 23);
|
BUG_ON(hweight32(ALL_FSNOTIFY_EVENTS) != 23);
|
||||||
|
|
||||||
return init_srcu_struct(&fsnotify_grp_srcu);
|
ret = init_srcu_struct(&fsnotify_mark_srcu);
|
||||||
|
if (ret)
|
||||||
|
panic("initializing fsnotify_mark_srcu");
|
||||||
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
subsys_initcall(fsnotify_init);
|
core_initcall(fsnotify_init);
|
||||||
|
|
|
@ -6,8 +6,6 @@
|
||||||
#include <linux/srcu.h>
|
#include <linux/srcu.h>
|
||||||
#include <linux/types.h>
|
#include <linux/types.h>
|
||||||
|
|
||||||
/* protects reads of fsnotify_groups */
|
|
||||||
extern struct srcu_struct fsnotify_grp_srcu;
|
|
||||||
/* all groups which receive inode fsnotify events */
|
/* all groups which receive inode fsnotify events */
|
||||||
extern struct list_head fsnotify_inode_groups;
|
extern struct list_head fsnotify_inode_groups;
|
||||||
/* all groups which receive vfsmount fsnotify events */
|
/* all groups which receive vfsmount fsnotify events */
|
||||||
|
@ -20,6 +18,9 @@ extern __u32 fsnotify_vfsmount_mask;
|
||||||
/* destroy all events sitting in this groups notification queue */
|
/* destroy all events sitting in this groups notification queue */
|
||||||
extern void fsnotify_flush_notify(struct fsnotify_group *group);
|
extern void fsnotify_flush_notify(struct fsnotify_group *group);
|
||||||
|
|
||||||
|
/* protects reads of inode and vfsmount marks list */
|
||||||
|
extern struct srcu_struct fsnotify_mark_srcu;
|
||||||
|
|
||||||
extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark,
|
extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark,
|
||||||
__u32 mask);
|
__u32 mask);
|
||||||
/* add a mark to an inode */
|
/* add a mark to an inode */
|
||||||
|
|
|
@ -30,8 +30,6 @@
|
||||||
|
|
||||||
/* protects writes to fsnotify_groups and fsnotify_mask */
|
/* protects writes to fsnotify_groups and fsnotify_mask */
|
||||||
static DEFINE_MUTEX(fsnotify_grp_mutex);
|
static DEFINE_MUTEX(fsnotify_grp_mutex);
|
||||||
/* protects reads while running the fsnotify_groups list */
|
|
||||||
struct srcu_struct fsnotify_grp_srcu;
|
|
||||||
/* all groups registered to receive inode filesystem notifications */
|
/* all groups registered to receive inode filesystem notifications */
|
||||||
LIST_HEAD(fsnotify_inode_groups);
|
LIST_HEAD(fsnotify_inode_groups);
|
||||||
/* all groups registered to receive mount point filesystem notifications */
|
/* all groups registered to receive mount point filesystem notifications */
|
||||||
|
@ -50,18 +48,17 @@ void fsnotify_recalc_global_mask(void)
|
||||||
struct fsnotify_group *group;
|
struct fsnotify_group *group;
|
||||||
__u32 inode_mask = 0;
|
__u32 inode_mask = 0;
|
||||||
__u32 vfsmount_mask = 0;
|
__u32 vfsmount_mask = 0;
|
||||||
int idx;
|
|
||||||
|
|
||||||
idx = srcu_read_lock(&fsnotify_grp_srcu);
|
mutex_lock(&fsnotify_grp_mutex);
|
||||||
list_for_each_entry_rcu(group, &fsnotify_inode_groups, inode_group_list)
|
list_for_each_entry_rcu(group, &fsnotify_inode_groups, inode_group_list)
|
||||||
inode_mask |= group->mask;
|
inode_mask |= group->mask;
|
||||||
list_for_each_entry_rcu(group, &fsnotify_vfsmount_groups, vfsmount_group_list)
|
list_for_each_entry_rcu(group, &fsnotify_vfsmount_groups, vfsmount_group_list)
|
||||||
vfsmount_mask |= group->mask;
|
vfsmount_mask |= group->mask;
|
||||||
|
|
||||||
srcu_read_unlock(&fsnotify_grp_srcu, idx);
|
|
||||||
|
|
||||||
fsnotify_inode_mask = inode_mask;
|
fsnotify_inode_mask = inode_mask;
|
||||||
fsnotify_vfsmount_mask = vfsmount_mask;
|
fsnotify_vfsmount_mask = vfsmount_mask;
|
||||||
|
|
||||||
|
mutex_unlock(&fsnotify_grp_mutex);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -168,6 +165,8 @@ static void fsnotify_destroy_group(struct fsnotify_group *group)
|
||||||
/* clear all inode marks for this group */
|
/* clear all inode marks for this group */
|
||||||
fsnotify_clear_marks_by_group(group);
|
fsnotify_clear_marks_by_group(group);
|
||||||
|
|
||||||
|
synchronize_srcu(&fsnotify_mark_srcu);
|
||||||
|
|
||||||
/* past the point of no return, matches the initial value of 1 */
|
/* past the point of no return, matches the initial value of 1 */
|
||||||
if (atomic_dec_and_test(&group->num_marks))
|
if (atomic_dec_and_test(&group->num_marks))
|
||||||
fsnotify_final_destroy_group(group);
|
fsnotify_final_destroy_group(group);
|
||||||
|
@ -216,12 +215,7 @@ void fsnotify_put_group(struct fsnotify_group *group)
|
||||||
*/
|
*/
|
||||||
__fsnotify_evict_group(group);
|
__fsnotify_evict_group(group);
|
||||||
|
|
||||||
/*
|
|
||||||
* now it's off the list, so the only thing we might care about is
|
|
||||||
* srcu access....
|
|
||||||
*/
|
|
||||||
mutex_unlock(&fsnotify_grp_mutex);
|
mutex_unlock(&fsnotify_grp_mutex);
|
||||||
synchronize_srcu(&fsnotify_grp_srcu);
|
|
||||||
|
|
||||||
/* and now it is really dead. _Nothing_ could be seeing it */
|
/* and now it is really dead. _Nothing_ could be seeing it */
|
||||||
fsnotify_recalc_global_mask();
|
fsnotify_recalc_global_mask();
|
||||||
|
|
|
@ -85,10 +85,12 @@
|
||||||
#include <linux/fs.h>
|
#include <linux/fs.h>
|
||||||
#include <linux/init.h>
|
#include <linux/init.h>
|
||||||
#include <linux/kernel.h>
|
#include <linux/kernel.h>
|
||||||
|
#include <linux/kthread.h>
|
||||||
#include <linux/module.h>
|
#include <linux/module.h>
|
||||||
#include <linux/mutex.h>
|
#include <linux/mutex.h>
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
#include <linux/spinlock.h>
|
#include <linux/spinlock.h>
|
||||||
|
#include <linux/srcu.h>
|
||||||
#include <linux/writeback.h> /* for inode_lock */
|
#include <linux/writeback.h> /* for inode_lock */
|
||||||
|
|
||||||
#include <asm/atomic.h>
|
#include <asm/atomic.h>
|
||||||
|
@ -96,6 +98,11 @@
|
||||||
#include <linux/fsnotify_backend.h>
|
#include <linux/fsnotify_backend.h>
|
||||||
#include "fsnotify.h"
|
#include "fsnotify.h"
|
||||||
|
|
||||||
|
struct srcu_struct fsnotify_mark_srcu;
|
||||||
|
static DEFINE_SPINLOCK(destroy_lock);
|
||||||
|
static LIST_HEAD(destroy_list);
|
||||||
|
static DECLARE_WAIT_QUEUE_HEAD(destroy_waitq);
|
||||||
|
|
||||||
void fsnotify_get_mark(struct fsnotify_mark *mark)
|
void fsnotify_get_mark(struct fsnotify_mark *mark)
|
||||||
{
|
{
|
||||||
atomic_inc(&mark->refcnt);
|
atomic_inc(&mark->refcnt);
|
||||||
|
@ -144,11 +151,14 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
|
||||||
|
|
||||||
list_del_init(&mark->g_list);
|
list_del_init(&mark->g_list);
|
||||||
|
|
||||||
fsnotify_put_mark(mark); /* for i_list and g_list */
|
|
||||||
|
|
||||||
spin_unlock(&group->mark_lock);
|
spin_unlock(&group->mark_lock);
|
||||||
spin_unlock(&mark->lock);
|
spin_unlock(&mark->lock);
|
||||||
|
|
||||||
|
spin_lock(&destroy_lock);
|
||||||
|
list_add(&mark->destroy_list, &destroy_list);
|
||||||
|
spin_unlock(&destroy_lock);
|
||||||
|
wake_up(&destroy_waitq);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Some groups like to know that marks are being freed. This is a
|
* Some groups like to know that marks are being freed. This is a
|
||||||
* callback to the group function to let it know that this mark
|
* callback to the group function to let it know that this mark
|
||||||
|
@ -263,12 +273,17 @@ int fsnotify_add_mark(struct fsnotify_mark *mark,
|
||||||
err:
|
err:
|
||||||
mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
|
mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
|
||||||
list_del_init(&mark->g_list);
|
list_del_init(&mark->g_list);
|
||||||
|
mark->group = NULL;
|
||||||
atomic_dec(&group->num_marks);
|
atomic_dec(&group->num_marks);
|
||||||
fsnotify_put_mark(mark);
|
|
||||||
|
|
||||||
spin_unlock(&group->mark_lock);
|
spin_unlock(&group->mark_lock);
|
||||||
spin_unlock(&mark->lock);
|
spin_unlock(&mark->lock);
|
||||||
|
|
||||||
|
spin_lock(&destroy_lock);
|
||||||
|
list_add(&mark->destroy_list, &destroy_list);
|
||||||
|
spin_unlock(&destroy_lock);
|
||||||
|
wake_up(&destroy_waitq);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -326,3 +341,42 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
|
||||||
atomic_set(&mark->refcnt, 1);
|
atomic_set(&mark->refcnt, 1);
|
||||||
mark->free_mark = free_mark;
|
mark->free_mark = free_mark;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int fsnotify_mark_destroy(void *ignored)
|
||||||
|
{
|
||||||
|
struct fsnotify_mark *mark, *next;
|
||||||
|
LIST_HEAD(private_destroy_list);
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
spin_lock(&destroy_lock);
|
||||||
|
list_for_each_entry_safe(mark, next, &destroy_list, destroy_list) {
|
||||||
|
list_del(&mark->destroy_list);
|
||||||
|
list_add(&mark->destroy_list, &private_destroy_list);
|
||||||
|
}
|
||||||
|
spin_unlock(&destroy_lock);
|
||||||
|
|
||||||
|
synchronize_srcu(&fsnotify_mark_srcu);
|
||||||
|
|
||||||
|
list_for_each_entry_safe(mark, next, &private_destroy_list, destroy_list) {
|
||||||
|
list_del_init(&mark->destroy_list);
|
||||||
|
fsnotify_put_mark(mark);
|
||||||
|
}
|
||||||
|
|
||||||
|
wait_event_interruptible(destroy_waitq, !list_empty(&destroy_list));
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int __init fsnotify_mark_init(void)
|
||||||
|
{
|
||||||
|
struct task_struct *thread;
|
||||||
|
|
||||||
|
thread = kthread_run(fsnotify_mark_destroy, NULL,
|
||||||
|
"fsnotify_mark");
|
||||||
|
if (IS_ERR(thread))
|
||||||
|
panic("unable to start fsnotify mark destruction thread.");
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
device_initcall(fsnotify_mark_init);
|
||||||
|
|
|
@ -302,6 +302,7 @@ struct fsnotify_mark {
|
||||||
#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x08
|
#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x08
|
||||||
#define FSNOTIFY_MARK_FLAG_ALIVE 0x10
|
#define FSNOTIFY_MARK_FLAG_ALIVE 0x10
|
||||||
unsigned int flags; /* vfsmount or inode mark? */
|
unsigned int flags; /* vfsmount or inode mark? */
|
||||||
|
struct list_head destroy_list;
|
||||||
void (*free_mark)(struct fsnotify_mark *mark); /* called on final put+free */
|
void (*free_mark)(struct fsnotify_mark *mark); /* called on final put+free */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue