From 606035e76e79b14bf7a7c219140c045a952cc76e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 14 Sep 2013 17:32:12 -0400 Subject: [PATCH 1/8] autofs4: close the races around autofs4_notify_daemon() Don't drop ->wq_mutex before calling autofs4_notify_daemon() only to regain it there. Besides being pointless, that opens a race window where autofs4_wait_release() could've come and freed wq->name.name. And do the debugging printk in the "reused an existing wq" case before dropping ->wq_mutex - the same reason... Signed-off-by: Al Viro Acked-by: Ian Kent --- fs/autofs4/waitq.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 3db70dae40d3..689e40d983ad 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -109,13 +109,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, pkt.hdr.proto_version = sbi->version; pkt.hdr.type = type; - mutex_lock(&sbi->wq_mutex); - /* Check if we have become catatonic */ - if (sbi->catatonic) { - mutex_unlock(&sbi->wq_mutex); - return; - } switch (type) { /* Kernel protocol v4 missing and expire packets */ case autofs_ptype_missing: @@ -427,7 +421,6 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, wq->tgid = current->tgid; wq->status = -EINTR; /* Status return if interrupted */ wq->wait_ctr = 2; - mutex_unlock(&sbi->wq_mutex); if (sbi->version < 5) { if (notify == NFY_MOUNT) @@ -449,15 +442,15 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, (unsigned long) wq->wait_queue_token, wq->name.len, wq->name.name, notify); - /* autofs4_notify_daemon() may block */ + /* autofs4_notify_daemon() may block; it will unlock ->wq_mutex */ autofs4_notify_daemon(sbi, wq, type); } else { wq->wait_ctr++; - mutex_unlock(&sbi->wq_mutex); - kfree(qstr.name); DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d", (unsigned long) wq->wait_queue_token, wq->name.len, wq->name.name, notify); + mutex_unlock(&sbi->wq_mutex); + kfree(qstr.name); } /* From 0854d450e229e37102a76e3ccf065d8e8137846f Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 16 Sep 2013 14:51:55 +0200 Subject: [PATCH 2/8] vfs: improve i_op->atomic_open() documentation Fix documentation of ->atomic_open() and related functions: finish_open() and finish_no_open(). Also add details that seem to be unclear and a source of bugs (some of which are fixed in the following series). Cc-ing maintainers of all filesystems implementing ->atomic_open(). Signed-off-by: Miklos Szeredi Cc: Eric Van Hensbergen Cc: Sage Weil Cc: Steve French Cc: Steven Whitehouse Cc: Trond Myklebust Signed-off-by: Al Viro --- Documentation/filesystems/vfs.txt | 14 +++++++------- fs/open.c | 21 ++++++++++++++++++--- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index f93a88250a44..deb48b5fd883 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -359,11 +359,9 @@ struct inode_operations { ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); void (*update_time)(struct inode *, struct timespec *, int); - int (*atomic_open)(struct inode *, struct dentry *, + int (*atomic_open)(struct inode *, struct dentry *, struct file *, + unsigned open_flag, umode_t create_mode, int *opened); int (*tmpfile) (struct inode *, struct dentry *, umode_t); -} ____cacheline_aligned; - struct file *, unsigned open_flag, - umode_t create_mode, int *opened); }; Again, all methods are called without any locks being held, unless @@ -470,9 +468,11 @@ otherwise noted. method the filesystem can look up, possibly create and open the file in one atomic operation. If it cannot perform this (e.g. the file type turned out to be wrong) it may signal this by returning 1 instead of - usual 0 or -ve . This method is only called if the last - component is negative or needs lookup. Cached positive dentries are - still handled by f_op->open(). + usual 0 or -ve . This method is only called if the last component is + negative or needs lookup. Cached positive dentries are still handled by + f_op->open(). If the file was created, the FILE_CREATED flag should be + set in "opened". In case of O_EXCL the method must only succeed if the + file didn't exist and hence FILE_CREATED shall always be set on success. tmpfile: called in the end of O_TMPFILE open(). Optional, equivalent to atomically creating, opening and unlinking a file in given directory. diff --git a/fs/open.c b/fs/open.c index 2a731b0d08bc..d420331ca32a 100644 --- a/fs/open.c +++ b/fs/open.c @@ -744,14 +744,24 @@ cleanup_file: /** * finish_open - finish opening a file - * @od: opaque open data + * @file: file pointer * @dentry: pointer to dentry * @open: open callback + * @opened: state of open * * This can be used to finish opening a file passed to i_op->atomic_open(). * * If the open callback is set to NULL, then the standard f_op->open() * filesystem callback is substituted. + * + * NB: the dentry reference is _not_ consumed. If, for example, the dentry is + * the return value of d_splice_alias(), then the caller needs to perform dput() + * on it after finish_open(). + * + * On successful return @file is a fully instantiated open file. After this, if + * an error occurs in ->atomic_open(), it needs to clean up with fput(). + * + * Returns zero on success or -errno if the open failed. */ int finish_open(struct file *file, struct dentry *dentry, int (*open)(struct inode *, struct file *), @@ -772,11 +782,16 @@ EXPORT_SYMBOL(finish_open); /** * finish_no_open - finish ->atomic_open() without opening the file * - * @od: opaque open data + * @file: file pointer * @dentry: dentry or NULL (as returned from ->lookup()) * * This can be used to set the result of a successful lookup in ->atomic_open(). - * The filesystem's atomic_open() method shall return NULL after calling this. + * + * NB: unlike finish_open() this function does consume the dentry reference and + * the caller need not dput() it. + * + * Returns "1" which must be the return value of ->atomic_open() after having + * called this function. */ int finish_no_open(struct file *file, struct dentry *dentry) { From dfb1d61b0e9f9e2c542e9adc8d970689f4114ff6 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 16 Sep 2013 14:51:59 +0200 Subject: [PATCH 3/8] cifs: fix filp leak in cifs_atomic_open() If an error occurs after having called finish_open() then fput() needs to be called on the already opened file. Signed-off-by: Miklos Szeredi Cc: Steve French Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- fs/cifs/dir.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index d3e2eaa503a6..5384c2a640ca 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -500,6 +500,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, if (server->ops->close) server->ops->close(xid, tcon, &fid); cifs_del_pending_open(&open); + fput(file); rc = -ENOMEM; } From c5bf8fef52ce184bd48b76d1881df07b43f5091b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 16 Sep 2013 14:52:03 +0200 Subject: [PATCH 4/8] gfs2: set FILE_CREATED In gfs2_create_inode() set FILE_CREATED in *opened. Signed-off-by: Miklos Szeredi Cc: Steven Whitehouse Signed-off-by: Al Viro --- fs/gfs2/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 64915eeae5a7..ced3257f06e8 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -694,8 +694,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, mark_inode_dirty(inode); d_instantiate(dentry, inode); - if (file) + if (file) { + *opened |= FILE_CREATED; error = finish_open(file, dentry, gfs2_open_common, opened); + } gfs2_glock_dq_uninit(ghs); gfs2_glock_dq_uninit(ghs + 1); return error; From 01c919abaf2f3d6a8e59eddf4ee22df1631ab067 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 16 Sep 2013 14:52:04 +0200 Subject: [PATCH 5/8] nfs: set FILE_CREATED Set FILE_CREATED on O_CREAT|O_EXCL. If the NFS server honored our request for exclusivity then this must be correct. Currently this is a no-op, since the VFS sets FILE_CREATED anyway. The next patch will, however, require this flag to be always set by filesystems. Signed-off-by: Miklos Szeredi Cc: Trond Myklebust Signed-off-by: Al Viro --- fs/nfs/dir.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index de434f309af0..854a8f05a610 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1392,6 +1392,9 @@ static int nfs_finish_open(struct nfs_open_context *ctx, { int err; + if ((open_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) + *opened |= FILE_CREATED; + err = finish_open(file, dentry, do_open, opened); if (err) goto out; From 116cc0225381415b96551f725455d067f63a76a0 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 16 Sep 2013 14:52:05 +0200 Subject: [PATCH 6/8] vfs: don't set FILE_CREATED before calling ->atomic_open() If O_CREAT|O_EXCL are passed to open, then we know that either - the file is successfully created, or - the operation fails in some way. So previously we set FILE_CREATED before calling ->atomic_open() so the filesystem doesn't have to. This, however, led to bugs in the implementation that went unnoticed when the filesystem didn't check for existence, yet returned success. To prevent this kind of bug, require filesystems to always explicitly set FILE_CREATED on O_CREAT|O_EXCL and verify this in the VFS. Also added a couple more verifications for the result of atomic_open(): - Warn if filesystem set FILE_CREATED despite the lack of O_CREAT. - Warn if filesystem set FILE_CREATED but gave a negative dentry. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 0dc4cbf21f37..22eb5484774c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2656,6 +2656,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, int acc_mode; int create_error = 0; struct dentry *const DENTRY_NOT_SET = (void *) -1UL; + bool excl; BUG_ON(dentry->d_inode); @@ -2669,10 +2670,9 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, if ((open_flag & O_CREAT) && !IS_POSIXACL(dir)) mode &= ~current_umask(); - if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT)) { + excl = (open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT); + if (excl) open_flag &= ~O_TRUNC; - *opened |= FILE_CREATED; - } /* * Checking write permission is tricky, bacuse we don't know if we are @@ -2726,7 +2726,11 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, } acc_mode = op->acc_mode; + if (WARN_ON(excl && !(*opened & FILE_CREATED))) + *opened |= FILE_CREATED; + if (*opened & FILE_CREATED) { + WARN_ON(!(open_flag & O_CREAT)); fsnotify_create(dir, dentry); acc_mode = MAY_OPEN; } @@ -2740,6 +2744,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, dput(dentry); dentry = file->f_path.dentry; } + WARN_ON(!dentry->d_inode && (*opened & FILE_CREATED)); if (create_error && dentry->d_inode == NULL) { error = create_error; goto out; From 03da633aa7b08bdc4d86e9c2780bb89277b65cd6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 16 Sep 2013 19:22:33 -0400 Subject: [PATCH 7/8] atomic_open: take care of EEXIST in no-open case with O_CREAT|O_EXCL in fs/namei.c Signed-off-by: Al Viro --- fs/9p/vfs_inode_dotl.c | 8 +------- fs/namei.c | 33 +++++++++++++++++++-------------- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 53687bbf2296..a7c481402c46 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -267,14 +267,8 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, } /* Only creates */ - if (!(flags & O_CREAT)) + if (!(flags & O_CREAT) || dentry->d_inode) return finish_no_open(file, res); - else if (dentry->d_inode) { - if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) - return -EEXIST; - else - return finish_no_open(file, res); - } v9ses = v9fs_inode2v9ses(dir); diff --git a/fs/namei.c b/fs/namei.c index 22eb5484774c..645268f23eb6 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2725,16 +2725,6 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, goto out; } - acc_mode = op->acc_mode; - if (WARN_ON(excl && !(*opened & FILE_CREATED))) - *opened |= FILE_CREATED; - - if (*opened & FILE_CREATED) { - WARN_ON(!(open_flag & O_CREAT)); - fsnotify_create(dir, dentry); - acc_mode = MAY_OPEN; - } - if (error) { /* returned 1, that is */ if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) { error = -EIO; @@ -2744,10 +2734,19 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, dput(dentry); dentry = file->f_path.dentry; } - WARN_ON(!dentry->d_inode && (*opened & FILE_CREATED)); - if (create_error && dentry->d_inode == NULL) { - error = create_error; - goto out; + if (*opened & FILE_CREATED) + fsnotify_create(dir, dentry); + if (!dentry->d_inode) { + WARN_ON(*opened & FILE_CREATED); + if (create_error) { + error = create_error; + goto out; + } + } else { + if (excl && !(*opened & FILE_CREATED)) { + error = -EEXIST; + goto out; + } } goto looked_up; } @@ -2756,6 +2755,12 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, * We didn't have the inode before the open, so check open permission * here. */ + acc_mode = op->acc_mode; + if (*opened & FILE_CREATED) { + WARN_ON(!(open_flag & O_CREAT)); + fsnotify_create(dir, dentry); + acc_mode = MAY_OPEN; + } error = may_open(&file->f_path, acc_mode, open_flag); if (error) fput(file); From 8061a6fa564fe0e71601632758b78d2ba737663c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 17 Sep 2013 08:10:18 -0400 Subject: [PATCH 8/8] 9p: don't forget to destroy inode cache if fscache registration fails Signed-off-by: Al Viro --- fs/9p/v9fs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 58e6cbce4156..08f2e1e9a7e6 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -603,10 +603,11 @@ static int v9fs_cache_register(void) if (ret < 0) return ret; #ifdef CONFIG_9P_FSCACHE - return fscache_register_netfs(&v9fs_cache_netfs); -#else - return ret; + ret = fscache_register_netfs(&v9fs_cache_netfs); + if (ret < 0) + v9fs_destroy_inode_cache(); #endif + return ret; } static void v9fs_cache_unregister(void)